5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
// Returns the shared MothurOut object, creating it with `new` on the first
// call (while _uniqueInstance still compares equal to 0).
// NOTE(review): no locking is visible around the check-then-create below;
// confirm the first call cannot happen concurrently from several threads.
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Returns the set of tag names used for the "current" settings. Each tag
// inserted below has a matching "tag=" prefix in printCurrentFiles().
// Exceptions are reported through errorOut() under this function's name.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
// "processors" is a setting rather than a file, but it is tracked in the same set.
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Prints one "tag=value" line (via mothurOut + mothurOutEndLine) for every
// current-file member that is not the empty string.
// Exceptions are reported through errorOut() under this function's name.
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
// The quality-file member is named qualfile but is printed under the tag "qfile".
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
// processors is held as a string; it is only printed when it differs from "1".
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Returns true as soon as any current-file member is non-empty, or when
// processors differs from "1". The checks mirror printCurrentFiles().
// Exceptions are reported through errorOut() under this function's name.
94 bool MothurOut::hasCurrentFiles() {
// NOTE(review): none of the checks below modifies hasCurrent; presumably it is
// only the fall-through result when nothing is set. Confirm.
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
// A non-default processors value also counts as "having current settings".
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): presumably resets the members reported by printCurrentFiles();
// confirm against the function body.
// Exceptions are reported through errorOut() under this function's name.
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Searches the directories listed in the PATH environment variable for
// programName and stores the result in pPath.
//   1. First pass: look for a PATH entry whose lower-cased text contains
//      programName, then append a separator and programName to it.
//   2. Second pass: for each PATH entry, build "<dir><sep>programName" and try
//      to open it with openInputFile(); the first one that opens is used.
// Exceptions are reported through errorOut() under this function's name.
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv() returns a null pointer when PATH is unset, and
// constructing a std::string from a null pointer is undefined behaviour.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
// NOTE(review): only the directory text is lower-cased; programName is used
// as given, so a programName containing upper-case letters cannot match here.
// The "!= -1" test relies on -1 converting to string::npos.
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// Uses the three-argument openInputFile overload, whose "could not open"
// message is commented out, so failed probes stay silent.
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Records filename as the log file name and opens it for output on the
// member stream `out` through openOutputFile().
// Exceptions are reported through errorOut() under this function's name.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
// The MPI rank is queried so that the guarded work runs on rank 0 only.
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores pathname in defaultPath after making sure it ends with the platform
// path separator ("/" on the Unix-like targets named in the #if, "\\" otherwise).
// Exceptions are reported through errorOut() under this function's name.
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
// NOTE(review): for an empty pathname, length()-1 wraps around (unsigned) and
// substr() throws std::out_of_range; no empty check is visible. Confirm that
// callers never pass "".
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given. Unlike setDefaultPath(), no
// trailing separator is added here.
// Exceptions are reported through errorOut() under this function's name.
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// Log shutdown. The MPI rank is queried and the guarded work is restricted to
// rank 0. Exceptions are reported through errorOut() under this function's name.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. Any std::exception raised inside it is reported through
// errorOut() with "MothurOut" as both the class and the function label.
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits `output` as program output. The MPI rank is queried and the guarded
// work is restricted to rank 0 so that only one process writes.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
// NOTE(review): the function label is "MothurOut", not "mothurOut", so this
// report reads the same as the destructor's.
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Screen-only variant of mothurOut(). The MPI rank is queried and the guarded
// work is restricted to rank 0.
351 void MothurOut::mothurOutJustToScreen(string output) {
356 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
358 if (pid == 0) { //only one process should output to screen
366 catch(exception& e) {
// NOTE(review): the function label is "MothurOut" rather than
// "mothurOutJustToScreen"; it looks copied from mothurOut().
367 errorOut(e, "MothurOut", "MothurOut");
371 /*********************************************************************************************/
// Ends the current output line. The MPI rank is queried and the guarded work
// is restricted to rank 0.
// Exceptions are reported through errorOut() as "MothurOutEndLine".
372 void MothurOut::mothurOutEndLine() {
376 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
378 if (pid == 0) { //only one process should output to screen
388 catch(exception& e) {
389 errorOut(e, "MothurOut", "MothurOutEndLine");
393 /*********************************************************************************************/
// Overload of mothurOut() that also streams `output` into the caller-supplied
// outputFile. The MPI rank is queried and the guarded work is restricted to rank 0.
394 void MothurOut::mothurOut(string output, ofstream& outputFile) {
399 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
401 if (pid == 0) { //only one process should output to screen
// Copy of the message for the caller's own file.
406 outputFile << output;
414 catch(exception& e) {
// NOTE(review): the function label is "MothurOut", not "mothurOut".
415 errorOut(e, "MothurOut", "MothurOut");
419 /*********************************************************************************************/
// Overload of mothurOutEndLine() that takes the caller's outputFile. The MPI
// rank is queried and the guarded work is restricted to rank 0.
// Exceptions are reported through errorOut() as "MothurOutEndLine".
420 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
424 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
426 if (pid == 0) { //only one process should output to screen
437 catch(exception& e) {
438 errorOut(e, "MothurOut", "MothurOutEndLine");
442 /*********************************************************************************************/
// Log-only variant of mothurOut(). The MPI rank is queried and the guarded
// work is restricted to rank 0.
// Exceptions are reported through errorOut() as "MothurOutJustToLog".
443 void MothurOut::mothurOutJustToLog(string output) {
447 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
449 if (pid == 0) { //only one process should output to screen
458 catch(exception& e) {
459 errorOut(e, "MothurOut", "MothurOutJustToLog");
463 /*********************************************************************************************/
// Central error reporter used by the catch blocks in this file.
//   e        - the caught exception; its what() text is printed after "[ERROR]: ".
//   object   - name of the class (or command) in which the error occurred.
//   function - name of the function in which the error occurred.
// When what() does not contain "bad_alloc" a generic contact message is
// printed. For bad_alloc an out-of-memory explanation is printed instead, with
// dedicated texts for object == "cluster" and object == "shhh.flows".
464 void MothurOut::errorOut(exception& e, string object, string function) {
466 //mem_usage(vm, rss);
468 string errorType = toString(e.what());
// NOTE(review): find() returns size_t but the result is stored in an int and
// later compared with string::npos; this relies on the int -1 converting back
// to npos. A size_t (string::size_type) local would avoid the narrowing.
470 int pos = errorType.find("bad_alloc");
471 mothurOut("[ERROR]: ");
472 mothurOut(errorType);
474 if (pos == string::npos) { //not bad_alloc
475 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// NOTE(review): the three messages below contain the typo "uable" (for
// "unable") in user-facing text.
478 if (object == "cluster"){
479 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
480 }else if (object == "shhh.flows"){
481 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
483 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
487 /*********************************************************************************************/
488 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
489 // process_mem_usage(double &, double &) - takes two doubles by reference,
490 // attempts to read the system-dependent data for a process' virtual memory
491 // size and resident set size, and return the results in KB.
493 // On failure, returns 0.0, 0.0
// Reads /proc/self/stat, extracts the vsize and rss fields, converts them,
// stores them in the two reference parameters and prints them via mothurOut().
//   vm_usage     - receives vsize / 1024.0.
//   resident_set - receives rss multiplied by the page size in KB.
// NOTE(review): the platform guard below also matches __APPLE__/__MACH__,
// where /proc/self/stat presumably does not exist; confirm what the outputs
// are when the stream fails to open.
494 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
495 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
500 // 'file' stat seems to give the most reliable results
502 ifstream stat_stream("/proc/self/stat",ios_base::in);
504 // dummy vars for leading entries in stat that we don't care about
506 string pid, comm, state, ppid, pgrp, session, tty_nr;
507 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
508 string utime, stime, cutime, cstime, priority, nice;
509 string O, itrealvalue, starttime;
511 // the two fields we want
// The 22 placeholder strings are read only to skip ahead to vsize and rss.
516 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
517 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
518 >> utime >> stime >> cutime >> cstime >> priority >> nice
519 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
521 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
// presumably vsize is in bytes and rss in pages (see proc(5)), which makes
// both results KB; verify.
522 vm_usage = vsize / 1024.0;
523 resident_set = rss * page_size_kb;
525 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
529 /* //windows memory usage
530 // Get the list of process identifiers.
531 DWORD aProcesses[1024], cbNeeded, cProcesses;
533 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
535 // Calculate how many process identifiers were returned.
536 cProcesses = cbNeeded / sizeof(DWORD);
538 // Print the memory usage for each process
539 for (int i = 0; i < cProcesses; i++ ) {
540 DWORD processID = aProcesses[i];
542 PROCESS_MEMORY_COUNTERS pmc;
544 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
546 // Print the process identifier.
547 printf( "\nProcess ID: %u\n", processID);
549 if (NULL != hProcess) {
551 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
552 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
553 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
554 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
555 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
556 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
557 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
558 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
559 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
560 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
562 CloseHandle(hProcess);
572 /***********************************************************************/
// Opens fileName on fileHandle in append mode (ios::app) after expanding it
// with getFullPathName(). A "[ERROR]: Could not open" line is printed when
// the open fails.
// Exceptions are reported through errorOut() under this function's name.
573 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
575 fileName = getFullPathName(fileName);
577 fileHandle.open(fileName.c_str(), ios::app);
579 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
586 catch(exception& e) {
587 errorOut(e, "MothurOut", "openOutputFileAppend");
591 /***********************************************************************/
// Binary counterpart of openOutputFileAppend(): opens fileName on fileHandle
// with ios::app | ios::binary after expanding it with getFullPathName(). A
// "[ERROR]: Could not open" line is printed when the open fails.
592 int MothurOut::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
594 fileName = getFullPathName(fileName);
596 fileHandle.open(fileName.c_str(), ios::app | ios::binary);
598 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
605 catch(exception& e) {
// NOTE(review): the function label says "openOutputFileAppend"; it looks
// copied from the non-binary version, so reports from here are mislabelled.
606 errorOut(e, "MothurOut", "openOutputFileAppend");
611 /***********************************************************************/
// Skips leading whitespace on f: characters are consumed while isspace() is
// true, and the first non-whitespace character is pushed back unless the
// stream has reached end-of-file.
// Exceptions are reported through errorOut() under this function's name.
612 void MothurOut::gobble(istream& f){
616 while(isspace(d=f.get())) { ;}
// At EOF there is nothing to put back.
617 if(!f.eof()) { f.putback(d); }
619 catch(exception& e) {
620 errorOut(e, "MothurOut", "gobble");
624 /***********************************************************************/
// istringstream overload of gobble(): consumes characters while isspace() is
// true and pushes the first non-whitespace character back unless the stream
// has reached end-of-file.
// Exceptions are reported through errorOut() under this function's name.
625 void MothurOut::gobble(istringstream& f){
628 while(isspace(d=f.get())) {;}
629 if(!f.eof()) { f.putback(d); }
631 catch(exception& e) {
632 errorOut(e, "MothurOut", "gobble");
637 /***********************************************************************/
// Reads characters from fileHandle until a line terminator ('\n', '\r' or
// '\f') is consumed or the stream reports eof().
// Exceptions are reported through errorOut() under this function's name.
639 string MothurOut::getline(istringstream& fileHandle) {
644 while (!fileHandle.eof()) {
// NOTE(review): eof() only becomes true after a read has failed, so the final
// get() yields EOF stored in a char. Unlike the ifstream overload, the test
// below has no (c == EOF) case; confirm that value is not added to the line.
646 char c = fileHandle.get();
648 //are you at the end of the line
649 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
656 catch(exception& e) {
657 errorOut(e, "MothurOut", "getline");
661 /***********************************************************************/
// ifstream overload of getline(): reads characters until a line terminator
// ('\n', '\r' or '\f') or EOF is returned by get().
// Exceptions are reported through errorOut() under this function's name.
663 string MothurOut::getline(ifstream& fileHandle) {
670 char c = fileHandle.get();
672 //are you at the end of the line
// NOTE(review): c is a plain char, so a data byte equal to (char)EOF would
// also stop the line here; presumably inputs are text. Confirm.
673 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
680 catch(exception& e) {
681 errorOut(e, "MothurOut", "getline");
685 /***********************************************************************/
687 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
688 #ifdef USE_COMPRESSION
// Reports whether `s` ends with the NUL-terminated string `suffix`.
// An empty suffix matches every string; a suffix longer than `s` never matches.
// `s` is taken by const reference and compared in place, so no copy of the
// string and no temporary substring are created per call.
inline bool endsWith(const std::string& s, const char * suffix){
    const size_t suffixLength = strlen(suffix);
    // Guard first: compare() throws std::out_of_range if the start offset exceeds size().
    if (s.size() < suffixLength) { return false; }
    return s.compare(s.size() - suffixLength, suffixLength, suffix) == 0;
}
// Returns longName truncated just after its last '.', so the dot itself is
// kept. A name without any '.' is left as is by the visible logic.
// When built with USE_COMPRESSION on the Unix-like targets, a trailing ".gz"
// or ".bz2" is removed first and a notice is written to cerr.
// Exceptions are reported through errorOut() under this function's name.
696 string MothurOut::getRootName(string longName){
699 string rootName = longName;
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
702 #ifdef USE_COMPRESSION
703 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
// Drop the compression extension, including its dot.
704 int pos = rootName.find_last_of('.');
705 rootName = rootName.substr(0, pos);
706 cerr << "shortening " << longName << " to " << rootName << "\n";
710 if(rootName.find_last_of(".") != rootName.npos){
// +1 keeps the '.' at the end of the root name.
711 int pos = rootName.find_last_of('.')+1;
712 rootName = rootName.substr(0, pos);
717 catch(exception& e) {
718 errorOut(e, "MothurOut", "getRootName");
722 /***********************************************************************/
// Strips any directory part from longName: everything up to and including the
// last '/' or '\\' is removed. A name without a separator is kept whole.
// Exceptions are reported through errorOut() under this function's name.
724 string MothurOut::getSimpleName(string longName){
726 string simpleName = longName;
// Either separator style is accepted, whatever the platform.
729 found=longName.find_last_of("/\\");
731 if(found != longName.npos){
732 simpleName = longName.substr(found+1);
737 catch(exception& e) {
738 errorOut(e, "MothurOut", "getSimpleName");
743 /***********************************************************************/
// Draws a pseudo-random index from rand(), scaled by (highest+1) over
// (RAND_MAX+1) and truncated to int; intended range is 0..highest inclusive.
// Exceptions are reported through errorOut() under this function's name.
745 int MothurOut::getRandomIndex(int highest){
// NOTE(review): rand() and highest+1 are converted to float, which cannot
// represent every int exactly, so large values lose precision; highest+1 also
// overflows for highest == INT_MAX. Presumably callers pass small container
// sizes. Confirm.
748 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
752 catch(exception& e) {
753 errorOut(e, "MothurOut", "getRandomIndex");
758 /**********************************************************************/
// Returns the directory part of longName: everything up to and including the
// last '/' or '\\'. When longName holds no separator the visible logic leaves
// the result equal to longName.
// Exceptions are reported through errorOut() under this function's name.
760 string MothurOut::getPathName(string longName){
762 string rootPathName = longName;
764 if(longName.find_last_of("/\\") != longName.npos){
// +1 keeps the trailing separator in the returned path.
765 int pos = longName.find_last_of("/\\")+1;
766 rootPathName = longName.substr(0, pos);
771 catch(exception& e) {
772 errorOut(e, "MothurOut", "getPathName");
777 /***********************************************************************/
// Checks that dirName names a usable directory by trying to create a scratch
// file inside it. dirName is a reference and is modified: a trailing path
// separator is appended if missing and the name is expanded with
// getFullPathName(). Returns false for an empty name; on a failed open an
// explanatory message is printed.
// Exceptions are reported through errorOut() under this function's name.
779 bool MothurOut::dirCheck(string& dirName){
782 if (dirName == "") { return false; }
787 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
792 //add / to name if needed
// Safe here: the empty case returned above, so length()-1 cannot wrap.
793 string lastChar = dirName.substr(dirName.length()-1);
794 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
795 if (lastChar != "/") { dirName += "/"; }
797 if (lastChar != "\\") { dirName += "\\"; }
800 //test to make sure directory exists
801 dirName = getFullPathName(dirName);
// Scratch file name: <dir><tag>temp<current time in seconds>.
802 string outTemp = dirName + tag + "temp"+ toString(time(NULL));
804 out.open(outTemp.c_str(), ios::trunc);
806 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
// The scratch file is removed again once the probe is done.
809 mothurRemove(outTemp);
815 catch(exception& e) {
816 errorOut(e, "MothurOut", "dirCheck");
821 //**********************************************************************************************************************
// Parses a class specification such as treatment<Early|Late>-age<young|old>
// into a map from category name (the text before '<') to its list of values
// (the text between '<' and '>' split at '|').
// Returns the entries collected so far if control_pressed becomes set.
// Exceptions are reported through errorOut() under this function's name.
823 map<string, vector<string> > MothurOut::parseClasses(string classes){
825 map<string, vector<string> > parts;
827 //treatment<Early|Late>-age<young|old>
828 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
830 for (int i = 0; i < pieces.size(); i++) {
831 string category = ""; string value = "";
832 bool foundOpen = false;
833 for (int j = 0; j < pieces[i].length(); j++) {
834 if (control_pressed) { return parts; }
836 if (pieces[i][j] == '<') { foundOpen = true; }
// Pushing j past the end of the piece stops the scan at the closing '>'.
837 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
// Characters before '<' build the category; characters after it build the value list.
839 if (!foundOpen) { category += pieces[i][j]; }
840 else { value += pieces[i][j]; }
843 vector<string> values; splitAtChar(value, values, '|');
// A repeated category name overwrites the earlier entry.
844 parts[category] = values;
849 catch(exception& e) {
850 errorOut(e, "MothurOut", "parseClasses");
854 /***********************************************************************/
// Extracts the path prefix of longName: everything up to and including the
// last '~', '/' or '\\'. The visible logic assigns `path` only when one of
// those characters is present.
// Exceptions are reported through errorOut() under this function's name.
856 string MothurOut::hasPath(string longName){
// '~' counts as a path character so that home-relative names are recognised.
861 found=longName.find_last_of("~/\\");
863 if(found != longName.npos){
864 path = longName.substr(0, found+1);
869 catch(exception& e) {
870 errorOut(e, "MothurOut", "hasPath");
875 /***********************************************************************/
// Returns the extension of longName, i.e. the text from its last '.' to the
// end, including the dot. The result stays "" when there is no '.'.
// Exceptions are reported through errorOut() under this function's name.
877 string MothurOut::getExtension(string longName){
879 string extension = "";
881 if(longName.find_last_of('.') != longName.npos){
882 int pos = longName.find_last_of('.');
// The count argument exceeds what remains; substr() clamps it to the end of the string.
883 extension = longName.substr(pos, longName.length());
888 catch(exception& e) {
889 errorOut(e, "MothurOut", "getExtension");
893 /***********************************************************************/
// Reports whether the file fileName is blank. The name is expanded with
// getFullPathName() and opened; a "[ERROR]: Could not open" line is printed
// when the open fails. Returns true, after closing the stream, when the
// stream is at end-of-file at the time of the blank check.
// Exceptions are reported through errorOut() under this function's name.
894 bool MothurOut::isBlank(string fileName){
897 fileName = getFullPathName(fileName);
900 fileHandle.open(fileName.c_str());
902 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
905 //check for blank file
907 if (fileHandle.eof()) { fileHandle.close(); return true; }
912 catch(exception& e) {
913 errorOut(e, "MothurOut", "isBlank");
917 /***********************************************************************/
// Expands fileName into a complete path name.
//   - A name without any path part (per hasPath()) is returned unchanged.
//   - A path containing '~' is rewritten relative to the home directory
//     (HOME on the Unix-like targets, HOMEPATH otherwise).
//   - A path without "./" (".\\" on Windows) is treated as already complete
//     and returned unchanged.
//   - Otherwise the "./" and "../" components are resolved against the
//     current working directory, which is split into its directories first.
// If a component cannot be interpreted, a "cannot resolve path" message is
// printed and fileName is returned unchanged.
// Exceptions are reported through errorOut() under this function's name.
919 string MothurOut::getFullPathName(string fileName){
922 string path = hasPath(fileName);
926 if (path == "") { return fileName; } //its a simple name
927 else { //we need to complete the pathname
928 // ex. ../../../filename
929 // cwd = /user/work/desktop
932 //get current working directory
933 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
935 if (path.find("~") != -1) { //go to home directory
938 char *homepath = NULL;
939 homepath = getenv ("HOME");
// An unset HOME falls back to an empty home directory.
940 if ( homepath != NULL) { homeDir = homepath; }
941 else { homeDir = ""; }
943 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
946 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
947 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
949 //char* cwdpath = new char[1024];
951 //cwdpath=getcwd(cwdpath,size);
// NOTE(review): getcwd(NULL, 0) returns a buffer allocated by the library and
// no matching free() is visible; confirm it is released after being copied
// into cwd.
954 char *cwdpath = NULL;
955 cwdpath = getcwd(NULL, 0); // or _getcwd
956 if ( cwdpath != NULL) { cwd = cwdpath; }
// Drops the first character of cwd (the leading '/' of an absolute path).
962 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
964 //break apart the current working directory
966 while (simpleCWD.find_first_of('/') != string::npos) {
967 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
968 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
971 //get last one // ex. ../../../filename = /user/work/desktop/filename
972 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index is the position in dirs of the deepest directory still kept.
975 int index = dirs.size()-1;
// The relative components are consumed from the right-hand end of path.
977 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
978 if (pos == 0) { break; //you are at the end
979 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
980 path = path.substr(0, pos-1);
982 if (index == 0) { break; }
983 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
984 path = path.substr(0, pos);
985 }else if (pos == 1) { break; //you are at the end
986 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// Rebuild the absolute name from the directories that remain.
989 for (int i = index; i >= 0; i--) {
990 newFileName = dirs[i] + "/" + newFileName;
993 newFileName = "/" + newFileName;
997 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): unlike the HOME branch above, the getenv() result is not
// checked for NULL before it is turned into a std::string; that is undefined
// behaviour when HOMEPATH is unset.
998 string homeDir = getenv ("HOMEPATH");
999 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
1002 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
1003 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
1005 char *cwdpath = NULL;
1006 cwdpath = getcwd(NULL, 0); // or _getcwd
1007 if ( cwdpath != NULL) { cwd = cwdpath; }
1010 //break apart the current working directory
1011 vector<string> dirs;
1012 while (cwd.find_first_of('\\') != -1) {
1013 string dir = cwd.substr(0,cwd.find_first_of('\\'));
1014 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
1015 dirs.push_back(dir);
1019 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
1021 int index = dirs.size()-1;
1023 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
1024 if (pos == 0) { break; //you are at the end
1025 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1026 path = path.substr(0, pos-1);
1028 if (index == 0) { break; }
1029 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
1030 path = path.substr(0, pos);
1031 }else if (pos == 1) { break; //you are at the end
1032 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
1035 for (int i = index; i >= 0; i--) {
1036 newFileName = dirs[i] + "\\" + newFileName;
1045 catch(exception& e) {
1046 errorOut(e, "MothurOut", "getFullPathName");
1050 /***********************************************************************/
// Quiet variant of openInputFile(): opens fileName (expanded with
// getFullPathName()) on fileHandle; the "could not open" message is commented
// out in this overload. The third parameter m is not referenced by any
// visible line.
// With USE_COMPRESSION on the Unix-like targets, ".gz"/".bz2" inputs are
// decompressed by a forked child running zcat/bzcat into a named pipe, and
// the pipe is opened instead of the original file.
1052 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1054 //get full path name
1055 string completeFileName = getFullPathName(fileName);
1056 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1057 #ifdef USE_COMPRESSION
1058 // check for gzipped or bzipped file
1059 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that
// path before mkfifo() runs. The mkfifo() result is not checked either.
1060 string tempName = string(tmpnam(0));
1061 mkfifo(tempName.c_str(), 0666);
1062 int fork_result = fork();
1063 if (fork_result < 0) {
1064 cerr << "Error forking.\n";
1066 } else if (fork_result == 0) {
// Child process: stream the decompressed data into the pipe.
// NOTE(review): the file name goes into the shell command unquoted, so spaces
// or shell metacharacters in it change the command that system() runs.
1067 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1068 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1069 system(command.c_str());
1070 cerr << "Done decompressing " << completeFileName << "\n";
1071 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1074 cerr << "waiting on child process " << fork_result << "\n";
1075 completeFileName = tempName;
1080 fileHandle.open(completeFileName.c_str());
1082 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1085 //check for blank file
1090 catch(exception& e) {
// NOTE(review): the function label passed here is "openInputFile - no Error".
1091 errorOut(e, "MothurOut", "openInputFile - no Error");
1095 /***********************************************************************/
// Opens fileName (expanded with getFullPathName()) for reading on fileHandle.
// Prints "[ERROR]: Could not open ..." when the open fails and
// "[ERROR]: ... is blank" when the stream is at end-of-file at the blank check.
// With USE_COMPRESSION on the Unix-like targets, ".gz"/".bz2" inputs are
// decompressed by a forked child running zcat/bzcat into a named pipe, and
// the pipe is opened instead of the original file.
// Exceptions are reported through errorOut() under this function's name.
1097 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1100 //get full path name
1101 string completeFileName = getFullPathName(fileName);
1102 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1103 #ifdef USE_COMPRESSION
1104 // check for gzipped or bzipped file
1105 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that
// path before mkfifo() runs. The mkfifo() result is not checked either.
1106 string tempName = string(tmpnam(0));
1107 mkfifo(tempName.c_str(), 0666);
1108 int fork_result = fork();
1109 if (fork_result < 0) {
1110 cerr << "Error forking.\n";
1112 } else if (fork_result == 0) {
// Child process: stream the decompressed data into the pipe.
// NOTE(review): the file name goes into the shell command unquoted, so spaces
// or shell metacharacters in it change the command that system() runs.
1113 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1114 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1115 system(command.c_str());
1116 cerr << "Done decompressing " << completeFileName << "\n";
1117 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1120 cerr << "waiting on child process " << fork_result << "\n";
1121 completeFileName = tempName;
1127 fileHandle.open(completeFileName.c_str());
1129 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1133 //check for blank file
1135 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1140 catch(exception& e) {
1141 errorOut(e, "MothurOut", "openInputFile");
1145 /***********************************************************************/
// Binary counterpart of openInputFile(): opens fileName (expanded with
// getFullPathName()) on fileHandle with ios::binary. Prints
// "[ERROR]: Could not open ..." when the open fails and "[ERROR]: ... is
// blank" when the stream is at end-of-file at the blank check.
// With USE_COMPRESSION on the Unix-like targets, ".gz"/".bz2" inputs are
// decompressed by a forked child running zcat/bzcat into a named pipe, and
// the pipe is opened instead of the original file.
// Exceptions are reported through errorOut() under this function's name.
1146 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){
1149 //get full path name
1150 string completeFileName = getFullPathName(fileName);
1151 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1152 #ifdef USE_COMPRESSION
1153 // check for gzipped or bzipped file
1154 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam() only returns a name; another process can create that
// path before mkfifo() runs. The mkfifo() result is not checked either.
1155 string tempName = string(tmpnam(0));
1156 mkfifo(tempName.c_str(), 0666);
1157 int fork_result = fork();
1158 if (fork_result < 0) {
1159 cerr << "Error forking.\n";
1161 } else if (fork_result == 0) {
// Child process: stream the decompressed data into the pipe.
// NOTE(review): the file name goes into the shell command unquoted, so spaces
// or shell metacharacters in it change the command that system() runs.
1162 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1163 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1164 system(command.c_str());
1165 cerr << "Done decompressing " << completeFileName << "\n";
1166 mothurRemove(tempName);
// Parent process: read from the pipe instead of the compressed file.
1169 cerr << "waiting on child process " << fork_result << "\n";
1170 completeFileName = tempName;
1176 fileHandle.open(completeFileName.c_str(), ios::binary);
1178 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1182 //check for blank file
1184 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1189 catch(exception& e) {
1190 errorOut(e, "MothurOut", "openInputFileBinary");
1194 /***********************************************************************/
1195 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
1198 //get full path name
1199 string completeFileName = getFullPathName(fileName);
1200 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1201 #ifdef USE_COMPRESSION
1202 // check for gzipped or bzipped file
1203 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1204 string tempName = string(tmpnam(0));
1205 mkfifo(tempName.c_str(), 0666);
1206 int fork_result = fork();
1207 if (fork_result < 0) {
1208 cerr << "Error forking.\n";
1210 } else if (fork_result == 0) {
1211 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1212 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1213 system(command.c_str());
1214 cerr << "Done decompressing " << completeFileName << "\n";
1215 mothurRemove(tempName);
1218 cerr << "waiting on child process " << fork_result << "\n";
1219 completeFileName = tempName;
1225 fileHandle.open(completeFileName.c_str(), ios::binary);
1227 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1231 //check for blank file
1233 //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1238 catch(exception& e) {
1239 errorOut(e, "MothurOut", "openInputFileBinary - no error");
1244 /***********************************************************************/
1246 int MothurOut::renameFile(string oldName, string newName){
1249 if (oldName == newName) { return 0; }
1252 int exist = openInputFile(newName, inTest, "");
1255 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1256 if (exist == 0) { //you could open it so you want to delete it
1257 string command = "rm " + newName;
1258 system(command.c_str());
1261 string command = "mv " + oldName + " " + newName;
1262 system(command.c_str());
1264 mothurRemove(newName);
1265 int renameOk = rename(oldName.c_str(), newName.c_str());
1270 catch(exception& e) {
1271 errorOut(e, "MothurOut", "renameFile");
1276 /***********************************************************************/
1278 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1281 string completeFileName = getFullPathName(fileName);
1282 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1283 #ifdef USE_COMPRESSION
1284 // check for gzipped file
1285 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1286 string tempName = string(tmpnam(0));
1287 mkfifo(tempName.c_str(), 0666);
1288 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1289 int fork_result = fork();
1290 if (fork_result < 0) {
1291 cerr << "Error forking.\n";
1293 } else if (fork_result == 0) {
1294 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1295 system(command.c_str());
1298 completeFileName = tempName;
1303 fileHandle.open(completeFileName.c_str(), ios::trunc);
1305 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1312 catch(exception& e) {
1313 errorOut(e, "MothurOut", "openOutputFile");
1318 /***********************************************************************/
1320 int MothurOut::openOutputFileBinary(string fileName, ofstream& fileHandle){
1323 string completeFileName = getFullPathName(fileName);
1324 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1325 #ifdef USE_COMPRESSION
1326 // check for gzipped file
1327 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1328 string tempName = string(tmpnam(0));
1329 mkfifo(tempName.c_str(), 0666);
1330 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1331 int fork_result = fork();
1332 if (fork_result < 0) {
1333 cerr << "Error forking.\n";
1335 } else if (fork_result == 0) {
1336 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1337 system(command.c_str());
1340 completeFileName = tempName;
1345 fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1347 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1354 catch(exception& e) {
1355 errorOut(e, "MothurOut", "openOutputFileBinary");
1360 /**************************************************************************************************/
1361 int MothurOut::appendFiles(string temp, string filename) {
1366 //open output file in append mode
1367 openOutputFileAppend(filename, output);
1368 int ableToOpen = openInputFile(temp, input, "no error");
1369 //int ableToOpen = openInputFile(temp, input);
1372 if (ableToOpen == 0) { //you opened it
1375 while (!input.eof()) {
1376 input.read(buffer, 4096);
1377 output.write(buffer, input.gcount());
1378 //count number of lines
1379 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1388 catch(exception& e) {
1389 errorOut(e, "MothurOut", "appendFiles");
1393 /**************************************************************************************************/
1394 int MothurOut::appendBinaryFiles(string temp, string filename) {
1399 //open output file in append mode
1400 openOutputFileBinaryAppend(filename, output);
1401 int ableToOpen = openInputFileBinary(temp, input, "no error");
1403 if (ableToOpen == 0) { //you opened it
1406 while (!input.eof()) {
1407 input.read(buffer, 4096);
1408 output.write(buffer, input.gcount());
1417 catch(exception& e) {
1418 errorOut(e, "MothurOut", "appendBinaryFiles");
1423 /**************************************************************************************************/
1424 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1429 //open output file in append mode
1430 openOutputFileAppend(filename, output);
1431 int ableToOpen = openInputFile(temp, input, "no error");
1432 //int ableToOpen = openInputFile(temp, input);
1435 if (ableToOpen == 0) { //you opened it
1437 string headers = getline(input); gobble(input);
1438 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1441 while (!input.eof()) {
1442 input.read(buffer, 4096);
1443 output.write(buffer, input.gcount());
1444 //count number of lines
1445 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1454 catch(exception& e) {
1455 errorOut(e, "MothurOut", "appendFiles");
1459 /**************************************************************************************************/
1460 string MothurOut::sortFile(string distFile, string outputDir){
1463 //if (outputDir == "") { outputDir += hasPath(distFile); }
1464 string outfile = getRootName(distFile) + "sorted.dist";
1467 //if you can, use the unix sort since its been optimized for years
1468 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1469 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1470 system(command.c_str());
1471 #else //you are stuck with my best attempt...
1472 //windows sort does not have a way to specify a column, only a character in the line
1473 //since we cannot assume that the distance will always be at the the same character location on each line
1474 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1476 //read in file line by file and put distance first
1477 string tempDistFile = distFile + ".temp";
1480 openInputFile(distFile, input);
1481 openOutputFile(tempDistFile, output);
1483 string firstName, secondName;
1485 while (!input.eof()) {
1486 input >> firstName >> secondName >> dist;
1487 output << dist << '\t' << firstName << '\t' << secondName << endl;
1494 //sort using windows sort
1495 string tempOutfile = outfile + ".temp";
1496 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1497 system(command.c_str());
1499 //read in sorted file and put distance at end again
1502 openInputFile(tempOutfile, input2);
1503 openOutputFile(outfile, output2);
1505 while (!input2.eof()) {
1506 input2 >> dist >> firstName >> secondName;
1507 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1514 mothurRemove(tempDistFile);
1515 mothurRemove(tempOutfile);
1520 catch(exception& e) {
1521 errorOut(e, "MothurOut", "sortFile");
1525 /**************************************************************************************************/
// Scans filename one byte at a time and collects byte offsets into `positions`, then appends
// the file size as a final entry. `num` is set to the number of offsets collected before the
// size is appended.
// NOTE(review): several short lines of this function are absent from this extract (local
// declarations, closing braces, the test that guards the push_back in the scan loop, and the
// return), so the comments below describe only the statements that are visible.
1526 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1528 vector<unsigned long long> positions;
1530 //openInputFile(filename, inFASTA);
// The file is opened directly in binary mode rather than through openInputFile.
1531 inFASTA.open(filename.c_str(), ios::binary);
// count is the number of characters read so far, so count-1 is the offset of the last one read.
1534 unsigned long long count = 0;
1535 while(!inFASTA.eof()){
1536 //input = getline(inFASTA);
1537 //cout << input << '\t' << inFASTA.tellg() << endl;
1538 //if (input.length() != 0) {
1539 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1541 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1542 char c = inFASTA.get(); count++;
// Records the offset of the character just read; presumably only when c is '>' (the guarding
// condition is not visible here - confirm against the full file).
1544 positions.push_back(count-1);
1545 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1550 num = positions.size();
1551 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
// size has no initializer on this line; whether it is assigned when fopen fails cannot be
// confirmed from the visible lines.
1553 unsigned long long size;
1555 //get num bytes in file
1556 pFile = fopen (filename.c_str(),"rb");
1557 if (pFile==NULL) perror ("Error opening file");
1559 fseek (pFile, 0, SEEK_END);
1564 /*unsigned long long size = positions[(positions.size()-1)];
1566 openInputFile(filename, in);
1571 if(in.eof()) { break; }
1576 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1578 positions.push_back(size);
1583 catch(exception& e) {
1584 errorOut(e, "MothurOut", "setFilePosFasta");
1588 //**********************************************************************************************************************
1589 vector<consTax> MothurOut::readConsTax(string inputfile){
1592 vector<consTax> taxes;
1595 openInputFile(inputfile, in);
1602 if (control_pressed) { break; }
1604 string otu = ""; string tax = "unknown";
1607 in >> otu >> size >> tax; gobble(in);
1608 consTax temp(otu, tax, size);
1609 taxes.push_back(temp);
1615 catch(exception& e) {
1616 errorOut(e, "MothurOut", "readConsTax");
1620 //**********************************************************************************************************************
// Map-filling overload of readConsTax: reads "otu size taxonomy" records from inputfile and
// builds a consTax2(tax, size) for each one.
// NOTE(review): the lines that declare the stream, skip any header, store temp into `taxes`
// and return are not visible in this extract - confirm against the full file.
1621 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1624 openInputFile(inputfile, in);
// Stop reading as soon as control_pressed is set.
1631 if (control_pressed) { break; }
// Defaults used when a field cannot be read from the record.
1633 string otu = ""; string tax = "unknown";
// One whitespace-separated record, then skip trailing whitespace via gobble.
1636 in >> otu >> size >> tax; gobble(in);
1637 consTax2 temp(tax, size);
1644 catch(exception& e) {
1645 errorOut(e, "MothurOut", "readConsTax");
1649 /**************************************************************************************************/
1650 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1652 filename = getFullPathName(filename);
1654 vector<unsigned long long> positions;
1656 //openInputFile(filename, in);
1657 in.open(filename.c_str(), ios::binary);
1660 unsigned long long count = 0;
1661 positions.push_back(0);
1664 //getline counting reads
1665 char d = in.get(); count++;
1666 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1667 //get next character
1673 d=in.get(); count++;
1674 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
1676 positions.push_back(count-1);
1677 //cout << count-1 << endl;
1681 num = positions.size()-1;
1684 unsigned long long size;
1686 //get num bytes in file
1687 pFile = fopen (filename.c_str(),"rb");
1688 if (pFile==NULL) perror ("Error opening file");
1690 fseek (pFile, 0, SEEK_END);
1695 positions[(positions.size()-1)] = size;
1699 catch(exception& e) {
1700 errorOut(e, "MothurOut", "setFilePosEachLine");
1704 /**************************************************************************************************/
1706 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1708 vector<unsigned long long> filePos;
1709 filePos.push_back(0);
1712 unsigned long long size;
1714 filename = getFullPathName(filename);
1716 //get num bytes in file
1717 pFile = fopen (filename.c_str(),"rb");
1718 if (pFile==NULL) perror ("Error opening file");
1720 fseek (pFile, 0, SEEK_END);
1725 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1727 //estimate file breaks
1728 unsigned long long chunkSize = 0;
1729 chunkSize = size / proc;
1731 //file to small to divide by processors
1732 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1734 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1735 for (int i = 0; i < proc; i++) {
1736 unsigned long long spot = (i+1) * chunkSize;
1739 openInputFile(filename, in);
1743 unsigned long long newSpot = spot;
1747 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1748 else if (int(c) == -1) { break; }
1752 //there was not another sequence before the end of the file
1753 unsigned long long sanityPos = in.tellg();
1755 if (sanityPos == -1) { break; }
1756 else { filePos.push_back(newSpot); }
1762 filePos.push_back(size);
1764 //sanity check filePos
1765 for (int i = 0; i < (filePos.size()-1); i++) {
1766 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1769 proc = (filePos.size() - 1);
1771 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1773 filePos.push_back(size);
1777 catch(exception& e) {
1778 errorOut(e, "MothurOut", "divideFile");
1782 /**************************************************************************************************/
1784 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1786 vector<unsigned long long> filePos;
1787 filePos.push_back(0);
1790 unsigned long long size;
1792 filename = getFullPathName(filename);
1794 //get num bytes in file
1795 pFile = fopen (filename.c_str(),"rb");
1796 if (pFile==NULL) perror ("Error opening file");
1798 fseek (pFile, 0, SEEK_END);
1803 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1805 //estimate file breaks
1806 unsigned long long chunkSize = 0;
1807 chunkSize = size / proc;
1809 //file to small to divide by processors
1810 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1812 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1813 for (int i = 0; i < proc; i++) {
1814 unsigned long long spot = (i+1) * chunkSize;
1817 openInputFile(filename, in);
1820 //look for next line break
1821 unsigned long long newSpot = spot;
1825 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1826 else if (int(c) == -1) { break; }
1829 //there was not another line before the end of the file
1830 unsigned long long sanityPos = in.tellg();
1832 if (sanityPos == -1) { break; }
1833 else { filePos.push_back(newSpot); }
1839 filePos.push_back(size);
1841 //sanity check filePos
1842 for (int i = 0; i < (filePos.size()-1); i++) {
1843 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1846 proc = (filePos.size() - 1);
1848 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1850 filePos.push_back(size);
1854 catch(exception& e) {
1855 errorOut(e, "MothurOut", "divideFile");
1859 /**************************************************************************************************/
1860 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1863 vector<unsigned long long> filePos = divideFile(filename, proc);
1865 for (int i = 0; i < (filePos.size()-1); i++) {
1869 openInputFile(filename, in);
1870 in.seekg(filePos[i]);
1871 unsigned long long size = filePos[(i+1)] - filePos[i];
1872 char* chunk = new char[size];
1873 in.read(chunk, size);
1877 string fileChunkName = filename + "." + toString(i) + ".tmp";
1879 openOutputFile(fileChunkName, out);
1881 out << chunk << endl;
1886 files.push_back(fileChunkName);
1891 catch(exception& e) {
1892 errorOut(e, "MothurOut", "divideFile");
1896 /***********************************************************************/
1898 bool MothurOut::isTrue(string f){
1901 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1903 if ((f == "TRUE") || (f == "T")) { return true; }
1904 else { return false; }
1906 catch(exception& e) {
1907 errorOut(e, "MothurOut", "isTrue");
1912 /***********************************************************************/
1914 float MothurOut::roundDist(float dist, int precision){
1916 return int(dist * precision + 0.5)/float(precision);
1918 catch(exception& e) {
1919 errorOut(e, "MothurOut", "roundDist");
1923 /***********************************************************************/
1925 float MothurOut::ceilDist(float dist, int precision){
1927 return int(ceil(dist * precision))/float(precision);
1929 catch(exception& e) {
1930 errorOut(e, "MothurOut", "ceilDist");
1934 /***********************************************************************/
1936 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1938 vector<string> pieces;
1940 for (int i = 0; i < size; i++) {
1941 if (!isspace(buffer[i])) { rest += buffer[i]; }
1943 if (rest != "") { pieces.push_back(rest); rest = ""; }
1944 while (i < size) { //gobble white space
1945 if (isspace(buffer[i])) { i++; }
1946 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1953 catch(exception& e) {
1954 errorOut(e, "MothurOut", "splitWhiteSpace");
1958 /***********************************************************************/
1959 vector<string> MothurOut::splitWhiteSpace(string input){
1961 vector<string> pieces;
1964 for (int i = 0; i < input.length(); i++) {
1965 if (!isspace(input[i])) { rest += input[i]; }
1967 if (rest != "") { pieces.push_back(rest); rest = ""; }
1968 while (i < input.length()) { //gobble white space
1969 if (isspace(input[i])) { i++; }
1970 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1975 if (rest != "") { pieces.push_back(rest); }
1979 catch(exception& e) {
1980 errorOut(e, "MothurOut", "splitWhiteSpace");
1984 /***********************************************************************/
// Splits input on whitespace like splitWhiteSpace, with separate handling for text that
// starts at a single or double quote.
// NOTE(review): several short statements inside the quote-handling loop, plus the closing
// braces and the return, are not visible in this extract - confirm exact quote semantics
// against the full file before relying on these comments.
1985 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
1987 vector<string> pieces;
// Positions of the first single and double quote; string::find returns npos when absent.
1990 int pos = input.find('\'');
1991 int pos2 = input.find('\"');
1993 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
1995 for (int i = 0; i < input.length(); i++) {
// Quote branch: taken when the current character is a quote, or when the pending token
// consists of just a quote character.
1996 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
// Scan forward from the character after i; either quote character ends the scan, so the
// closing quote is not required to match the opening one.
1998 for (int j = i+1; j < input.length(); j++) {
1999 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
2003 }else { rest += input[j]; }
// Outside quotes: non-whitespace characters extend the pending token.
2005 }else if (!isspace(input[i])) { rest += input[i]; }
// Whitespace closes the pending token, if any.
2007 if (rest != "") { pieces.push_back(rest); rest = ""; }
2008 while (i < input.length()) { //gobble white space
2009 if (isspace(input[i])) { i++; }
2010 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
// The input may end in the middle of a token.
2015 if (rest != "") { pieces.push_back(rest); }
2019 catch(exception& e) {
// NOTE(review): the label passed to errorOut names splitWhiteSpace, not this function.
2020 errorOut(e, "MothurOut", "splitWhiteSpace");
2024 //**********************************************************************************************************************
2025 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
2029 openInputFile(namefile, in);
2033 bool pairDone = false;
2034 bool columnOne = true;
2035 string firstCol, secondCol;
2039 if (control_pressed) { break; }
2041 in.read(buffer, 4096);
2042 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2044 for (int i = 0; i < pieces.size(); i++) {
2045 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2046 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2049 checkName(firstCol);
2050 //are there confidence scores, if so remove them
2051 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2052 map<string, string>::iterator itTax = taxMap.find(firstCol);
2054 if(itTax == taxMap.end()) {
2055 bool ignore = false;
2056 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2058 if (!ignore) { taxMap[firstCol] = secondCol; }
2059 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2061 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
2070 vector<string> pieces = splitWhiteSpace(rest);
2072 for (int i = 0; i < pieces.size(); i++) {
2073 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2074 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2077 checkName(firstCol);
2078 //are there confidence scores, if so remove them
2079 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2080 map<string, string>::iterator itTax = taxMap.find(firstCol);
2082 if(itTax == taxMap.end()) {
2083 bool ignore = false;
2084 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2086 if (!ignore) { taxMap[firstCol] = secondCol; }
2087 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2089 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); error = true;
2097 if (error) { control_pressed = true; }
2098 if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); }
2099 return taxMap.size();
2102 catch(exception& e) {
2103 errorOut(e, "MothurOut", "readTax");
2107 /**********************************************************************************************************************/
2108 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
2112 openInputFile(namefile, in);
2116 bool pairDone = false;
2117 bool columnOne = true;
2118 string firstCol, secondCol;
2121 if (control_pressed) { break; }
2123 in.read(buffer, 4096);
2124 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2126 for (int i = 0; i < pieces.size(); i++) {
2127 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2128 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2131 checkName(firstCol);
2132 checkName(secondCol);
2134 //parse names into vector
2135 vector<string> theseNames;
2136 splitAtComma(secondCol, theseNames);
2137 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2145 vector<string> pieces = splitWhiteSpace(rest);
2147 for (int i = 0; i < pieces.size(); i++) {
2148 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2149 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2152 checkName(firstCol);
2153 checkName(secondCol);
2155 //parse names into vector
2156 vector<string> theseNames;
2157 splitAtComma(secondCol, theseNames);
2158 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2164 return nameMap.size();
2167 catch(exception& e) {
2168 errorOut(e, "MothurOut", "readNames");
2172 /**********************************************************************************************************************/
2173 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
2177 openInputFile(namefile, in);
2181 bool pairDone = false;
2182 bool columnOne = true;
2183 string firstCol, secondCol;
2186 if (control_pressed) { break; }
2188 in.read(buffer, 4096);
2189 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2191 for (int i = 0; i < pieces.size(); i++) {
2192 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2193 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2196 checkName(firstCol);
2197 checkName(secondCol);
2198 nameMap[secondCol] = firstCol;
2206 vector<string> pieces = splitWhiteSpace(rest);
2208 for (int i = 0; i < pieces.size(); i++) {
2209 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2210 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2213 checkName(firstCol);
2214 checkName(secondCol);
2215 nameMap[secondCol] = firstCol;
2221 return nameMap.size();
2224 catch(exception& e) {
2225 errorOut(e, "MothurOut", "readNames");
2229 /**********************************************************************************************************************/
2230 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2232 nameMap.clear(); nameCount.clear();
2235 openInputFile(namefile, in);
2239 bool pairDone = false;
2240 bool columnOne = true;
2241 string firstCol, secondCol;
2244 if (control_pressed) { break; }
2246 in.read(buffer, 4096);
2247 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2249 for (int i = 0; i < pieces.size(); i++) {
2250 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2251 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2254 checkName(firstCol);
2255 checkName(secondCol);
2256 //parse names into vector
2257 vector<string> theseNames;
2258 splitAtComma(secondCol, theseNames);
2259 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2260 nameCount[firstCol] = theseNames.size();
2268 vector<string> pieces = splitWhiteSpace(rest);
2270 for (int i = 0; i < pieces.size(); i++) {
2271 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2272 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2275 checkName(firstCol);
2276 checkName(secondCol);
2277 //parse names into vector
2278 vector<string> theseNames;
2279 splitAtComma(secondCol, theseNames);
2280 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2281 nameCount[firstCol] = theseNames.size();
2287 return nameMap.size();
2290 catch(exception& e) {
2291 errorOut(e, "MothurOut", "readNames");
2295 /**********************************************************************************************************************/
2296 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2300 openInputFile(namefile, in);
2304 bool pairDone = false;
2305 bool columnOne = true;
2306 string firstCol, secondCol;
2309 if (control_pressed) { break; }
2311 in.read(buffer, 4096);
2312 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2314 for (int i = 0; i < pieces.size(); i++) {
2315 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2316 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2319 checkName(firstCol);
2320 checkName(secondCol);
2321 nameMap[firstCol] = secondCol; pairDone = false; }
2327 vector<string> pieces = splitWhiteSpace(rest);
2329 for (int i = 0; i < pieces.size(); i++) {
2330 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2331 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2334 checkName(firstCol);
2335 checkName(secondCol);
2336 nameMap[firstCol] = secondCol; pairDone = false; }
2340 return nameMap.size();
2343 catch(exception& e) {
2344 errorOut(e, "MothurOut", "readNames");
2348 /**********************************************************************************************************************/
2349 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2353 openInputFile(namefile, in);
2357 bool pairDone = false;
2358 bool columnOne = true;
2359 string firstCol, secondCol;
2362 if (control_pressed) { break; }
2364 in.read(buffer, 4096);
2365 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2367 for (int i = 0; i < pieces.size(); i++) {
2368 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2369 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2372 checkName(firstCol);
2373 checkName(secondCol);
2374 vector<string> temp;
2375 splitAtComma(secondCol, temp);
2376 nameMap[firstCol] = temp;
2384 vector<string> pieces = splitWhiteSpace(rest);
2386 for (int i = 0; i < pieces.size(); i++) {
2387 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2388 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2391 checkName(firstCol);
2392 checkName(secondCol);
2393 vector<string> temp;
2394 splitAtComma(secondCol, temp);
2395 nameMap[firstCol] = temp;
2401 return nameMap.size();
2403 catch(exception& e) {
2404 errorOut(e, "MothurOut", "readNames");
2408 /**********************************************************************************************************************/
2409 map<string, int> MothurOut::readNames(string namefile) {
2411 map<string, int> nameMap;
2415 openInputFile(namefile, in);
2419 bool pairDone = false;
2420 bool columnOne = true;
2421 string firstCol, secondCol;
2424 if (control_pressed) { break; }
2426 in.read(buffer, 4096);
2427 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2429 for (int i = 0; i < pieces.size(); i++) {
2430 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2431 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2434 checkName(firstCol);
2435 checkName(secondCol);
2436 int num = getNumNames(secondCol);
2437 nameMap[firstCol] = num;
2445 vector<string> pieces = splitWhiteSpace(rest);
2446 for (int i = 0; i < pieces.size(); i++) {
2447 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2448 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2451 checkName(firstCol);
2452 checkName(secondCol);
2453 int num = getNumNames(secondCol);
2454 nameMap[firstCol] = num;
2463 catch(exception& e) {
2464 errorOut(e, "MothurOut", "readNames");
2468 /**********************************************************************************************************************/
// Overload of readNames that also takes numSeqs by reference.
// Builds the same first-column -> getNumNames(second column) map as the
// single-argument overload.
// NOTE(review): no visible line writes numSeqs; the update is presumably on a
// line missing from this listing - TODO confirm.
2469 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2471 map<string, int> nameMap;
2476 openInputFile(namefile, in);
2480 bool pairDone = false;
2481 bool columnOne = true;
2482 string firstCol, secondCol;
// leave the read loop as soon as control_pressed is set
2485 if (control_pressed) { break; }
// read up to 4096 bytes and tokenize the bytes actually read
2487 in.read(buffer, 4096);
2488 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2490 for (int i = 0; i < pieces.size(); i++) {
2491 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2492 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// ':' is replaced by '_' in both columns before storing
2495 checkName(firstCol);
2496 checkName(secondCol);
2497 int num = getNumNames(secondCol);
2498 nameMap[firstCol] = num;
// repeat the pairing for the tokens of rest
2507 vector<string> pieces = splitWhiteSpace(rest);
2508 for (int i = 0; i < pieces.size(); i++) {
2509 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2510 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2513 checkName(firstCol);
2514 checkName(secondCol);
2515 int num = getNumNames(secondCol);
2516 nameMap[firstCol] = num;
2526 catch(exception& e) {
2527 errorOut(e, "MothurOut", "readNames");
2531 /************************************************************/
// Sanitizes a sequence name in place: every ':' in name becomes '_', and the
// member flag changedSeqNames is set whenever a replacement happens.
// The returned int is produced on a line not shown in this listing.
2532 int MothurOut::checkName(string& name) {
2535 for (int i = 0; i < name.length(); i++) {
2536 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2541 catch(exception& e) {
2542 errorOut(e, "MothurOut", "checkName");
2546 /**********************************************************************************************************************/
// Reads a names file and appends one seqPriorityNode per name pair to
// nameVector, built from (getNumNames(second column), fastamap value, first column).
// fastamap is searched by the first-column name; a name that is missing from
// it is reported with an [ERROR] message.
// NOTE(review): some lines are missing from this listing; in particular the
// branch structure between the error message and the use of it->second is not
// visible, so confirm that it->second is only reached when the lookup succeeded.
2547 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2553 openInputFile(namefile, in);
2557 bool pairDone = false;
2558 bool columnOne = true;
2559 string firstCol, secondCol;
// leave the read loop as soon as control_pressed is set
2562 if (control_pressed) { break; }
// read up to 4096 bytes and tokenize the bytes actually read
2564 in.read(buffer, 4096);
2565 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
// tokens alternate between the first and second column
2567 for (int i = 0; i < pieces.size(); i++) {
2568 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2569 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// ':' is replaced by '_' in both columns before the lookup
2572 checkName(firstCol);
2573 checkName(secondCol);
2574 int num = getNumNames(secondCol);
// the first-column name must also be a key of fastamap
2576 map<string, string>::iterator it = fastamap.find(firstCol);
2577 if (it == fastamap.end()) {
2579 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2581 seqPriorityNode temp(num, it->second, firstCol);
2582 nameVector.push_back(temp);
// same processing for the tokens of rest
2592 vector<string> pieces = splitWhiteSpace(rest);
2594 for (int i = 0; i < pieces.size(); i++) {
2595 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2596 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2599 checkName(firstCol);
2600 checkName(secondCol);
2601 int num = getNumNames(secondCol);
2603 map<string, string>::iterator it = fastamap.find(firstCol);
2604 if (it == fastamap.end()) {
2606 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2608 seqPriorityNode temp(num, it->second, firstCol);
2609 nameVector.push_back(temp);
2618 catch(exception& e) {
2619 errorOut(e, "MothurOut", "readNames");
2623 //**********************************************************************************************************************
// Reads an accnos file and collects its tokens into a set named names
// (declared on a line not shown in this listing).
// Each token is passed through checkName, which rewrites ':' to '_', before insertion.
2624 set<string> MothurOut::readAccnos(string accnosfile){
2628 openInputFile(accnosfile, in);
// leave the read loop as soon as control_pressed is set
2635 if (control_pressed) { break; }
// read up to 4096 bytes and tokenize the bytes actually read
2637 in.read(buffer, 4096);
2638 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2640 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2641 names.insert(pieces[i]);
// same handling for the tokens of rest
2647 vector<string> pieces = splitWhiteSpace(rest);
2648 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2652 catch(exception& e) {
2653 errorOut(e, "MothurOut", "readAccnos");
2657 //**********************************************************************************************************************
// Reads an accnos file and appends its tokens, in file order, to the
// caller-supplied vector names. Each token is passed through checkName
// (':' becomes '_') first. The returned int is set on a line not shown here.
2658 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2662 openInputFile(accnosfile, in);
// leave the read loop as soon as control_pressed is set
2669 if (control_pressed) { break; }
// read up to 4096 bytes and tokenize the bytes actually read
2671 in.read(buffer, 4096);
2672 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2674 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// same handling for the tokens of rest
2679 vector<string> pieces = splitWhiteSpace(rest);
2680 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2685 catch(exception& e) {
2686 errorOut(e, "MothurOut", "readAccnos");
2690 /***********************************************************************/
// Scans names for ',' characters. The counter and the return statement are on
// lines not shown in this listing; presumably the result is the number of
// comma-separated entries - TODO confirm.
// names is taken by value, so the caller's string is not modified.
2692 int MothurOut::getNumNames(string names){
2698 for(int i=0;i<names.size();i++){
2699 if(names[i] == ','){
2707 catch(exception& e) {
2708 errorOut(e, "MothurOut", "getNumNames");
2712 /***********************************************************************/
// Walks every character of line. The loop body and return are not shown in
// this listing; presumably it counts the occurrences of c - TODO confirm.
2714 int MothurOut::getNumChar(string line, char c){
2719 for(int i=0;i<line.size();i++){
2728 catch(exception& e) {
2729 errorOut(e, "MothurOut", "getNumChar");
2733 /***********************************************************************/
2734 string MothurOut::getSimpleLabel(string label){
2738 //remove OTU or phylo tag
2739 string newLabel1 = "";
2740 for (int i = 0; i < label.length(); i++) {
2741 if(label[i]>47 && label[i]<58) { //is a digit
2742 newLabel1 += label[i];
2747 mothurConvert(newLabel1, num1);
2749 simple = toString(num1);
2753 catch(exception& e) {
2754 errorOut(e, "MothurOut", "isLabelEquivalent");
2758 /***********************************************************************/
// Decides whether two labels are equivalent by comparing only their digits.
// The decimal digits of each label are concatenated, converted to numbers with
// mothurConvert, and same is set to true when the two numbers are equal.
// The declarations of num1, num2 and same, and the return, are on lines not
// shown in this listing.
2760 bool MothurOut::isLabelEquivalent(string label1, string label2){
2764 //remove OTU or phylo tag
2765 string newLabel1 = "";
2766 for (int i = 0; i < label1.length(); i++) {
// character codes 48..57 are '0'..'9'
2767 if(label1[i]>47 && label1[i]<58) { //is a digit
2768 newLabel1 += label1[i];
2772 string newLabel2 = "";
2773 for (int i = 0; i < label2.length(); i++) {
2774 if(label2[i]>47 && label2[i]<58) { //is a digit
2775 newLabel2 += label2[i];
2780 mothurConvert(newLabel1, num1);
2781 mothurConvert(newLabel2, num2);
2783 if (num1 == num2) { same = true; }
2787 catch(exception& e) {
2788 errorOut(e, "MothurOut", "isLabelEquivalent");
2792 //**********************************************************************************************************************
// Tests whether every string in subset also appears in bigset.
// Returns false immediately when subset has more elements than bigset, and
// false as soon as one element of subset has no equal element in bigset.
// Each element is located with a linear scan of bigset. Both vectors are
// passed by value.
2793 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2797 if (subset.size() > bigset.size()) { return false; }
2799 //check if each guy in subset is also in bigset
2800 for (int i = 0; i < subset.size(); i++) {
2802 for (int j = 0; j < bigset.size(); j++) {
2803 if (subset[i] == bigset[j]) { match = true; break; }
2806 //you have a guy in subset that had no match in bigset
2807 if (match == false) { return false; }
2813 catch(exception& e) {
2814 errorOut(e, "MothurOut", "isSubset");
2818 /***********************************************************************/
// Deletes a file: filename is first expanded with getFullPathName and then
// passed to remove(). The result of remove() is kept in error; the return
// statement itself is on a line not shown in this listing.
// The commented-out lines below are disabled error reporting that would have
// printed a message unless errno was ENOENT.
2819 int MothurOut::mothurRemove(string filename){
2821 filename = getFullPathName(filename);
2822 int error = remove(filename.c_str());
2824 // if (errno != ENOENT) { //ENOENT == file does not exist
2825 // string message = "Error deleting file " + filename;
2826 // perror(message.c_str());
2831 catch(exception& e) {
2832 errorOut(e, "MothurOut", "mothurRemove");
2836 /***********************************************************************/
// Converts item to an int, written to num.
// item is first screened with isNumeric1; the conversion taken on success is
// on a line not shown in this listing. When the screen fails, an [ERROR]
// message is printed and commandInputsConvertError is set to true.
2837 bool MothurOut::mothurConvert(string item, int& num){
2841 if (isNumeric1(item)) {
2846 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2847 commandInputsConvertError = true;
2852 catch(exception& e) {
2853 errorOut(e, "MothurOut", "mothurConvert");
2857 /***********************************************************************/
// Converts item to an intDist, written to num.
// item is first screened with isNumeric1; the conversion taken on success is
// on a line not shown in this listing. When the screen fails, an [ERROR]
// message is printed and commandInputsConvertError is set to true.
2858 bool MothurOut::mothurConvert(string item, intDist& num){
2862 if (isNumeric1(item)) {
2867 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2868 commandInputsConvertError = true;
2873 catch(exception& e) {
2874 errorOut(e, "MothurOut", "mothurConvert");
2879 /***********************************************************************/
// Character-set screen for numeric text: numeric becomes true when
// stringToCheck contains no character outside "0123456789.-".
// NOTE(review): this checks characters only, not number syntax, so an empty
// string, "-", "." and "1.2.3" all pass the screen - confirm callers expect that.
2880 bool MothurOut::isNumeric1(string stringToCheck){
2882 bool numeric = false;
2884 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2888 catch(exception& e) {
2889 errorOut(e, "MothurOut", "isNumeric1");
2894 /***********************************************************************/
// Converts item to a float, written to num.
// item is first screened with isNumeric1; the conversion taken on success is
// on a line not shown in this listing. When the screen fails, an [ERROR]
// message is printed and commandInputsConvertError is set to true.
2895 bool MothurOut::mothurConvert(string item, float& num){
2899 if (isNumeric1(item)) {
2904 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2905 commandInputsConvertError = true;
2910 catch(exception& e) {
2911 errorOut(e, "MothurOut", "mothurConvert");
2915 /***********************************************************************/
// Converts item to a double, written to num.
// item is first screened with isNumeric1; the conversion taken on success is
// on a line not shown in this listing. When the screen fails, an [ERROR]
// message is printed and commandInputsConvertError is set to true.
2916 bool MothurOut::mothurConvert(string item, double& num){
2920 if (isNumeric1(item)) {
2925 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2926 commandInputsConvertError = true;
2931 catch(exception& e) {
2932 errorOut(e, "MothurOut", "mothurConvert");
2936 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of doubles named binomial.
// Rows 2..maxOrder are filled from the row above with
// binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j], which is the
// Pascal's-triangle recurrence; the seeding of rows 0 and 1 and column 0 is on
// lines not shown in this listing.
2938 vector<vector<double> > MothurOut::binomial(int maxOrder){
2940 vector<vector<double> > binomial(maxOrder+1);
2942 for(int i=0;i<=maxOrder;i++){
2943 binomial[i].resize(maxOrder+1);
2952 for(int i=2;i<=maxOrder;i++){
2956 for(int i=2;i<=maxOrder;i++){
2957 for(int j=1;j<=maxOrder;j++){
// NOTE(review): this if is not chained to the if/else that follows, so when
// i==j the else branch below also runs and overwrites the 1 stored here.
// Confirm the recurrence yields the same value on the diagonal.
2958 if(i==j){ binomial[i][j]=1; }
2959 if(j>i) { binomial[i][j]=0; }
2960 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2967 catch(exception& e) {
2968 errorOut(e, "MothurOut", "binomial");
2972 /**************************************************************************************************/
// Converts a base-36 string to an unsigned int.
// Characters are consumed left to right and folded in with
// num = 36 * num + converts[c], where converts maps a character to its digit
// value. The table is filled on lines not shown in this listing.
// NOTE(review): converts[c] uses map::operator[], so a character that is not
// in the table is inserted with value 0 and silently contributes a zero digit.
2973 unsigned int MothurOut::fromBase36(string base36){
2975 unsigned int num = 0;
2977 map<char, int> converts;
3042 while (i < base36.length()) {
3044 num = 36 * num + converts[c];
3051 catch(exception& e) {
3052 errorOut(e, "MothurOut", "fromBase36");
3056 /***********************************************************************/
// Probes the byte order of the host and reports it as the string
// "BIG_ENDIAN" or "LITTLE_ENDIAN".
// The probe stores the bytes {1,0} and reads them back through a short pointer.
// NOTE(review): on a little-endian host the bytes {1,0} read back as the value
// 1, so the x == 1 branch looks like it labels a little-endian machine
// "BIG_ENDIAN". Callers may rely on the current labels, so confirm against
// them before changing anything.
3057 string MothurOut::findEdianness() {
3059 // find real endian type
3060 unsigned char EndianTest[2] = {1,0};
3061 short x = *(short *)EndianTest;
3063 string endianType = "unknown";
3064 if(x == 1) { endianType = "BIG_ENDIAN"; }
3065 else { endianType = "LITTLE_ENDIAN"; }
3069 catch(exception& e) {
3070 errorOut(e, "MothurOut", "findEdianness");
3074 /***********************************************************************/
3075 double MothurOut::median(vector<double> x) {
3079 if (x.size() == 0) { } //error
3081 //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
3082 sort(x.begin(), x.end());
3084 if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
3085 int midIndex1 = (x.size()/2)-1;
3086 int midIndex2 = (x.size()/2);
3087 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
3089 int midIndex = (x.size()/2);
3090 value = x[midIndex];
3095 catch(exception& e) {
3096 errorOut(e, "MothurOut", "median");
3100 /***********************************************************************/
// Iterates i from 1 to num inclusive; the accumulation and return are on lines
// not shown in this listing (presumably the running product - TODO confirm).
// NOTE(review): the result type is int, so confirm how large num can get at
// the call sites.
3101 int MothurOut::factorial(int num){
3105 for (int i = 1; i <= num; i++) {
3111 catch(exception& e) {
3112 errorOut(e, "MothurOut", "factorial");
3116 /***********************************************************************/
// Counts the '>' characters between the current position of file and its end,
// reading the stream through istreambuf_iterator. This consumes the stream.
// Every '>' is counted, not only one that starts a line.
3118 int MothurOut::getNumSeqs(ifstream& file){
3120 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
3124 catch(exception& e) {
3125 errorOut(e, "MothurOut", "getNumSeqs");
3129 /***********************************************************************/
// Line-based variant: increments numSeqs once for every non-empty line read
// with getline(file) whose first character is '>'.
// The enclosing read loop and any initialization of numSeqs are on lines not
// shown in this listing.
3130 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
3135 input = getline(file);
3136 if (input.length() != 0) {
3137 if(input[0] == '>'){ numSeqs++; }
3141 catch(exception& e) {
3142 errorOut(e, "MothurOut", "getNumSeqs");
3146 /***********************************************************************/
// Tries to find an openable copy of filename, checking in order:
//   1. the full path of filename as given,
//   2. inputDir + simple file name (only when inputDir is not empty),
//   3. getDefaultPath() + simple file name (only when a default path is set),
//   4. the directory derived from the mothur executable path stored in argv.
// openInputFile is called in "noerror" mode, and a result of 1 is treated as
// "could not open". Returns false after printing a message when every location
// failed; the success path (including any update of filename) is on lines not
// shown in this listing.
3147 bool MothurOut::checkLocations(string& filename, string inputDir){
3149 filename = getFullPathName(filename);
3153 ableToOpen = openInputFile(filename, in, "noerror");
3156 //if you can't open it, try input location
3157 if (ableToOpen == 1) {
3158 if (inputDir != "") { //default path is set
3159 string tryPath = inputDir + getSimpleName(filename);
3160 mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine();
3162 ableToOpen = openInputFile(tryPath, in2, "noerror");
3168 //if you can't open it, try default location
3169 if (ableToOpen == 1) {
3170 if (getDefaultPath() != "") { //default path is set
3171 string tryPath = getDefaultPath() + getSimpleName(filename);
3172 mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine();
3174 ableToOpen = openInputFile(tryPath, in2, "noerror");
3180 //if you can't open it, it's not in the current working directory or inputDir, try mothur executable location
3181 if (ableToOpen == 1) {
3182 string exepath = argv;
3183 string tempPath = exepath;
// lower-case a copy of the path, then cut exepath just before the last 'm'
// found in that copy (presumably the start of the executable name "mothur" - TODO confirm)
3184 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
3185 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
3187 string tryPath = getFullPathName(exepath) + getSimpleName(filename);
3188 mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath); mothurOutEndLine();
3190 ableToOpen = openInputFile(tryPath, in2, "noerror");
3195 if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; }
3199 catch(exception& e) {
3200 errorOut(e, "MothurOut", "checkLocations");
3204 /***********************************************************************/
3206 //This function parses the estimator options and puts them in a vector
// Splits estim at every occurrence of symbol and appends the pieces to
// container. A '-' separator is delegated to splitAtDash. The text after the
// last separator is always appended, so an estim without the separator yields
// one piece and a trailing separator yields a final empty piece.
// The reset of individual after each push is on a line not shown in this listing.
3207 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
3210 if (symbol == '-') { splitAtDash(estim, container); return; }
3212 string individual = "";
3213 int estimLength = estim.size();
3214 for(int i=0;i<estimLength;i++){
3215 if(estim[i] == symbol){
3216 container.push_back(individual);
3220 individual += estim[i];
3223 container.push_back(individual);
3226 catch(exception& e) {
3227 errorOut(e, "MothurOut", "splitAtChar");
3232 /***********************************************************************/
3234 //This function parses the estimator options and puts them in a vector
// Splits estim at '-' and appends the pieces to container.
// A backslash directly before a dash escapes it: the backslash is dropped and
// the dash is kept as part of the current piece. Any other backslash is kept.
// The text after the last unescaped dash is always appended.
3235 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
3237 string individual = "";
3238 int estimLength = estim.size();
3239 bool prevEscape = false;
// the loop opened on the next line starts a block comment, i.e. it is an
// older, disabled version of the parser
3240 /*for(int i=0;i<estimLength;i++){
3242 individual += estim[i];
3246 if(estim[i] == '\\'){
3249 else if(estim[i] == '-'){
3250 container.push_back(individual);
3255 individual += estim[i];
// active parser
3262 for(int i=0;i<estimLength;i++){
3263 if(estim[i] == '-'){
3264 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3266 container.push_back(individual);
3269 }else if(estim[i] == '\\'){
// look one character ahead, which is only possible when this is not the last character
3270 if (i < estimLength-1) {
3271 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3272 else { individual += estim[i]; prevEscape = false; } //if no, add in
3273 }else { individual += estim[i]; }
3275 individual += estim[i];
3281 container.push_back(individual);
3283 catch(exception& e) {
3284 errorOut(e, "MothurOut", "splitAtDash");
3289 /***********************************************************************/
3290 //This function parses the label options and puts them in a set
// Splits estim at '-' and inserts the pieces into container (a set, so
// duplicate pieces collapse). A backslash directly before a dash escapes it:
// the backslash is dropped and the dash stays in the current piece.
// NOTE(review): two loops are visible below. The first matches the disabled
// parser in the vector overload, so its comment delimiters are presumably on
// lines not shown in this listing - TODO confirm.
3291 void MothurOut::splitAtDash(string& estim, set<string>& container) {
3293 string individual = "";
3294 int estimLength = estim.size();
3295 bool prevEscape = false;
3297 for(int i=0;i<estimLength;i++){
3299 individual += estim[i];
3303 if(estim[i] == '\\'){
3306 else if(estim[i] == '-'){
3307 container.insert(individual);
3312 individual += estim[i];
// parser with look-ahead handling of escaped dashes
3319 for(int i=0;i<estimLength;i++){
3320 if(estim[i] == '-'){
3321 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3323 container.insert(individual);
3326 }else if(estim[i] == '\\'){
3327 if (i < estimLength-1) {
3328 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3329 else { individual += estim[i]; prevEscape = false; } //if no, add in
3330 }else { individual += estim[i]; }
3332 individual += estim[i];
// the text after the last unescaped dash is always inserted
3335 container.insert(individual);
3338 catch(exception& e) {
3339 errorOut(e, "MothurOut", "splitAtDash");
3343 /***********************************************************************/
3344 //This function parses the line options and puts them in a set
// Splits estim at '-' and inserts each piece into container after converting
// it to an int with convert(). A backslash directly before a dash escapes it.
// lineNum is declared on a line not shown in this listing.
// NOTE(review): two loops are visible below. The first matches the disabled
// parser in the vector overload, so its comment delimiters are presumably on
// lines not shown in this listing - TODO confirm.
3345 void MothurOut::splitAtDash(string& estim, set<int>& container) {
3347 string individual = "";
3349 int estimLength = estim.size();
3350 bool prevEscape = false;
3352 for(int i=0;i<estimLength;i++){
3354 individual += estim[i];
3358 if(estim[i] == '\\'){
3361 else if(estim[i] == '-'){
3362 convert(individual, lineNum); //convert the string to int
3363 container.insert(lineNum);
3368 individual += estim[i];
// parser with look-ahead handling of escaped dashes
3374 for(int i=0;i<estimLength;i++){
3375 if(estim[i] == '-'){
3376 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3378 convert(individual, lineNum); //convert the string to int
3379 container.insert(lineNum);
3382 }else if(estim[i] == '\\'){
3383 if (i < estimLength-1) {
3384 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3385 else { individual += estim[i]; prevEscape = false; } //if no, add in
3386 }else { individual += estim[i]; }
3388 individual += estim[i];
// the text after the last unescaped dash is converted and inserted as well
3392 convert(individual, lineNum); //convert the string to int
3393 container.insert(lineNum);
3395 catch(exception& e) {
3396 errorOut(e, "MothurOut", "splitAtDash");
3401 /***********************************************************************/
// Joins names into one comma-separated string with no trailing comma.
// An empty vector returns list unchanged (list is declared on a line not shown
// in this listing).
3402 string MothurOut::makeList(vector<string>& names) {
// the early return also protects names.size()-1 below from wrapping around on an empty vector
3406 if (names.size() == 0) { return list; }
// every element except the last is followed by a comma
3408 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
3411 list += names[names.size()-1];
3415 catch(exception& e) {
3416 errorOut(e, "MothurOut", "makeList");
3421 /***********************************************************************/
3422 //This function parses a string and puts the pieces in a vector
// Splits estim at every ',' and appends the pieces to container. The text
// after the last comma is always appended, so a string without commas yields
// one piece. estim itself is only read by the visible code.
// The reset of individual after each push is on a line not shown in this listing.
3423 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3425 string individual = "";
3426 int estimLength = estim.size();
3427 for(int i=0;i<estimLength;i++){
3428 if(estim[i] == ','){
3429 container.push_back(individual);
3433 individual += estim[i];
3436 container.push_back(individual);
// older implementation, kept commented out: it repeatedly cut estim at the first comma
3441 // string individual;
3443 // while (estim.find_first_of(',') != -1) {
3444 // individual = estim.substr(0,estim.find_first_of(','));
3445 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3446 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3447 // container.push_back(individual);
3451 // container.push_back(estim);
3453 catch(exception& e) {
3454 errorOut(e, "MothurOut", "splitAtComma");
3458 /***********************************************************************/
3459 //This function splits up the various option parameters
3460 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3462 prefix = suffix.substr(0,suffix.find_first_of(c));
3463 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3464 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3466 while(suffix.at(0) == ' ')
3467 suffix = suffix.substr(1, suffix.length());
3468 }else { suffix = ""; }
3471 catch(exception& e) {
3472 errorOut(e, "MothurOut", "splitAtChar");
3477 /***********************************************************************/
3479 //This function splits up the various option parameters
3480 void MothurOut::splitAtComma(string& prefix, string& suffix){
3482 prefix = suffix.substr(0,suffix.find_first_of(','));
3483 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3484 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3486 while(suffix.at(0) == ' ')
3487 suffix = suffix.substr(1, suffix.length());
3488 }else { suffix = ""; }
3491 catch(exception& e) {
3492 errorOut(e, "MothurOut", "splitAtComma");
3496 /***********************************************************************/
3498 //This function separates the key value from the option value i.e. dist=96_...
// When value contains '=', key receives the text before the first '=' and
// value is replaced by the text after it. The handling of a value without '='
// is on lines not shown in this listing.
3499 void MothurOut::splitAtEquals(string& key, string& value){
// find_first_of returns string::npos when nothing is found; comparing with -1
// works because -1 converts to the same unsigned value
3501 if(value.find_first_of('=') != -1){
3502 key = value.substr(0,value.find_first_of('='));
3503 if ((value.find_first_of('=')+1) <= value.length()) {
3504 value = value.substr(value.find_first_of('=')+1, value.length());
3511 catch(exception& e) {
3512 errorOut(e, "MothurOut", "splitAtEquals");
3517 /**************************************************************************************************/
// Returns true when groupname equals one of the strings in Groups (linear
// scan). The value returned when nothing matches is on a line not shown in
// this listing.
3519 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3521 for (int i = 0; i < Groups.size(); i++) {
3522 if (groupname == Groups[i]) { return true; }
3526 catch(exception& e) {
3527 errorOut(e, "MothurOut", "inUsersGroups");
3531 /**************************************************************************************************/
// Returns true when the vector set compares equal (same elements in the same
// order) to one of the vectors in sets. The value returned when nothing
// matches is on a line not shown in this listing.
3533 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3535 for (int i = 0; i < sets.size(); i++) {
3536 if (set == sets[i]) { return true; }
3540 catch(exception& e) {
3541 errorOut(e, "MothurOut", "inUsersGroups");
3545 /**************************************************************************************************/
// Returns true when groupname equals one of the ints in Groups (linear scan).
// The value returned when nothing matches is on a line not shown in this listing.
3547 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3549 for (int i = 0; i < Groups.size(); i++) {
3550 if (groupname == Groups[i]) { return true; }
3554 catch(exception& e) {
3555 errorOut(e, "MothurOut", "inUsersGroups");
3560 /**************************************************************************************************/
3561 //returns true if any of the strings in first vector are in second vector
// Each name is tested with the single-name overload, so the cost is
// groupnames.size() linear scans of Groups.
3562 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3565 for (int i = 0; i < groupnames.size(); i++) {
3566 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3570 catch(exception& e) {
3571 errorOut(e, "MothurOut", "inUsersGroups");
3575 /***********************************************************************/
3576 //this function determines if the user has given us labels that are smaller than the given label.
3577 //if so then it returns true so that the calling function can run the previous valid distance.
3578 //it's a "smart" distance function. It also checks for invalid labels.
// label      - label currently being processed; "unique" returns false at once
// userLabels - labels requested by the user; invalid labels and labels smaller
//              than label are erased from this set
// errorOff   - messages are printed only when this is the empty string
// Several lines (declarations of labelFloat, temp and s, iterator increments,
// the assignments to smaller and the final return) are not shown in this listing.
3579 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3582 set<string>::iterator it;
3583 vector<float> orderFloat;
3584 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3585 map<string, float>::iterator it2;
3587 bool smaller = false;
3589 //unique is the smallest line
3590 if (label == "unique") { return false; }
3592 if (convertTestFloat(label, labelFloat)) {
3593 convert(label, labelFloat);
3594 }else { //can't convert
3599 //go through users set and make them floats
// the for statement has no increment clause because erasing inside the body
// has to advance the iterator itself
3600 for(it = userLabels.begin(); it != userLabels.end();) {
3603 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3605 orderFloat.push_back(temp);
3606 userMap[*it] = temp;
// "unique" is represented by -1 so that it sorts below every numeric label
3608 }else if (*it == "unique") {
3609 orderFloat.push_back(-1.0);
3610 userMap["unique"] = -1.0;
3613 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
// post-increment hands erase the old position while it already points at the next element
3614 userLabels.erase(it++);
3619 sort(orderFloat.begin(), orderFloat.end());
3621 /*************************************************/
3622 //is this label bigger than any of the users labels
3623 /*************************************************/
3625 //loop through order until you find a label greater than label
3626 for (int i = 0; i < orderFloat.size(); i++) {
3627 if (orderFloat[i] < labelFloat) {
3629 if (orderFloat[i] == -1) {
3630 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3631 userLabels.erase("unique");
3634 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// map the float back to the user's original spelling of the label
3636 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3637 if (it2->second == orderFloat[i]) {
3639 //remove small labels
3640 userLabels.erase(s);
3644 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3646 //since they are sorted once you find a bigger one stop looking
3653 catch(exception& e) {
3654 errorOut(e, "MothurOut", "anyLabelsToProcess");
3659 /**************************************************************************************************/
// Compares the version recorded on the first line of file with version.
// The first line must start with '#'; the rest of it and version are both
// split at '.' and compared component by component as numbers. The file is
// judged not good when the line does not start with '#', when the two versions
// have a different number of components, or when a component of version is
// greater than the file's. A file that is not good is closed; otherwise the
// stream is rewound to the beginning.
// The declarations of good, num1 and num2 and the return are on lines not
// shown in this listing.
3660 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3665 string line = getline(file);
3667 //before we added this check
// NOTE(review): line[0] is read without a visible emptiness check - confirm
// getline cannot return an empty string here
3668 if (line[0] != '#') { good = false; }
// drop the leading '#'
3671 line = line.substr(1);
3673 vector<string> versionVector;
3674 splitAtChar(version, versionVector, '.');
3676 //check file version
3677 vector<string> linesVector;
3678 splitAtChar(line, linesVector, '.');
3680 if (versionVector.size() != linesVector.size()) { good = false; }
3682 for (int j = 0; j < versionVector.size(); j++) {
3684 convert(versionVector[j], num1);
3685 convert(linesVector[j], num2);
3687 //if mothurs version is newer than this files version, then we want to remake it
3688 if (num1 > num2) { good = false; break; }
3694 if (!good) { file.close(); }
3695 else { file.seekg(0); }
3699 catch(exception& e) {
3700 errorOut(e, "MothurOut", "checkReleaseVersion");
3704 /**************************************************************************************************/
// Computes the column-wise mean of dists: averages[i] is the sum of
// dists[iter][i] over all rows divided by the number of rows.
// The result has one entry per element of the first row.
// NOTE(review): dists[0] is read without a visible emptiness check, and every
// row is indexed into averages, so rows longer than the first row would write
// out of range - confirm that callers pass non-empty, rectangular data.
3705 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3707 vector<double> averages; //averages.resize(numComp, 0.0);
3708 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
// accumulate the column sums
3710 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3711 for (int i = 0; i < dists[thisIter].size(); i++) {
3712 averages[i] += dists[thisIter][i];
// turn the sums into means
3717 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3721 catch(exception& e) {
3722 errorOut(e, "MothurOut", "getAverages");
3726 /**************************************************************************************************/
// Computes the arithmetic mean of dists: the elements are summed into average,
// which is then divided by dists.size().
// average is declared on a line not shown in this listing.
// NOTE(review): for an empty vector the division is by zero - confirm callers
// never pass one.
3727 double MothurOut::getAverage(vector<double> dists) {
3731 for (int i = 0; i < dists.size(); i++) {
3732 average += dists[i];
3736 average /= (double) dists.size();
3740 catch(exception& e) {
3741 errorOut(e, "MothurOut", "getAverage");
3746 /**************************************************************************************************/
3747 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3750 vector<double> averages = getAverages(dists);
3752 //find standard deviation
3753 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3754 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3756 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3757 for (int j = 0; j < dists[thisIter].size(); j++) {
3758 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3761 for (int i = 0; i < stdDev.size(); i++) {
3762 stdDev[i] /= (double) dists.size();
3763 stdDev[i] = sqrt(stdDev[i]);
3768 catch(exception& e) {
3769 errorOut(e, "MothurOut", "getAverages");
3773 /**************************************************************************************************/
// Computes the column-wise standard deviation of dists using column means
// supplied by the caller in averages. For each column the squared differences
// from averages[j] are summed over all rows, divided by the number of rows
// (population form), and the square root is taken.
// NOTE(review): dists[0] is read without a visible emptiness check, and
// averages is indexed by column without a visible size check.
3774 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3776 //find standard deviation
3777 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3778 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3780 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3781 for (int j = 0; j < dists[thisIter].size(); j++) {
3782 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
// mean of the squared differences, then its square root
3785 for (int i = 0; i < stdDev.size(); i++) {
3786 stdDev[i] /= (double) dists.size();
3787 stdDev[i] = sqrt(stdDev[i]);
3792 catch(exception& e) {
3793 errorOut(e, "MothurOut", "getStandardDeviation");
3797 /**************************************************************************************************/
// Summarizes calcDistsTotals[iteration][calculator][comparison] across the
// iterations. The result has the shape of the first iteration and copies seq1
// and seq2 from it. With mode "average" each dist is the mean over all
// iterations; with any other mode each dist is the element at index
// (number of iterations)/2 of the sorted values.
// NOTE(review): for an even number of iterations that is the upper of the two
// middle values, not their mean - confirm this is the intended median.
// NOTE(review): calcDistsTotals[0] is read without a visible emptiness check.
3798 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3801 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3802 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3803 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3804 vector<seqDist> temp;
3805 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3807 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3808 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3809 tempDist.dist = 0.0;
3810 temp.push_back(tempDist);
3812 calcAverages.push_back(temp);
3815 if (mode == "average") {
3816 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3817 for (int i = 0; i < calcAverages.size(); i++) { //for each calculator
3818 for (int j = 0; j < calcAverages[i].size(); j++) {
3819 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3824 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3825 for (int j = 0; j < calcAverages[i].size(); j++) {
3826 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3829 }else { //find median
3830 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3831 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3832 vector<double> dists;
3833 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3834 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3836 sort(dists.begin(), dists.end());
3837 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3842 return calcAverages;
3844 catch(exception& e) {
3845 errorOut(e, "MothurOut", "getAverages");
3849 /**************************************************************************************************/
// Averages calcDistsTotals[iteration][calculator][comparison] across the
// iterations. The result has the shape of the first iteration, copies seq1 and
// seq2 from it, and holds in dist the mean of that comparison's dist over all
// iterations.
// NOTE(review): calcDistsTotals[0] is read without a visible emptiness check.
3850 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3853 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3854 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3855 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3856 vector<seqDist> temp;
3857 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3859 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3860 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3861 tempDist.dist = 0.0;
3862 temp.push_back(tempDist);
3864 calcAverages.push_back(temp);
3868 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3869 for (int i = 0; i < calcAverages.size(); i++) { //for each calculator
3870 for (int j = 0; j < calcAverages[i].size(); j++) {
3871 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3876 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3877 for (int j = 0; j < calcAverages[i].size(); j++) {
3878 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3882 return calcAverages;
3884 catch(exception& e) {
3885 errorOut(e, "MothurOut", "getAverages");
3889 /**************************************************************************************************/
3890 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3893 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3895 //find standard deviation
3896 vector< vector<seqDist> > stdDev;
3897 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3898 vector<seqDist> temp;
3899 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3901 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3902 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3903 tempDist.dist = 0.0;
3904 temp.push_back(tempDist);
3906 stdDev.push_back(temp);
3909 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3910 for (int i = 0; i < stdDev.size(); i++) {
3911 for (int j = 0; j < stdDev[i].size(); j++) {
3912 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3917 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3918 for (int j = 0; j < stdDev[i].size(); j++) {
3919 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3920 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3926 catch(exception& e) {
3927 errorOut(e, "MothurOut", "getAverages");
3931 /**************************************************************************************************/
3932 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3934 //find standard deviation
3935 vector< vector<seqDist> > stdDev;
3936 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3937 vector<seqDist> temp;
3938 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3940 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3941 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3942 tempDist.dist = 0.0;
3943 temp.push_back(tempDist);
3945 stdDev.push_back(temp);
3948 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3949 for (int i = 0; i < stdDev.size(); i++) {
3950 for (int j = 0; j < stdDev[i].size(); j++) {
3951 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3956 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3957 for (int j = 0; j < stdDev[i].size(); j++) {
3958 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3959 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3965 catch(exception& e) {
3966 errorOut(e, "MothurOut", "getAverages");
3971 /**************************************************************************************************/
3972 bool MothurOut::isContainingOnlyDigits(string input) {
3975 //are you a digit in ascii code
3976 for (int i = 0;i < input.length(); i++){
3977 if( input[i]>47 && input[i]<58){}
3978 else { return false; }
3983 catch(exception& e) {
3984 errorOut(e, "MothurOut", "isContainingOnlyDigits");
3988 /**************************************************************************************************/
3989 int MothurOut::removeConfidences(string& tax) {
3995 while (tax.find_first_of(';') != -1) {
3997 if (control_pressed) { return 0; }
4000 taxon = tax.substr(0,tax.find_first_of(';'));
4002 int pos = taxon.find_last_of('(');
4005 int pos2 = taxon.find_last_of(')');
4007 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
4008 if (isNumeric1(confidenceScore)) {
4009 taxon = taxon.substr(0, pos); //rip off confidence
4015 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
4023 catch(exception& e) {
4024 errorOut(e, "MothurOut", "removeConfidences");
4028 /**************************************************************************************************/
4029 string MothurOut::removeQuotes(string tax) {
4035 for (int i = 0; i < tax.length(); i++) {
4037 if (control_pressed) { return newTax; }
4039 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
4045 catch(exception& e) {
4046 errorOut(e, "MothurOut", "removeQuotes");
4050 /**************************************************************************************************/
4051 // function for calculating standard deviation
4052 double MothurOut::getStandardDeviation(vector<int>& featureVector){
4056 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
4057 average /= (double) featureVector.size();
4059 //find standard deviation
4061 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
4062 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
4065 stdDev /= (double) featureVector.size();
4066 stdDev = sqrt(stdDev);
4070 catch(exception& e) {
4071 errorOut(e, "MothurOut", "getStandardDeviation");
4075 /**************************************************************************************************/
4076 // returns largest value in vector
4077 double MothurOut::max(vector<double>& featureVector){
4079 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4082 double largest = featureVector[0];
4083 for (int i = 1; i < featureVector.size(); i++) {
4084 if (featureVector[i] > largest) { largest = featureVector[i]; }
4089 catch(exception& e) {
4090 errorOut(e, "MothurOut", "max");
4094 /**************************************************************************************************/
4095 // returns smallest value in vector
4096 double MothurOut::min(vector<double>& featureVector){
4098 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4101 double smallest = featureVector[0];
4102 for (int i = 1; i < featureVector.size(); i++) {
4103 if (featureVector[i] < smallest) { smallest = featureVector[i]; }
4108 catch(exception& e) {
4109 errorOut(e, "MothurOut", "min");
4113 /**************************************************************************************************/