5 * Created by westcott on 2/25/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "mothurout.h"
13 /******************************************************/
// Returns the shared MothurOut object held in _uniqueInstance. The object is
// allocated with new on the first call (while _uniqueInstance is still 0);
// later calls hand back the same pointer.
14 MothurOut* MothurOut::getInstance() {
15 if( _uniqueInstance == 0) {
16 _uniqueInstance = new MothurOut();
18 return _uniqueInstance;
20 /*********************************************************************************************/
// Collects the keywords of the "current" settings into types (the function's
// return type is set<string>). "processors" is inserted alongside the file-type
// keywords. Exceptions are reported through errorOut.
21 set<string> MothurOut::getCurrentTypes() {
25 types.insert("fasta");
26 types.insert("summary");
27 types.insert("accnos");
28 types.insert("column");
29 types.insert("design");
30 types.insert("group");
33 types.insert("oligos");
34 types.insert("order");
35 types.insert("ordergroup");
36 types.insert("phylip");
37 types.insert("qfile");
38 types.insert("relabund");
39 types.insert("sabund");
40 types.insert("rabund");
42 types.insert("shared");
43 types.insert("taxonomy");
47 types.insert("count");
48 types.insert("processors");
53 errorOut(e, "MothurOut", "getCurrentTypes");
57 /*********************************************************************************************/
// Writes one "keyword=value" line through mothurOut for every current-file
// member that is not the empty string. Exceptions are reported through errorOut.
58 void MothurOut::printCurrentFiles() {
62 if (accnosfile != "") { mothurOut("accnos=" + accnosfile); mothurOutEndLine(); }
63 if (columnfile != "") { mothurOut("column=" + columnfile); mothurOutEndLine(); }
64 if (designfile != "") { mothurOut("design=" + designfile); mothurOutEndLine(); }
65 if (fastafile != "") { mothurOut("fasta=" + fastafile); mothurOutEndLine(); }
66 if (groupfile != "") { mothurOut("group=" + groupfile); mothurOutEndLine(); }
67 if (listfile != "") { mothurOut("list=" + listfile); mothurOutEndLine(); }
68 if (namefile != "") { mothurOut("name=" + namefile); mothurOutEndLine(); }
69 if (oligosfile != "") { mothurOut("oligos=" + oligosfile); mothurOutEndLine(); }
70 if (orderfile != "") { mothurOut("order=" + orderfile); mothurOutEndLine(); }
71 if (ordergroupfile != "") { mothurOut("ordergroup=" + ordergroupfile); mothurOutEndLine(); }
72 if (phylipfile != "") { mothurOut("phylip=" + phylipfile); mothurOutEndLine(); }
// the quality file is stored in qualfile but printed under the keyword "qfile"
73 if (qualfile != "") { mothurOut("qfile=" + qualfile); mothurOutEndLine(); }
74 if (rabundfile != "") { mothurOut("rabund=" + rabundfile); mothurOutEndLine(); }
75 if (relabundfile != "") { mothurOut("relabund=" + relabundfile); mothurOutEndLine(); }
76 if (sabundfile != "") { mothurOut("sabund=" + sabundfile); mothurOutEndLine(); }
77 if (sfffile != "") { mothurOut("sff=" + sfffile); mothurOutEndLine(); }
78 if (sharedfile != "") { mothurOut("shared=" + sharedfile); mothurOutEndLine(); }
79 if (taxonomyfile != "") { mothurOut("taxonomy=" + taxonomyfile); mothurOutEndLine(); }
80 if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); }
81 if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); }
82 if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); }
83 if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); }
// processors is a string; it is printed only when it differs from "1"
84 if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); }
85 if (summaryfile != "") { mothurOut("summary=" + summaryfile); mothurOutEndLine(); }
89 errorOut(e, "MothurOut", "printCurrentFiles");
93 /*********************************************************************************************/
// Tells whether any current setting is in use: returns true as soon as one of
// the file-name members is non-empty or processors differs from "1".
// Exceptions are reported through errorOut.
94 bool MothurOut::hasCurrentFiles() {
// NOTE(review): none of the checks visible here assigns hasCurrent; each one
// returns true directly.
96 bool hasCurrent = false;
98 if (accnosfile != "") { return true; }
99 if (columnfile != "") { return true; }
100 if (designfile != "") { return true; }
101 if (fastafile != "") { return true; }
102 if (groupfile != "") { return true; }
103 if (listfile != "") { return true; }
104 if (namefile != "") { return true; }
105 if (oligosfile != "") { return true; }
106 if (orderfile != "") { return true; }
107 if (ordergroupfile != "") { return true; }
108 if (phylipfile != "") { return true; }
109 if (qualfile != "") { return true; }
110 if (rabundfile != "") { return true; }
111 if (relabundfile != "") { return true; }
112 if (sabundfile != "") { return true; }
113 if (sfffile != "") { return true; }
114 if (sharedfile != "") { return true; }
115 if (taxonomyfile != "") { return true; }
116 if (treefile != "") { return true; }
117 if (flowfile != "") { return true; }
118 if (biomfile != "") { return true; }
119 if (counttablefile != "") { return true; }
120 if (summaryfile != "") { return true; }
121 if (processors != "1") { return true; }
126 catch(exception& e) {
127 errorOut(e, "MothurOut", "hasCurrentFiles");
132 /*********************************************************************************************/
// NOTE(review): the statements of this function are not visible in this view;
// judging by the name it presumably resets the current-file members - confirm.
// Exceptions are reported through errorOut.
133 void MothurOut::clearCurrentFiles() {
160 catch(exception& e) {
161 errorOut(e, "MothurOut", "clearCurrentFiles");
165 /***********************************************************************/
// Looks for programName in the directories listed in the PATH environment
// variable and builds pPath from the match.
//   Pass 1: take the first directory whose lower-cased text contains
//           programName, then append the path separator and programName.
//   Pass 2: (per the comments below, for when the name is not part of any
//           directory string) try to open <dir><separator><programName> in each
//           directory and keep the first one that opens.
// Exceptions are reported through errorOut.
166 string MothurOut::findProgramPath(string programName){
// NOTE(review): getenv returns a null pointer when PATH is not set; constructing
// a string from a null pointer is undefined behaviour. No check is visible here.
169 string envPath = getenv("PATH");
172 //delimiting path char
174 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
180 //break apart path variable by ':'
182 splitAtChar(envPath, dirs, delim);
184 if (debug) { mothurOut("[DEBUG]: dir's in path: \n"); }
186 //get path related to mothur
187 for (int i = 0; i < dirs.size(); i++) {
189 if (debug) { mothurOut("[DEBUG]: " + dirs[i] + "\n"); }
191 //to lower so we can find it
// only the directory text is lower-cased; programName is compared as given
192 string tempLower = "";
193 for (int j = 0; j < dirs[i].length(); j++) { tempLower += tolower(dirs[i][j]); }
195 //is this mothurs path?
// NOTE(review): find() yields string::npos on failure; the comparison with -1
// relies on the implicit signed-to-unsigned conversion.
196 if (tempLower.find(programName) != -1) { pPath = dirs[i]; break; }
199 if (debug) { mothurOut("[DEBUG]: programPath = " + pPath + "\n"); }
202 //add programName so it looks like what argv would look like
203 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
204 pPath += "/" + programName;
206 pPath += "\\" + programName;
209 //okay programName is not in the path, so the folder programName is in must be in the path
210 //lets find out which one
212 //get path related to the program
213 for (int i = 0; i < dirs.size(); i++) {
215 if (debug) { mothurOut("[DEBUG]: looking in " + dirs[i] + " for " + programName + " \n"); }
217 //is this the programs path?
219 string tempIn = dirs[i];
220 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
221 tempIn += "/" + programName;
223 tempIn += "\\" + programName;
// three-argument openInputFile overload: its "could not open" message is
// commented out, so a failed probe here produces no error output from it
225 openInputFile(tempIn, in, "");
227 //if this file exists
228 if (in) { in.close(); pPath = tempIn; if (debug) { mothurOut("[DEBUG]: found it, programPath = " + pPath + "\n"); } break; }
235 catch(exception& e) {
236 errorOut(e, "MothurOut", "findProgramPath");
240 /*********************************************************************************************/
// Stores filename in logFileName and opens it for output on the member stream
// out via openOutputFile. The MPI rank is queried and a pid == 0 check is
// visible; NOTE(review): the preprocessor guards around the MPI lines are not
// visible in this view. Exceptions are reported through errorOut.
241 void MothurOut::setFileName(string filename) {
243 logFileName = filename;
247 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
249 if (pid == 0) { //only one process should output to screen
252 openOutputFile(filename, out);
258 catch(exception& e) {
259 errorOut(e, "MothurOut", "setFileName");
263 /*********************************************************************************************/
// Stores pathname in defaultPath after making sure it ends with the platform's
// path separator ("/" on the unix-like branch, "\\" otherwise).
// Exceptions are reported through errorOut.
264 void MothurOut::setDefaultPath(string pathname) {
267 //add / to name if needed
// NOTE(review): for an empty pathname, length()-1 wraps around and substr throws
// std::out_of_range; no emptiness check is visible here (dirCheck has one).
268 string lastChar = pathname.substr(pathname.length()-1);
269 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
270 if (lastChar != "/") { pathname += "/"; }
272 if (lastChar != "\\") { pathname += "\\"; }
275 defaultPath = pathname;
278 catch(exception& e) {
279 errorOut(e, "MothurOut", "setDefaultPath");
283 /*********************************************************************************************/
// Stores pathname in outputDir exactly as given (no separator is appended in the
// visible lines). Exceptions are reported through errorOut.
284 void MothurOut::setOutputDir(string pathname) {
286 outputDir = pathname;
288 catch(exception& e) {
289 errorOut(e, "MothurOut", "setOutputDir");
293 /*********************************************************************************************/
// NOTE(review): the statements that act on the log are not visible in this view.
// What is visible: the MPI rank is queried and work is gated on pid == 0, and
// exceptions are reported through errorOut.
294 void MothurOut::closeLog() {
299 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
301 if (pid == 0) { //only one process should output to screen
310 catch(exception& e) {
311 errorOut(e, "MothurOut", "closeLog");
316 /*********************************************************************************************/
// Destructor. NOTE(review): its cleanup statements are not visible in this view.
// Exceptions are reported through errorOut under the function label "MothurOut".
317 MothurOut::~MothurOut() {
322 catch(exception& e) {
323 errorOut(e, "MothurOut", "MothurOut");
327 /*********************************************************************************************/
// Emits output; the MPI rank is queried and the work is gated on pid == 0.
// NOTE(review): the statements that actually write are not visible in this view.
328 void MothurOut::mothurOut(string output) {
333 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
335 if (pid == 0) { //only one process should output to screen
345 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut", not "mothurOut".
346 errorOut(e, "MothurOut", "MothurOut");
350 /*********************************************************************************************/
// Sends output to mothurOut wrapped as "[DEBUG]: <output>\n".
351 void MothurOut::debugOut(string output) {
354 mothurOut("[DEBUG]: "+output+"\n");
357 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut", not "debugOut".
358 errorOut(e, "MothurOut", "MothurOut");
362 /*********************************************************************************************/
// Same as debugOut(string), but forwards to the mothurOut overload that also
// receives outputFile.
363 void MothurOut::debugOut(string output,ofstream& outputFile) {
366 mothurOut("[DEBUG]: "+output+"\n",outputFile);
369 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut", not "debugOut".
370 errorOut(e,"MothurOut","MothurOut");
374 /*********************************************************************************************/
// Screen-only variant of mothurOut (per its name); the MPI rank is queried and
// the work is gated on pid == 0.
// NOTE(review): the statements that actually write are not visible in this view.
375 void MothurOut::mothurOutJustToScreen(string output) {
380 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
382 if (pid == 0) { //only one process should output to screen
390 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut".
391 errorOut(e, "MothurOut", "MothurOut");
395 /*********************************************************************************************/
// Ends the current output line (per its name); the MPI rank is queried and the
// work is gated on pid == 0.
// NOTE(review): the statements that actually write are not visible in this view.
396 void MothurOut::mothurOutEndLine() {
400 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
402 if (pid == 0) { //only one process should output to screen
412 catch(exception& e) {
413 errorOut(e, "MothurOut", "MothurOutEndLine");
417 /*********************************************************************************************/
// Overload of mothurOut that also streams output into the caller's outputFile.
// The MPI rank is queried and a pid == 0 gate is visible; any other destinations
// written here are not visible in this view.
418 void MothurOut::mothurOut(string output, ofstream& outputFile) {
423 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
425 if (pid == 0) { //only one process should output to screen
430 outputFile << output;
438 catch(exception& e) {
// NOTE(review): the function label passed here is "MothurOut", not "mothurOut".
439 errorOut(e, "MothurOut", "MothurOut");
443 /*********************************************************************************************/
// Overload of mothurOutEndLine taking the caller's outputFile; the MPI rank is
// queried and the work is gated on pid == 0.
// NOTE(review): the statements that actually write are not visible in this view.
444 void MothurOut::mothurOutEndLine(ofstream& outputFile) {
448 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
450 if (pid == 0) { //only one process should output to screen
461 catch(exception& e) {
462 errorOut(e, "MothurOut", "MothurOutEndLine");
466 /*********************************************************************************************/
// Log-only variant of mothurOut (per its name); the MPI rank is queried and the
// work is gated on pid == 0.
// NOTE(review): the statements that actually write are not visible in this view.
467 void MothurOut::mothurOutJustToLog(string output) {
471 MPI_Comm_rank(MPI_COMM_WORLD, &pid);
473 if (pid == 0) { //only one process should output to screen
482 catch(exception& e) {
483 errorOut(e, "MothurOut", "MothurOutJustToLog");
487 /*********************************************************************************************/
// Prints "[ERROR]: " followed by e.what() and an explanation naming the class
// (object) and function where the exception was caught.
//   - If the message does not contain "bad_alloc": a generic "please contact"
//     text is printed.
//   - If it does: an out-of-memory explanation is printed, with dedicated advice
//     when object is "cluster" or "shhh.flows" and a general text otherwise.
// Parameters: e - the caught exception; object - label printed as the class;
//             function - label printed as the function.
488 void MothurOut::errorOut(exception& e, string object, string function) {
490 //mem_usage(vm, rss);
492 string errorType = toString(e.what());
// NOTE(review): the result of find() is stored in an int and later compared with
// string::npos; this relies on implicit integer conversions.
494 int pos = errorType.find("bad_alloc");
495 mothurOut("[ERROR]: ");
496 mothurOut(errorType);
498 if (pos == string::npos) { //not bad_alloc
499 mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
// bad_alloc: choose the advice by the label the caller passed as object
502 if (object == "cluster"){
503 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
504 }else if (object == "shhh.flows"){
505 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
507 mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
511 /*********************************************************************************************/
512 //The following was originally from http://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
513 // mem_usage(double &, double &) - takes two doubles by reference,
514 // attempts to read the system-dependent data for a process' virtual memory
515 // size and resident set size, and return the results in KB.
517 // On failure, returns 0.0, 0.0
// Reads this process's entry in /proc/self/stat, skips the leading fields, and
// derives vm_usage (vsize / 1024.0) and resident_set (rss * page size in KB);
// both values are also printed through mothurOut.
// The Windows code further down sits inside a block comment and is not compiled.
518 int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
// NOTE(review): this guard also matches __APPLE__/__MACH__, while the code reads
// /proc/self/stat - confirm that path is available on every platform matched.
519 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
524 // 'file' stat seems to give the most reliable results
526 ifstream stat_stream("/proc/self/stat",ios_base::in);
528 // dummy vars for leading entries in stat that we don't care about
530 string pid, comm, state, ppid, pgrp, session, tty_nr;
531 string tpgid, flags, minflt, cminflt, majflt, cmajflt;
532 string utime, stime, cutime, cstime, priority, nice;
533 string O, itrealvalue, starttime;
535 // the two fields we want
// 22 whitespace-separated fields are discarded before vsize and rss are read
540 stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr
541 >> tpgid >> flags >> minflt >> cminflt >> majflt >> cmajflt
542 >> utime >> stime >> cutime >> cstime >> priority >> nice
543 >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest
545 long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
546 vm_usage = vsize / 1024.0;
547 resident_set = rss * page_size_kb;
549 mothurOut("Memory Usage: vm = " + toString(vm_usage) + " rss = " + toString(resident_set) + "\n");
553 /* //windows memory usage
554 // Get the list of process identifiers.
555 DWORD aProcesses[1024], cbNeeded, cProcesses;
557 if ( !EnumProcesses( aProcesses, sizeof(aProcesses), &cbNeeded ) ){ return 1; }
559 // Calculate how many process identifiers were returned.
560 cProcesses = cbNeeded / sizeof(DWORD);
562 // Print the memory usage for each process
563 for (int i = 0; i < cProcesses; i++ ) {
564 DWORD processID = aProcesses[i];
566 PROCESS_MEMORY_COUNTERS pmc;
568 HANDLE hProcess = OpenProcess((PROCESS_QUERY_INFORMATION | PROCESS_VM_READ), FALSE, processID);
570 // Print the process identifier.
571 printf( "\nProcess ID: %u\n", processID);
573 if (NULL != hProcess) {
575 if ( GetProcessMemoryInfo( hProcess, &pmc, sizeof(pmc)) ) {
576 printf( "\tPageFaultCount: 0x%08X\n", pmc.PageFaultCount );
577 printf( "\tPeakWorkingSetSize: 0x%08X\n", pmc.PeakWorkingSetSize );
578 printf( "\tWorkingSetSize: 0x%08X\n", pmc.WorkingSetSize );
579 printf( "\tQuotaPeakPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakPagedPoolUsage );
580 printf( "\tQuotaPagedPoolUsage: 0x%08X\n", pmc.QuotaPagedPoolUsage );
581 printf( "\tQuotaPeakNonPagedPoolUsage: 0x%08X\n", pmc.QuotaPeakNonPagedPoolUsage );
582 printf( "\tQuotaNonPagedPoolUsage: 0x%08X\n", pmc.QuotaNonPagedPoolUsage );
583 printf( "\tPagefileUsage: 0x%08X\n", pmc.PagefileUsage );
584 printf( "\tPeakPagefileUsage: 0x%08X\n", pmc.PeakPagefileUsage );
586 CloseHandle(hProcess);
596 /***********************************************************************/
// Expands fileName with getFullPathName and opens it on fileHandle in append
// mode (ios::app). An "[ERROR]: Could not open ..." message is available for the
// failure case; the condition and the return values are not visible in this view.
597 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
599 fileName = getFullPathName(fileName);
601 fileHandle.open(fileName.c_str(), ios::app);
603 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
610 catch(exception& e) {
611 errorOut(e, "MothurOut", "openOutputFileAppend");
615 /***********************************************************************/
// Expands fileName with getFullPathName and opens it on fileHandle in binary
// append mode (ios::app | ios::binary). An "[ERROR]: Could not open ..." message
// is available for the failure case; the condition and the return values are not
// visible in this view.
616 int MothurOut::openOutputFileBinaryAppend(string fileName, ofstream& fileHandle){
618 fileName = getFullPathName(fileName);
620 fileHandle.open(fileName.c_str(), ios::app | ios::binary);
622 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
629 catch(exception& e) {
// NOTE(review): the label reported here is "openOutputFileAppend", the name of
// the sibling function, not "openOutputFileBinaryAppend".
630 errorOut(e, "MothurOut", "openOutputFileAppend");
635 /***********************************************************************/
// Skips whitespace at the current position of f: characters are read while
// isspace() accepts them, and the first character that is not whitespace is put
// back unless the stream reached end of file.
636 void MothurOut::gobble(istream& f){
640 while(isspace(d=f.get())) { ;}
641 if(!f.eof()) { f.putback(d); }
643 catch(exception& e) {
644 errorOut(e, "MothurOut", "gobble");
648 /***********************************************************************/
// istringstream overload of gobble: reads while isspace() accepts the character,
// then puts the first non-whitespace character back unless end of file was hit.
649 void MothurOut::gobble(istringstream& f){
652 while(isspace(d=f.get())) {;}
653 if(!f.eof()) { f.putback(d); }
655 catch(exception& e) {
656 errorOut(e, "MothurOut", "gobble");
661 /***********************************************************************/
// Reads characters from fileHandle one at a time and stops at the first '\n',
// '\r' or '\f' (or when the stream reports eof). The accumulation of the line
// and the return statement are not visible in this view.
663 string MothurOut::getline(istringstream& fileHandle) {
// NOTE(review): eof() is tested before get(), so the final get() at end of input
// yields the EOF value as c; unlike the ifstream overload, no (c == EOF) test is
// visible here - confirm how that character is handled.
668 while (!fileHandle.eof()) {
670 char c = fileHandle.get();
672 //are you at the end of the line
673 if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
680 catch(exception& e) {
681 errorOut(e, "MothurOut", "getline");
685 /***********************************************************************/
// ifstream overload of getline: reads one character at a time and stops at
// '\n', '\r', '\f' or EOF. The loop header, the accumulation of the line and the
// return statement are not visible in this view.
687 string MothurOut::getline(ifstream& fileHandle) {
694 char c = fileHandle.get();
696 //are you at the end of the line
697 if ((c == '\n') || (c == '\r') || (c == '\f') || (c == EOF)){ break; }
704 catch(exception& e) {
705 errorOut(e, "MothurOut", "getline");
709 /***********************************************************************/
711 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
712 #ifdef USE_COMPRESSION
// Returns true when s is at least as long as suffix and its last
// strlen(suffix) characters compare equal to suffix.
713 inline bool endsWith(string s, const char * suffix){
714 size_t suffixLength = strlen(suffix);
715 return s.size() >= suffixLength && s.substr(s.size() - suffixLength, suffixLength).compare(suffix) == 0;
// Computes rootName from longName by cutting everything after the last '.',
// keeping the dot itself (substr(0, pos) with pos one past the dot). A name with
// no '.' is left unchanged. When USE_COMPRESSION is defined on the unix-like
// branch, a trailing ".gz" / ".bz2" is removed first and the shortening is
// reported on cerr.
720 string MothurOut::getRootName(string longName){
723 string rootName = longName;
725 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
726 #ifdef USE_COMPRESSION
727 if (endsWith(rootName, ".gz") || endsWith(rootName, ".bz2")) {
// here the dot of the compression suffix is dropped too
728 int pos = rootName.find_last_of('.');
729 rootName = rootName.substr(0, pos);
730 cerr << "shortening " << longName << " to " << rootName << "\n";
734 if(rootName.find_last_of(".") != rootName.npos){
// +1 so that the trailing '.' stays part of the root name
735 int pos = rootName.find_last_of('.')+1;
736 rootName = rootName.substr(0, pos);
741 catch(exception& e) {
742 errorOut(e, "MothurOut", "getRootName");
746 /***********************************************************************/
// Computes simpleName as the part of longName after its last '/' or '\\'; when
// neither separator occurs, simpleName stays equal to longName.
748 string MothurOut::getSimpleName(string longName){
750 string simpleName = longName;
753 found=longName.find_last_of("/\\");
755 if(found != longName.npos){
756 simpleName = longName.substr(found+1);
761 catch(exception& e) {
762 errorOut(e, "MothurOut", "getSimpleName");
767 /***********************************************************************/
// Draws a pseudo-random integer by scaling rand() into (highest+1) buckets:
// (highest+1) * rand() / (RAND_MAX + 1.0), truncated to int.
// NOTE(review): the operands are converted to float before multiplying, so large
// values of highest or rand() lose precision - confirm this is acceptable.
769 int MothurOut::getRandomIndex(int highest){
772 int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
776 catch(exception& e) {
777 errorOut(e, "MothurOut", "getRandomIndex");
782 /**********************************************************************/
// Computes rootPathName as longName up to and including its last '/' or '\\'.
// When longName contains neither separator, rootPathName stays equal to longName
// in the visible lines.
784 string MothurOut::getPathName(string longName){
786 string rootPathName = longName;
788 if(longName.find_last_of("/\\") != longName.npos){
// +1 keeps the separator at the end of the path
789 int pos = longName.find_last_of("/\\")+1;
790 rootPathName = longName.substr(0, pos);
795 catch(exception& e) {
796 errorOut(e, "MothurOut", "getPathName");
801 /***********************************************************************/
// Checks that dirName names a writable directory by opening (and then removing)
// a temporary file inside it. An empty dirName yields false immediately.
// dirName is taken by reference and is modified: a trailing path separator is
// appended if missing and the value is replaced by getFullPathName(dirName).
803 bool MothurOut::dirCheck(string& dirName){
806 if (dirName == "") { return false; }
811 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
816 //add / to name if needed
817 string lastChar = dirName.substr(dirName.length()-1);
818 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
819 if (lastChar != "/") { dirName += "/"; }
821 if (lastChar != "\\") { dirName += "\\"; }
824 //test to make sure directory exists
825 dirName = getFullPathName(dirName);
// probe file name: <dir><tag>temp<current time>; how tag is set is not visible here
826 string outTemp = dirName + tag + "temp"+ toString(time(NULL));
828 out.open(outTemp.c_str(), ios::trunc);
830 mothurOut(dirName + " directory does not exist or is not writable."); mothurOutEndLine();
833 mothurRemove(outTemp);
839 catch(exception& e) {
840 errorOut(e, "MothurOut", "dirCheck");
845 //**********************************************************************************************************************
// Parses a class description such as "treatment<Early|Late>-age<young|old>" into
// parts: the string is split at '-' into pieces, each piece contributes one
// entry whose key is the text collected before '<' (category) and whose value is
// the text collected after it, split at '|'.
// If control_pressed is set during the scan, the map built so far is returned.
847 map<string, vector<string> > MothurOut::parseClasses(string classes){
849 map<string, vector<string> > parts;
851 //treatment<Early|Late>-age<young|old>
852 vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
854 for (int i = 0; i < pieces.size(); i++) {
855 string category = ""; string value = "";
856 bool foundOpen = false;
857 for (int j = 0; j < pieces[i].length(); j++) {
858 if (control_pressed) { return parts; }
// '<' switches from collecting the category to collecting the value
860 if (pieces[i][j] == '<') { foundOpen = true; }
// '>' ends the scan of this piece by pushing j past its length
861 else if (pieces[i][j] == '>') { j += pieces[i].length(); }
863 if (!foundOpen) { category += pieces[i][j]; }
864 else { value += pieces[i][j]; }
867 vector<string> values; splitAtChar(value, values, '|');
868 parts[category] = values;
873 catch(exception& e) {
874 errorOut(e, "MothurOut", "parseClasses");
878 /***********************************************************************/
// Extracts the path portion of longName: everything up to and including the last
// occurrence of '~', '/' or '\\'. The value of path when none of these occurs is
// set outside the lines visible in this view.
880 string MothurOut::hasPath(string longName){
885 found=longName.find_last_of("~/\\");
887 if(found != longName.npos){
888 path = longName.substr(0, found+1);
893 catch(exception& e) {
894 errorOut(e, "MothurOut", "hasPath");
899 /***********************************************************************/
// Computes extension as the tail of longName starting at its last '.', dot
// included; extension stays "" when longName has no '.'.
901 string MothurOut::getExtension(string longName){
903 string extension = "";
905 if(longName.find_last_of('.') != longName.npos){
906 int pos = longName.find_last_of('.');
// the count argument exceeds what remains, so substr simply runs to the end
907 extension = longName.substr(pos, longName.length());
912 catch(exception& e) {
913 errorOut(e, "MothurOut", "getExtension");
917 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) and returns true when the
// stream is at end of file at the blank-file check. An "[ERROR]: Could not
// open ..." message is available for the failure case; the statements between
// the open and the eof() test, and the other return paths, are not visible in
// this view.
918 bool MothurOut::isBlank(string fileName){
921 fileName = getFullPathName(fileName);
924 fileHandle.open(fileName.c_str());
926 mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
929 //check for blank file
931 if (fileHandle.eof()) { fileHandle.close(); return true; }
936 catch(exception& e) {
937 errorOut(e, "MothurOut", "isBlank");
941 /***********************************************************************/
// Turns a possibly relative file name into a complete one.
//   - No path part (hasPath returns ""): fileName is returned unchanged.
//   - Path contains '~': the text after '~' is appended to the home directory
//     (HOME on the unix-like branch, HOMEPATH on the other branch).
//   - Path contains no "./" (".\\" on the other branch): fileName is returned
//     unchanged as already complete.
//   - Otherwise: the current working directory is split into its components,
//     the "./" and "../" markers in the path are consumed from the right, and
//     the remaining cwd components are prepended to the part of fileName after
//     the last marker.
// A path whose markers cannot be interpreted is reported with "cannot resolve
// path for ..." and fileName is returned unchanged.
943 string MothurOut::getFullPathName(string fileName){
946 string path = hasPath(fileName);
950 if (path == "") { return fileName; } //its a simple name
951 else { //we need to complete the pathname
952 // ex. ../../../filename
953 // cwd = /user/work/desktop
956 //get current working directory
957 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
959 if (path.find("~") != -1) { //go to home directory
962 char *homepath = NULL;
963 homepath = getenv ("HOME");
// an unset HOME falls back to an empty home directory
964 if ( homepath != NULL) { homeDir = homepath; }
965 else { homeDir = ""; }
967 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
970 if (path.rfind("./") == string::npos) { return fileName; } //already complete name
971 else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
973 //char* cwdpath = new char[1024];
975 //cwdpath=getcwd(cwdpath,size);
// NOTE(review): getcwd(NULL, 0) hands back a buffer the caller must free; no
// free(cwdpath) is visible in this view - confirm it is released.
978 char *cwdpath = NULL;
979 cwdpath = getcwd(NULL, 0); // or _getcwd
980 if ( cwdpath != NULL) { cwd = cwdpath; }
// drop the leading character of cwd (the root '/' per the example above)
986 if (cwd.length() > 0) { simpleCWD = cwd.substr(1); }
988 //break apart the current working directory
990 while (simpleCWD.find_first_of('/') != string::npos) {
991 string dir = simpleCWD.substr(0,simpleCWD.find_first_of('/'));
992 simpleCWD = simpleCWD.substr(simpleCWD.find_first_of('/')+1, simpleCWD.length());
995 //get last one // ex. ../../../filename = /user/work/desktop/filename
996 dirs.push_back(simpleCWD); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
// index marks the deepest cwd component that is kept when the name is rebuilt
999 int index = dirs.size()-1;
1001 while((pos = path.rfind("./")) != string::npos) { //while you don't have a complete path
1002 if (pos == 0) { break; //you are at the end
1003 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1004 path = path.substr(0, pos-1);
1006 if (index == 0) { break; }
1007 }else if (path[(pos-1)] == '/') { //you want the current working dir ./
1008 path = path.substr(0, pos);
1009 }else if (pos == 1) { break; //you are at the end
1010 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
// rebuild: prepend the kept cwd components, innermost first
1013 for (int i = index; i >= 0; i--) {
1014 newFileName = dirs[i] + "/" + newFileName;
1017 newFileName = "/" + newFileName;
1021 if (path.find("~") != string::npos) { //go to home directory
// NOTE(review): unlike the HOME lookup above, the getenv result is used without
// a null check; a null pointer here is undefined behaviour.
1022 string homeDir = getenv ("HOMEPATH");
1023 newFileName = homeDir + fileName.substr(fileName.find("~")+1);
1026 if (path.rfind(".\\") == string::npos) { return fileName; } //already complete name
1027 else { newFileName = fileName.substr(fileName.rfind(".\\")+2); } //save the complete part of the name
1029 char *cwdpath = NULL;
1030 cwdpath = getcwd(NULL, 0); // or _getcwd
1031 if ( cwdpath != NULL) { cwd = cwdpath; }
1034 //break apart the current working directory
1035 vector<string> dirs;
1036 while (cwd.find_first_of('\\') != -1) {
1037 string dir = cwd.substr(0,cwd.find_first_of('\\'));
1038 cwd = cwd.substr(cwd.find_first_of('\\')+1, cwd.length());
1039 dirs.push_back(dir);
1043 dirs.push_back(cwd); //ex. dirs[0] = user, dirs[1] = work, dirs[2] = desktop
1045 int index = dirs.size()-1;
1047 while((pos = path.rfind(".\\")) != string::npos) { //while you don't have a complete path
1048 if (pos == 0) { break; //you are at the end
1049 }else if (path[(pos-1)] == '.') { //you want your parent directory ../
1050 path = path.substr(0, pos-1);
1052 if (index == 0) { break; }
1053 }else if (path[(pos-1)] == '\\') { //you want the current working dir ./
1054 path = path.substr(0, pos);
1055 }else if (pos == 1) { break; //you are at the end
1056 }else { mothurOut("cannot resolve path for " + fileName + "\n"); return fileName; }
1059 for (int i = index; i >= 0; i--) {
1060 newFileName = dirs[i] + "\\" + newFileName;
1069 catch(exception& e) {
1070 errorOut(e, "MothurOut", "getFullPathName");
1074 /***********************************************************************/
// Quiet overload of openInputFile: opens fileName (expanded with
// getFullPathName) on fileHandle; the "Could not open" message is commented out
// here. The third parameter m is not used in the visible lines.
// With USE_COMPRESSION on the unix-like branch, a ".gz"/".bz2" file is streamed
// through a named pipe: a child process runs zcat/bzcat into the pipe and the
// parent opens the pipe instead of the original file.
1076 int MothurOut::openInputFile(string fileName, ifstream& fileHandle, string m){
1078 //get full path name
1079 string completeFileName = getFullPathName(fileName);
1080 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1081 #ifdef USE_COMPRESSION
1082 // check for gzipped or bzipped file
1083 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process could create that
// path before mkfifo runs.
1084 string tempName = string(tmpnam(0));
1085 mkfifo(tempName.c_str(), 0666);
1086 int fork_result = fork();
1087 if (fork_result < 0) {
1088 cerr << "Error forking.\n";
1090 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command unquoted; names
// containing spaces or shell metacharacters will be misinterpreted by system().
1091 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1092 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1093 system(command.c_str());
1094 cerr << "Done decompressing " << completeFileName << "\n";
1095 mothurRemove(tempName);
1098 cerr << "waiting on child process " << fork_result << "\n";
// from here on the pipe is what gets opened
1099 completeFileName = tempName;
1104 fileHandle.open(completeFileName.c_str());
1106 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1109 //check for blank file
1114 catch(exception& e) {
1115 errorOut(e, "MothurOut", "openInputFile - no Error");
1119 /***********************************************************************/
// Opens fileName (expanded with getFullPathName) for reading on fileHandle.
// Messages are available for two cases: "[ERROR]: Could not open ..." and, when
// the stream is at end of file at the blank-file check, "... is blank. Please
// correct." The conditions around the first message and the return values are
// not visible in this view.
// With USE_COMPRESSION on the unix-like branch, a ".gz"/".bz2" file is streamed
// through a named pipe: a child process runs zcat/bzcat into the pipe and the
// parent opens the pipe instead of the original file.
1121 int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
1124 //get full path name
1125 string completeFileName = getFullPathName(fileName);
1126 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1127 #ifdef USE_COMPRESSION
1128 // check for gzipped or bzipped file
1129 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
// NOTE(review): tmpnam only generates a name; another process could create that
// path before mkfifo runs.
1130 string tempName = string(tmpnam(0));
1131 mkfifo(tempName.c_str(), 0666);
1132 int fork_result = fork();
1133 if (fork_result < 0) {
1134 cerr << "Error forking.\n";
1136 } else if (fork_result == 0) {
// NOTE(review): the file name is pasted into a shell command unquoted; names
// containing spaces or shell metacharacters will be misinterpreted by system().
1137 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1138 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1139 system(command.c_str());
1140 cerr << "Done decompressing " << completeFileName << "\n";
1141 mothurRemove(tempName);
1144 cerr << "waiting on child process " << fork_result << "\n";
// from here on the pipe is what gets opened
1145 completeFileName = tempName;
1151 fileHandle.open(completeFileName.c_str());
1153 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1157 //check for blank file
1159 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1164 catch(exception& e) {
1165 errorOut(e, "MothurOut", "openInputFile");
1169 /***********************************************************************/
// Binary-mode counterpart of openInputFile(): opens fileName on fileHandle with ios::binary after
// expanding it with getFullPathName(). With USE_COMPRESSION on Unix-like builds, ".gz"/".bz2" names
// are decompressed through a tmpnam()/mkfifo() pipe fed by a forked zcat/bzcat child.
// Open failures and blank files are reported through mothurOut.
// NOTE(review): return values are set on lines not shown here — presumably 0 on success; confirm.
// NOTE(review): the decompression command is built by unquoted string concatenation.
1170 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){
1173 //get full path name
1174 string completeFileName = getFullPathName(fileName);
1175 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1176 #ifdef USE_COMPRESSION
1177 // check for gzipped or bzipped file
1178 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1179 string tempName = string(tmpnam(0));
1180 mkfifo(tempName.c_str(), 0666);
1181 int fork_result = fork();
1182 if (fork_result < 0) {
1183 cerr << "Error forking.\n";
1185 } else if (fork_result == 0) {
// child process: decompress into the pipe, then remove the pipe
1186 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1187 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1188 system(command.c_str());
1189 cerr << "Done decompressing " << completeFileName << "\n";
1190 mothurRemove(tempName);
1193 cerr << "waiting on child process " << fork_result << "\n";
1194 completeFileName = tempName;
1200 fileHandle.open(completeFileName.c_str(), ios::binary);
1202 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1206 //check for blank file
1208 if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1213 catch(exception& e) {
1214 errorOut(e, "MothurOut", "openInputFileBinary");
1218 /***********************************************************************/
// Quiet variant of openInputFileBinary(): same open logic (getFullPathName(), optional
// zcat/bzcat pipe under USE_COMPRESSION, ios::binary open) but the "Could not open" and
// "is blank" messages are commented out, so a failed open is not reported to the user.
// The noerror argument is not read in the lines visible here; it appears to exist only to select this overload.
// NOTE(review): return values are set on lines not shown here — appendBinaryFiles treats 0 as "opened".
1219 int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
1222 //get full path name
1223 string completeFileName = getFullPathName(fileName);
1224 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1225 #ifdef USE_COMPRESSION
1226 // check for gzipped or bzipped file
1227 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1228 string tempName = string(tmpnam(0));
1229 mkfifo(tempName.c_str(), 0666);
1230 int fork_result = fork();
1231 if (fork_result < 0) {
1232 cerr << "Error forking.\n";
1234 } else if (fork_result == 0) {
// child process: decompress into the pipe, then remove the pipe
1235 string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
1236 cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1237 system(command.c_str());
1238 cerr << "Done decompressing " << completeFileName << "\n";
1239 mothurRemove(tempName);
1242 cerr << "waiting on child process " << fork_result << "\n";
1243 completeFileName = tempName;
1249 fileHandle.open(completeFileName.c_str(), ios::binary);
1251 //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1255 //check for blank file
1257 //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
1262 catch(exception& e) {
1263 errorOut(e, "MothurOut", "openInputFileBinary - no error");
1268 /***********************************************************************/
// Renames oldName to newName. Returns 0 immediately when the two names are identical.
// The target is first probed with the quiet openInputFile overload. On Unix-like builds an
// existing target is deleted with `rm` and the move is done with `mv`, both through system();
// the other build path calls mothurRemove(newName) followed by rename().
// NOTE(review): the shell commands concatenate the names unquoted, so names containing spaces or
// shell metacharacters will not be handled correctly; the system() results are not checked.
1270 int MothurOut::renameFile(string oldName, string newName){
1273 if (oldName == newName) { return 0; }
1276 int exist = openInputFile(newName, inTest, "");
1279 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1280 if (exist == 0) { //you could open it so you want to delete it
1281 string command = "rm " + newName;
1282 system(command.c_str());
1285 string command = "mv " + oldName + " " + newName;
1286 system(command.c_str());
1288 mothurRemove(newName);
1289 int renameOk = rename(oldName.c_str(), newName.c_str());
1294 catch(exception& e) {
1295 errorOut(e, "MothurOut", "renameFile");
1300 /***********************************************************************/
// Opens fileName for text output on fileHandle, truncating any existing content (ios::trunc).
// With USE_COMPRESSION on Unix-like builds, a ".gz"/".bz2" target is written through a FIFO:
// a forked child runs gzip/bzip2 reading from the pipe and writing the real file, and the stream
// is opened on the pipe's name instead.
// A failed open is reported as "[ERROR]: Could not open ...".
// NOTE(review): return values are set on lines not shown here — presumably 0 on success; confirm.
// NOTE(review): tmpnam()+mkfifo() is a name race and the command line is built without quoting.
1302 int MothurOut::openOutputFile(string fileName, ofstream& fileHandle){
1305 string completeFileName = getFullPathName(fileName);
1306 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1307 #ifdef USE_COMPRESSION
1308 // check for gzipped file
1309 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1310 string tempName = string(tmpnam(0));
1311 mkfifo(tempName.c_str(), 0666);
1312 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1313 int fork_result = fork();
1314 if (fork_result < 0) {
1315 cerr << "Error forking.\n";
1317 } else if (fork_result == 0) {
// child process: compress whatever arrives on the pipe into the requested file
1318 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1319 system(command.c_str());
1322 completeFileName = tempName;
1327 fileHandle.open(completeFileName.c_str(), ios::trunc);
1329 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1336 catch(exception& e) {
1337 errorOut(e, "MothurOut", "openOutputFile");
1342 /***********************************************************************/
// Binary-mode counterpart of openOutputFile(): opens fileName on fileHandle with
// ios::trunc | ios::binary. With USE_COMPRESSION on Unix-like builds, ".gz"/".bz2" targets are
// written through a tmpnam()/mkfifo() pipe drained by a forked gzip/bzip2 child.
// A failed open is reported as "[ERROR]: Could not open ...".
// NOTE(review): return values are set on lines not shown here — presumably 0 on success; confirm.
1344 int MothurOut::openOutputFileBinary(string fileName, ofstream& fileHandle){
1347 string completeFileName = getFullPathName(fileName);
1348 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1349 #ifdef USE_COMPRESSION
1350 // check for gzipped file
1351 if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
1352 string tempName = string(tmpnam(0));
1353 mkfifo(tempName.c_str(), 0666);
1354 cerr << "Compressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
1355 int fork_result = fork();
1356 if (fork_result < 0) {
1357 cerr << "Error forking.\n";
1359 } else if (fork_result == 0) {
// child process: compress whatever arrives on the pipe into the requested file
1360 string command = string(endsWith(completeFileName, ".gz") ? "gzip" : "bzip2") + " -v > " + completeFileName + string(" < ") + tempName;
1361 system(command.c_str());
1364 completeFileName = tempName;
1369 fileHandle.open(completeFileName.c_str(), ios::trunc | ios::binary);
1371 mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
1378 catch(exception& e) {
1379 errorOut(e, "MothurOut", "openOutputFileBinary");
1384 /**************************************************************************************************/
// Appends the contents of file `temp` to the end of file `filename`.
// The output is opened with openOutputFileAppend() and the input with the quiet openInputFile
// overload; if the input cannot be opened nothing is copied and no error is printed.
// Data is copied in 4096-byte reads, writing gcount() bytes each time so a short final read is
// handled, and every '\n' byte copied is counted in numLines.
// NOTE(review): the return statement is not shown here — presumably returns numLines; confirm.
1385 int MothurOut::appendFiles(string temp, string filename) {
1390 //open output file in append mode
1391 openOutputFileAppend(filename, output);
1392 int ableToOpen = openInputFile(temp, input, "no error");
1393 //int ableToOpen = openInputFile(temp, input);
1396 if (ableToOpen == 0) { //you opened it
1399 while (!input.eof()) {
1400 input.read(buffer, 4096);
1401 output.write(buffer, input.gcount());
1402 //count number of lines
1403 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1412 catch(exception& e) {
1413 errorOut(e, "MothurOut", "appendFiles");
1417 /**************************************************************************************************/
// Appends the raw bytes of file `temp` to the end of file `filename`.
// Uses the binary open helpers (openOutputFileBinaryAppend / quiet openInputFileBinary); if the
// input cannot be opened nothing is copied and no error is printed.
// Data is copied in 4096-byte reads, writing gcount() bytes per read. Unlike appendFiles(),
// no line counting is done in the lines visible here.
// NOTE(review): the return value is set on a line not shown here; confirm what callers receive.
1418 int MothurOut::appendBinaryFiles(string temp, string filename) {
1423 //open output file in append mode
1424 openOutputFileBinaryAppend(filename, output);
1425 int ableToOpen = openInputFileBinary(temp, input, "no error");
1427 if (ableToOpen == 0) { //you opened it
1430 while (!input.eof()) {
1431 input.read(buffer, 4096);
1432 output.write(buffer, input.gcount());
1441 catch(exception& e) {
1442 errorOut(e, "MothurOut", "appendBinaryFiles");
1447 /**************************************************************************************************/
1448 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
1453 //open output file in append mode
1454 openOutputFileAppend(filename, output);
1455 int ableToOpen = openInputFile(temp, input, "no error");
1456 //int ableToOpen = openInputFile(temp, input);
1459 if (ableToOpen == 0) { //you opened it
1461 string headers = getline(input); gobble(input);
1462 if (debug) { mothurOut("[DEBUG]: skipping headers " + headers +'\n'); }
1465 while (!input.eof()) {
1466 input.read(buffer, 4096);
1467 output.write(buffer, input.gcount());
1468 //count number of lines
1469 for (int i = 0; i < input.gcount(); i++) { if (buffer[i] == '\n') {numLines++;} }
1478 catch(exception& e) {
1479 errorOut(e, "MothurOut", "appendFiles");
1483 /**************************************************************************************************/
// Sorts a column-formatted distance file and writes the result to
// getRootName(distFile) + "sorted.dist".
// Unix-like builds shell out to `sort -n -k +3`. Other builds rewrite each
// (firstName, secondName, dist) record with the distance first into "<distFile>.temp", run the
// Windows `sort` command on it, swap the columns back into the output file and delete both
// temporary files.
// outputDir is only referenced by a commented-out line in the code visible here.
// NOTE(review): the return statement is not shown — presumably returns outfile; confirm.
// NOTE(review): file names are concatenated into the shell command unquoted.
1484 string MothurOut::sortFile(string distFile, string outputDir){
1487 //if (outputDir == "") { outputDir += hasPath(distFile); }
1488 string outfile = getRootName(distFile) + "sorted.dist";
1491 //if you can, use the unix sort since its been optimized for years
1492 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1493 string command = "sort -n -k +3 " + distFile + " -o " + outfile;
1494 system(command.c_str());
1495 #else //you are stuck with my best attempt...
1496 //windows sort does not have a way to specify a column, only a character in the line
1497 //since we cannot assume that the distance will always be at the the same character location on each line
1498 //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
1500 //read in file line by file and put distance first
1501 string tempDistFile = distFile + ".temp";
1504 openInputFile(distFile, input);
1505 openOutputFile(tempDistFile, output);
1507 string firstName, secondName;
1509 while (!input.eof()) {
1510 input >> firstName >> secondName >> dist;
1511 output << dist << '\t' << firstName << '\t' << secondName << endl;
1518 //sort using windows sort
1519 string tempOutfile = outfile + ".temp";
1520 string command = "sort " + tempDistFile + " /O " + tempOutfile;
1521 system(command.c_str());
1523 //read in sorted file and put distance at end again
1526 openInputFile(tempOutfile, input2);
1527 openOutputFile(outfile, output2);
1529 while (!input2.eof()) {
1530 input2 >> dist >> firstName >> secondName;
1531 output2 << firstName << '\t' << secondName << '\t' << dist << endl;
1538 mothurRemove(tempDistFile);
1539 mothurRemove(tempOutfile);
1544 catch(exception& e) {
1545 errorOut(e, "MothurOut", "sortFile");
1549 /**************************************************************************************************/
// Builds a list of byte offsets into `filename`, read in binary mode one character at a time.
// `count` tracks how many characters have been consumed, and count-1 (the offset of the character
// just read) is pushed onto `positions`. The test guarding that push is on a line not shown here;
// presumably it is c == '>' (start of a FASTA record) — confirm.
// `num` receives the number of offsets recorded; afterwards the file size in bytes (found by
// seeking to the end with fopen/fseek) is appended as a final sentinel entry.
// NOTE(review): the return statement is not shown — presumably returns positions.
// The older getline-based implementation is kept below as commented-out code.
1550 vector<unsigned long long> MothurOut::setFilePosFasta(string filename, int& num) {
1552 vector<unsigned long long> positions;
1554 //openInputFile(filename, inFASTA);
1555 inFASTA.open(filename.c_str(), ios::binary);
1558 unsigned long long count = 0;
1559 while(!inFASTA.eof()){
1560 //input = getline(inFASTA);
1561 //cout << input << '\t' << inFASTA.tellg() << endl;
1562 //if (input.length() != 0) {
1563 // if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); cout << (pos - input.length() - 1) << endl; }
1565 //gobble(inFASTA); //has to be here since windows line endings are 2 characters and mess up the positions
1566 char c = inFASTA.get(); count++;
1568 positions.push_back(count-1);
1569 if (debug) { mothurOut("[DEBUG]: numSeqs = " + toString(positions.size()) + " count = " + toString(count) + ".\n"); }
1574 num = positions.size();
1575 if (debug) { mothurOut("[DEBUG]: num = " + toString(num) + ".\n"); }
1577 unsigned long long size;
1579 //get num bytes in file
1580 pFile = fopen (filename.c_str(),"rb");
1581 if (pFile==NULL) perror ("Error opening file");
1583 fseek (pFile, 0, SEEK_END);
1588 /*unsigned long long size = positions[(positions.size()-1)];
1590 openInputFile(filename, in);
1595 if(in.eof()) { break; }
1600 if (debug) { mothurOut("[DEBUG]: size = " + toString(size) + ".\n"); }
1602 positions.push_back(size);
1607 catch(exception& e) {
1608 errorOut(e, "MothurOut", "setFilePosFasta");
1612 //**********************************************************************************************************************
// Reads a consensus-taxonomy file into a vector of consTax records.
// Each record is parsed as three whitespace-separated fields (otu, size, tax) and stored as
// consTax(otu, tax, size); `tax` starts as "unknown" for each record before it is read.
// Reading stops early when control_pressed is set.
// NOTE(review): any header-line handling and the return statement are on lines not shown here —
// presumably returns taxes; confirm.
1613 vector<consTax> MothurOut::readConsTax(string inputfile){
1616 vector<consTax> taxes;
1619 openInputFile(inputfile, in);
1626 if (control_pressed) { break; }
1628 string otu = ""; string tax = "unknown";
1631 in >> otu >> size >> tax; gobble(in);
1632 consTax temp(otu, tax, size);
1633 taxes.push_back(temp);
1639 catch(exception& e) {
1640 errorOut(e, "MothurOut", "readConsTax");
1644 //**********************************************************************************************************************
// Map-filling overload of readConsTax(): parses (otu, size, tax) records from inputfile and
// builds a consTax2(tax, size) for each one. Reading stops early when control_pressed is set.
// NOTE(review): the statement that stores `temp` into `taxes` and the return statement are on
// lines not shown here — presumably taxes is keyed by otu; confirm.
1645 int MothurOut::readConsTax(string inputfile, map<string, consTax2>& taxes){
1648 openInputFile(inputfile, in);
1655 if (control_pressed) { break; }
1657 string otu = ""; string tax = "unknown";
1660 in >> otu >> size >> tax; gobble(in);
1661 consTax2 temp(tax, size);
1668 catch(exception& e) {
1669 errorOut(e, "MothurOut", "readConsTax");
1673 /**************************************************************************************************/
// Records the byte offset at which each line of `filename` starts.
// The file is read in binary mode one character at a time while `count` tracks the characters
// consumed. Offset 0 is recorded first; after each line terminator ('\n', '\r' or '\f') any
// following whitespace is skipped and count-1 is recorded as the start of the next line.
// `num` is set to positions.size()-1, and the last entry is overwritten with the file size in
// bytes obtained via fopen/fseek.
// NOTE(review): `d != in.eof()` compares a char with the bool returned by eof(), so it does not
// test for end-of-file as the surrounding code suggests — looks like a latent bug; confirm intent.
// NOTE(review): the return statement is not shown — presumably returns positions.
1674 vector<unsigned long long> MothurOut::setFilePosEachLine(string filename, int& num) {
1676 filename = getFullPathName(filename);
1678 vector<unsigned long long> positions;
1680 //openInputFile(filename, in);
1681 in.open(filename.c_str(), ios::binary);
1684 unsigned long long count = 0;
1685 positions.push_back(0);
1688 //getline counting reads
1689 char d = in.get(); count++;
1690 while ((d != '\n') && (d != '\r') && (d != '\f') && (d != in.eof())) {
1691 //get next character
1697 d=in.get(); count++;
1698 while(isspace(d) && (d != in.eof())) { d=in.get(); count++;}
1700 positions.push_back(count-1);
1701 //cout << count-1 << endl;
1705 num = positions.size()-1;
1708 unsigned long long size;
1710 //get num bytes in file
1711 pFile = fopen (filename.c_str(),"rb");
1712 if (pFile==NULL) perror ("Error opening file");
1714 fseek (pFile, 0, SEEK_END);
1719 positions[(positions.size()-1)] = size;
1723 catch(exception& e) {
1724 errorOut(e, "MothurOut", "setFilePosEachLine");
1728 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges that each begin at a '>' character, so that
// FASTA records are not cut in half. Returns the boundary offsets: the first entry is 0 and the
// last is the file size.
//   proc - in: desired number of pieces; out: number of pieces actually produced
// The file size is found with fopen/fseek. On Unix-like builds an even chunk size (size / proc)
// gives the estimated break points, and each break is moved forward to the next '>' found.
// When the chunk size is 0 the whole file is returned as a single piece. Boundaries that do not
// strictly increase are erased in the sanity pass before `proc` is recomputed.
// Other builds print an error and return the whole file as one range.
// NOTE(review): `sanityPos == -1` relies on -1 converting to the maximum unsigned value, which is
// what a failed tellg() yields after conversion.
// NOTE(review): `c` is declared on a line not shown here; if it is a plain char on a platform
// where char is unsigned, `int(c) == -1` can never be true — confirm.
1730 vector<unsigned long long> MothurOut::divideFile(string filename, int& proc) {
1732 vector<unsigned long long> filePos;
1733 filePos.push_back(0);
1736 unsigned long long size;
1738 filename = getFullPathName(filename);
1740 //get num bytes in file
1741 pFile = fopen (filename.c_str(),"rb");
1742 if (pFile==NULL) perror ("Error opening file");
1744 fseek (pFile, 0, SEEK_END);
1749 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1751 //estimate file breaks
1752 unsigned long long chunkSize = 0;
1753 chunkSize = size / proc;
1755 //file too small to divide by processors
1756 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1758 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
1759 for (int i = 0; i < proc; i++) {
1760 unsigned long long spot = (i+1) * chunkSize;
1763 openInputFile(filename, in);
1767 unsigned long long newSpot = spot;
1771 if (c == '>') { in.putback(c); newSpot = in.tellg(); break; }
1772 else if (int(c) == -1) { break; }
1776 //there was not another sequence before the end of the file
1777 unsigned long long sanityPos = in.tellg();
1779 if (sanityPos == -1) { break; }
1780 else { filePos.push_back(newSpot); }
1786 filePos.push_back(size);
1788 //sanity check filePos
1789 for (int i = 0; i < (filePos.size()-1); i++) {
1790 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1793 proc = (filePos.size() - 1);
1795 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1797 filePos.push_back(size);
1801 catch(exception& e) {
1802 errorOut(e, "MothurOut", "divideFile");
1806 /**************************************************************************************************/
// Splits `filename` into up to `proc` byte ranges that each begin at the start of a line.
// Returns the boundary offsets: the first entry is 0 and the last is the file size.
//   proc - in: desired number of pieces; out: number of pieces actually produced
// Works like divideFile(), except that each estimated break is moved forward to just past the
// next line terminator ('\n', '\r' or '\f', plus the whitespace consumed by gobble()).
// When the chunk size is 0 the whole file is returned as a single piece; boundaries that do not
// strictly increase are erased before `proc` is recomputed.
// NOTE(review): the non-Unix error message and the errorOut label both say "divideFile" although
// this is divideFilePerLine — looks like copy/paste; worth correcting.
1808 vector<unsigned long long> MothurOut::divideFilePerLine(string filename, int& proc) {
1810 vector<unsigned long long> filePos;
1811 filePos.push_back(0);
1814 unsigned long long size;
1816 filename = getFullPathName(filename);
1818 //get num bytes in file
1819 pFile = fopen (filename.c_str(),"rb");
1820 if (pFile==NULL) perror ("Error opening file");
1822 fseek (pFile, 0, SEEK_END);
1827 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1829 //estimate file breaks
1830 unsigned long long chunkSize = 0;
1831 chunkSize = size / proc;
1833 //file too small to divide by processors
1834 if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; }
1836 //for each process seekg to closest file break and search for the next line break. make that the filebreak
1837 for (int i = 0; i < proc; i++) {
1838 unsigned long long spot = (i+1) * chunkSize;
1841 openInputFile(filename, in);
1844 //look for next line break
1845 unsigned long long newSpot = spot;
1849 if ((c == '\n') || (c == '\r') || (c == '\f')) { gobble(in); newSpot = in.tellg(); break; }
1850 else if (int(c) == -1) { break; }
1853 //there was not another line before the end of the file
1854 unsigned long long sanityPos = in.tellg();
1856 if (sanityPos == -1) { break; }
1857 else { filePos.push_back(newSpot); }
1863 filePos.push_back(size);
1865 //sanity check filePos
1866 for (int i = 0; i < (filePos.size()-1); i++) {
1867 if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; }
1870 proc = (filePos.size() - 1);
1872 mothurOut("[ERROR]: Windows version should not be calling the divideFile function."); mothurOutEndLine();
1874 filePos.push_back(size);
1878 catch(exception& e) {
1879 errorOut(e, "MothurOut", "divideFile");
1883 /**************************************************************************************************/
// Physically splits `filename` into chunk files named "<filename>.<i>.tmp".
// The byte ranges come from divideFile(filename, proc); each range is read into a heap buffer and
// written to its own file, and the chunk file names are appended to `files`.
// NOTE(review): `chunk` is filled by in.read() and then streamed with operator<<, which expects a
// NUL-terminated string; no terminator is written in the lines visible here, so the write may
// run past the buffer or stop early at an embedded NUL — confirm and prefer out.write(chunk, size).
// NOTE(review): the matching delete[] and the return statement are on lines not shown here.
1884 int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
1887 vector<unsigned long long> filePos = divideFile(filename, proc);
1889 for (int i = 0; i < (filePos.size()-1); i++) {
1893 openInputFile(filename, in);
1894 in.seekg(filePos[i]);
1895 unsigned long long size = filePos[(i+1)] - filePos[i];
1896 char* chunk = new char[size];
1897 in.read(chunk, size);
1901 string fileChunkName = filename + "." + toString(i) + ".tmp";
1903 openOutputFile(fileChunkName, out);
1905 out << chunk << endl;
1910 files.push_back(fileChunkName);
1915 catch(exception& e) {
1916 errorOut(e, "MothurOut", "divideFile");
1920 /***********************************************************************/
1922 bool MothurOut::isTrue(string f){
1925 for (int i = 0; i < f.length(); i++) { f[i] = toupper(f[i]); }
1927 if ((f == "TRUE") || (f == "T")) { return true; }
1928 else { return false; }
1930 catch(exception& e) {
1931 errorOut(e, "MothurOut", "isTrue");
1936 /***********************************************************************/
1938 float MothurOut::roundDist(float dist, int precision){
1940 return int(dist * precision + 0.5)/float(precision);
1942 catch(exception& e) {
1943 errorOut(e, "MothurOut", "roundDist");
1947 /***********************************************************************/
1949 float MothurOut::ceilDist(float dist, int precision){
1951 return int(ceil(dist * precision))/float(precision);
1953 catch(exception& e) {
1954 errorOut(e, "MothurOut", "ceilDist");
1958 /***********************************************************************/
1960 vector<string> MothurOut::splitWhiteSpace(string& rest, char buffer[], int size){
1962 vector<string> pieces;
1964 for (int i = 0; i < size; i++) {
1965 if (!isspace(buffer[i])) { rest += buffer[i]; }
1967 if (rest != "") { pieces.push_back(rest); rest = ""; }
1968 while (i < size) { //gobble white space
1969 if (isspace(buffer[i])) { i++; }
1970 else { rest = buffer[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1977 catch(exception& e) {
1978 errorOut(e, "MothurOut", "splitWhiteSpace");
1982 /***********************************************************************/
1983 vector<string> MothurOut::splitWhiteSpace(string input){
1985 vector<string> pieces;
1988 for (int i = 0; i < input.length(); i++) {
1989 if (!isspace(input[i])) { rest += input[i]; }
1991 if (rest != "") { pieces.push_back(rest); rest = ""; }
1992 while (i < input.length()) { //gobble white space
1993 if (isspace(input[i])) { i++; }
1994 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
1999 if (rest != "") { pieces.push_back(rest); }
2003 catch(exception& e) {
2004 errorOut(e, "MothurOut", "splitWhiteSpace");
2008 /***********************************************************************/
// Splits `input` into whitespace-delimited tokens while keeping text enclosed in single or double
// quotes together as one token.
// When the input contains neither ' nor " it simply delegates to splitWhiteSpace(input).
// Otherwise, on reaching a quote character the inner loop copies characters up to the next quote
// (or the end of the string) into the current token; unquoted text is split on whitespace as in
// splitWhiteSpace().
// NOTE(review): an opening ' is closed by either ' or " — the two quote kinds are not matched.
// NOTE(review): find() results are stored in int and compared with string::npos; this relies on
// implementation-defined narrowing of npos to -1.
// NOTE(review): errorOut reports "splitWhiteSpace" rather than this function's own name.
2009 vector<string> MothurOut::splitWhiteSpaceWithQuotes(string input){
2011 vector<string> pieces;
2014 int pos = input.find('\'');
2015 int pos2 = input.find('\"');
2017 if ((pos == string::npos) && (pos2 == string::npos)) { return splitWhiteSpace(input); } //no quotes to worry about
2019 for (int i = 0; i < input.length(); i++) {
2020 if ((input[i] == '\'') || (input[i] == '\"') || (rest == "\'") || (rest == "\"")) { //grab everything til end or next ' or "
2022 for (int j = i+1; j < input.length(); j++) {
2023 if ((input[j] == '\'') || (input[j] == '\"')) { //then quit
2027 }else { rest += input[j]; }
2029 }else if (!isspace(input[i])) { rest += input[i]; }
2031 if (rest != "") { pieces.push_back(rest); rest = ""; }
2032 while (i < input.length()) { //gobble white space
2033 if (isspace(input[i])) { i++; }
2034 else { rest = input[i]; break; } //cout << "next piece buffer = " << nextPiece << endl;
2039 if (rest != "") { pieces.push_back(rest); }
2043 catch(exception& e) {
2044 errorOut(e, "MothurOut", "splitWhiteSpace");
2048 //**********************************************************************************************************************
2049 int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
2053 openInputFile(namefile, in);
2057 bool pairDone = false;
2058 bool columnOne = true;
2059 string firstCol, secondCol;
2063 if (control_pressed) { break; }
2065 in.read(buffer, 4096);
2066 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2068 for (int i = 0; i < pieces.size(); i++) {
2069 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2070 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2073 checkName(firstCol);
2074 //are there confidence scores, if so remove them
2075 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2076 map<string, string>::iterator itTax = taxMap.find(firstCol);
2078 if(itTax == taxMap.end()) {
2079 bool ignore = false;
2080 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2082 if (!ignore) { taxMap[firstCol] = secondCol; }
2083 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2085 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique.\n"); error = true;
2094 vector<string> pieces = splitWhiteSpace(rest);
2096 for (int i = 0; i < pieces.size(); i++) {
2097 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2098 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2101 checkName(firstCol);
2102 //are there confidence scores, if so remove them
2103 if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
2104 map<string, string>::iterator itTax = taxMap.find(firstCol);
2106 if(itTax == taxMap.end()) {
2107 bool ignore = false;
2108 if (secondCol != "") { if (secondCol[secondCol.length()-1] != ';') { mothurOut("[ERROR]: " + firstCol + " is missing the final ';', ignoring.\n"); ignore=true; }
2110 if (!ignore) { taxMap[firstCol] = secondCol; }
2111 if (debug) { mothurOut("[DEBUG]: name = '" + firstCol + "' tax = '" + secondCol + "'\n"); }
2113 mothurOut("[ERROR]: " + firstCol + " is already in your taxonomy file, names must be unique./n"); error = true;
2121 if (error) { control_pressed = true; }
2122 if (debug) { mothurOut("[DEBUG]: numSeqs saved = '" + toString(taxMap.size()) + "'\n"); }
2123 return taxMap.size();
2126 catch(exception& e) {
2127 errorOut(e, "MothurOut", "readTax");
2131 /**********************************************************************************************************************/
// Reads a two-column names file and maps every name listed in the second column to the
// first-column name of its row: nameMap[member] = firstCol.
// The second column is split on commas with splitAtComma(). The file is read in 4096-byte blocks
// tokenised by splitWhiteSpace(), with `rest` carrying a token that spans two blocks; whatever is
// left in `rest` afterwards is processed by the second, identical loop.
// Both columns pass through checkName() before use. Returns nameMap.size().
// `redund` is not read in the lines visible here; it appears to exist only to select this overload.
2132 int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool redund) {
2136 openInputFile(namefile, in);
2140 bool pairDone = false;
2141 bool columnOne = true;
2142 string firstCol, secondCol;
2145 if (control_pressed) { break; }
2147 in.read(buffer, 4096);
2148 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2150 for (int i = 0; i < pieces.size(); i++) {
2151 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2152 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2155 checkName(firstCol);
2156 checkName(secondCol);
2158 //parse names into vector
2159 vector<string> theseNames;
2160 splitAtComma(secondCol, theseNames);
2161 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2169 vector<string> pieces = splitWhiteSpace(rest);
2171 for (int i = 0; i < pieces.size(); i++) {
2172 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2173 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2176 checkName(firstCol);
2177 checkName(secondCol);
2179 //parse names into vector
2180 vector<string> theseNames;
2181 splitAtComma(secondCol, theseNames);
2182 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2188 return nameMap.size();
2191 catch(exception& e) {
2192 errorOut(e, "MothurOut", "readNames");
2196 /**********************************************************************************************************************/
// Reads a two-column names file with the mapping reversed: the whole second column is the key
// and the first column is the value, nameMap[secondCol] = firstCol. The second column is not
// split at commas.
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(); text left in `rest` after
// the last block is processed by the second loop. Both columns pass through checkName().
// Returns nameMap.size().
// `flip` is not read in the lines visible here; it appears to exist only to select this overload.
2197 int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip) {
2201 openInputFile(namefile, in);
2205 bool pairDone = false;
2206 bool columnOne = true;
2207 string firstCol, secondCol;
2210 if (control_pressed) { break; }
2212 in.read(buffer, 4096);
2213 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2215 for (int i = 0; i < pieces.size(); i++) {
2216 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2217 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2220 checkName(firstCol);
2221 checkName(secondCol);
2222 nameMap[secondCol] = firstCol;
2230 vector<string> pieces = splitWhiteSpace(rest);
2232 for (int i = 0; i < pieces.size(); i++) {
2233 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2234 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2237 checkName(firstCol);
2238 checkName(secondCol);
2239 nameMap[secondCol] = firstCol;
2245 return nameMap.size();
2248 catch(exception& e) {
2249 errorOut(e, "MothurOut", "readNames");
2253 /**********************************************************************************************************************/
// Reads a two-column names file into two maps, both of which are cleared first:
//   nameMap[member]     = firstCol, for every comma-separated name in the second column
//   nameCount[firstCol] = number of names listed in the second column
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(); text left in `rest` after
// the last block is processed by the second loop. Both columns pass through checkName().
// Returns nameMap.size().
2254 int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<string, int>& nameCount) {
2256 nameMap.clear(); nameCount.clear();
2259 openInputFile(namefile, in);
2263 bool pairDone = false;
2264 bool columnOne = true;
2265 string firstCol, secondCol;
2268 if (control_pressed) { break; }
2270 in.read(buffer, 4096);
2271 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2273 for (int i = 0; i < pieces.size(); i++) {
2274 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2275 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2278 checkName(firstCol);
2279 checkName(secondCol);
2280 //parse names into vector
2281 vector<string> theseNames;
2282 splitAtComma(secondCol, theseNames);
2283 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2284 nameCount[firstCol] = theseNames.size();
2292 vector<string> pieces = splitWhiteSpace(rest);
2294 for (int i = 0; i < pieces.size(); i++) {
2295 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2296 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2299 checkName(firstCol);
2300 checkName(secondCol);
2301 //parse names into vector
2302 vector<string> theseNames;
2303 splitAtComma(secondCol, theseNames);
2304 for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
2305 nameCount[firstCol] = theseNames.size();
2311 return nameMap.size();
2314 catch(exception& e) {
2315 errorOut(e, "MothurOut", "readNames");
2319 /**********************************************************************************************************************/
// Reads a two-column names file verbatim: nameMap[firstCol] = secondCol, where the second column
// is kept as the unsplit comma-separated string.
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(); text left in `rest` after
// the last block is processed by the second loop. Both columns pass through checkName().
// Returns nameMap.size().
2320 int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
2324 openInputFile(namefile, in);
2328 bool pairDone = false;
2329 bool columnOne = true;
2330 string firstCol, secondCol;
2333 if (control_pressed) { break; }
2335 in.read(buffer, 4096);
2336 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2338 for (int i = 0; i < pieces.size(); i++) {
2339 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2340 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2343 checkName(firstCol);
2344 checkName(secondCol);
2345 nameMap[firstCol] = secondCol; pairDone = false; }
2351 vector<string> pieces = splitWhiteSpace(rest);
2353 for (int i = 0; i < pieces.size(); i++) {
2354 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2355 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2358 checkName(firstCol);
2359 checkName(secondCol);
2360 nameMap[firstCol] = secondCol; pairDone = false; }
2364 return nameMap.size();
2367 catch(exception& e) {
2368 errorOut(e, "MothurOut", "readNames");
2372 /**********************************************************************************************************************/
// Reads a two-column names file into nameMap[firstCol] = list of names, where the list is the
// second column split at commas by splitAtComma().
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(); text left in `rest` after
// the last block is processed by the second loop. Both columns pass through checkName().
// Returns nameMap.size().
2373 int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap) {
2377 openInputFile(namefile, in);
2381 bool pairDone = false;
2382 bool columnOne = true;
2383 string firstCol, secondCol;
2386 if (control_pressed) { break; }
2388 in.read(buffer, 4096);
2389 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2391 for (int i = 0; i < pieces.size(); i++) {
2392 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2393 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2396 checkName(firstCol);
2397 checkName(secondCol);
2398 vector<string> temp;
2399 splitAtComma(secondCol, temp);
2400 nameMap[firstCol] = temp;
2408 vector<string> pieces = splitWhiteSpace(rest);
2410 for (int i = 0; i < pieces.size(); i++) {
2411 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2412 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2415 checkName(firstCol);
2416 checkName(secondCol);
2417 vector<string> temp;
2418 splitAtComma(secondCol, temp);
2419 nameMap[firstCol] = temp;
2425 return nameMap.size();
2427 catch(exception& e) {
2428 errorOut(e, "MothurOut", "readNames");
2432 /**********************************************************************************************************************/
// Reads a two-column names file and returns, for each first-column name, the value
// getNumNames(secondCol) computed from its second column: nameMap[firstCol] = num.
// The file is read in 4096-byte blocks tokenised by splitWhiteSpace(); text left in `rest` after
// the last block is processed by the second loop. Both columns pass through checkName().
// NOTE(review): the return statement is not shown — presumably returns nameMap.
2433 map<string, int> MothurOut::readNames(string namefile) {
2435 map<string, int> nameMap;
2439 openInputFile(namefile, in);
2443 bool pairDone = false;
2444 bool columnOne = true;
2445 string firstCol, secondCol;
2448 if (control_pressed) { break; }
2450 in.read(buffer, 4096);
2451 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2453 for (int i = 0; i < pieces.size(); i++) {
2454 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2455 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2458 checkName(firstCol);
2459 checkName(secondCol);
2460 int num = getNumNames(secondCol);
2461 nameMap[firstCol] = num;
2469 vector<string> pieces = splitWhiteSpace(rest);
2470 for (int i = 0; i < pieces.size(); i++) {
2471 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2472 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2475 checkName(firstCol);
2476 checkName(secondCol);
2477 int num = getNumNames(secondCol);
2478 nameMap[firstCol] = num;
2487 catch(exception& e) {
2488 errorOut(e, "MothurOut", "readNames");
2492 /**********************************************************************************************************************/
// Same as the single-argument readNames: maps each first-column name to the
// getNumNames() value of its second column, reading the file in 4096-byte chunks.
// NOTE(review): numSeqs is not touched on any visible line; presumably it is
// updated on lines missing from this listing -- confirm against the full source.
2493 map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
2495 map<string, int> nameMap;
2500 openInputFile(namefile, in);
// pairDone is raised once both columns of a record have been collected.
2504 bool pairDone = false;
2505 bool columnOne = true;
2506 string firstCol, secondCol;
// Abandon the read when the user has interrupted the run.
2509 if (control_pressed) { break; }
2511 in.read(buffer, 4096);
2512 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2514 for (int i = 0; i < pieces.size(); i++) {
2515 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2516 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// checkName rewrites ':' to '_' in both columns before they are stored.
2519 checkName(firstCol);
2520 checkName(secondCol);
2521 int num = getNumNames(secondCol);
2522 nameMap[firstCol] = num;
// Second pass over whatever is still held in 'rest'; same pairing logic as above.
2531 vector<string> pieces = splitWhiteSpace(rest);
2532 for (int i = 0; i < pieces.size(); i++) {
2533 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2534 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2537 checkName(firstCol);
2538 checkName(secondCol);
2539 int num = getNumNames(secondCol);
2540 nameMap[firstCol] = num;
2550 catch(exception& e) {
2551 errorOut(e, "MothurOut", "readNames");
2555 /************************************************************/
// Sanitizes a sequence name in place: every ':' becomes '_', and the member
// flag changedSeqNames is set whenever a replacement is made.
// NOTE(review): the return statement is on a line missing from this listing,
// so the meaning of the returned int cannot be stated from what is visible.
2556 int MothurOut::checkName(string& name) {
2559 for (int i = 0; i < name.length(); i++) {
2560 if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
2565 catch(exception& e) {
2566 errorOut(e, "MothurOut", "checkName");
2570 /**********************************************************************************************************************/
// Reads a two-column names file and, for each record, appends a seqPriorityNode
// built from (getNumNames of column two, the fastamap entry for column one,
// column one) to nameVector.
// A first-column name that is absent from fastamap is reported as an error.
// NOTE(review): this listing omits source lines (loop headers, the branch that
// separates the error report from the push_back, the return); confirm that the
// node is only built when the lookup succeeded, since it dereferences 'it'.
2571 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
2577 openInputFile(namefile, in);
// pairDone is raised once both columns of a record have been collected.
2581 bool pairDone = false;
2582 bool columnOne = true;
2583 string firstCol, secondCol;
// Abandon the read when the user has interrupted the run.
2586 if (control_pressed) { break; }
2588 in.read(buffer, 4096);
2589 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2591 for (int i = 0; i < pieces.size(); i++) {
2592 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2593 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
// checkName rewrites ':' to '_' in both columns before they are used.
2596 checkName(firstCol);
2597 checkName(secondCol);
2598 int num = getNumNames(secondCol);
// The names file and the fasta data must agree on the first-column name.
2600 map<string, string>::iterator it = fastamap.find(firstCol);
2601 if (it == fastamap.end()) {
2603 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2605 seqPriorityNode temp(num, it->second, firstCol);
2606 nameVector.push_back(temp);
// Second pass over whatever is still held in 'rest'; same logic as above.
2616 vector<string> pieces = splitWhiteSpace(rest);
2618 for (int i = 0; i < pieces.size(); i++) {
2619 if (columnOne) { firstCol = pieces[i]; columnOne=false; }
2620 else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
2623 checkName(firstCol);
2624 checkName(secondCol);
2625 int num = getNumNames(secondCol);
2627 map<string, string>::iterator it = fastamap.find(firstCol);
2628 if (it == fastamap.end()) {
2630 mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
2632 seqPriorityNode temp(num, it->second, firstCol);
2633 nameVector.push_back(temp);
2642 catch(exception& e) {
2643 errorOut(e, "MothurOut", "readNames");
2647 //**********************************************************************************************************************
// Reads an accnos file and collects every whitespace-separated piece into a
// set, after checkName has rewritten ':' to '_' in each piece.
// NOTE(review): this listing omits source lines (declarations of in, buffer,
// rest and names, the loop header, the return); only visible statements are described.
2648 set<string> MothurOut::readAccnos(string accnosfile){
2652 openInputFile(accnosfile, in);
// Abandon the read when the user has interrupted the run.
2659 if (control_pressed) { break; }
2661 in.read(buffer, 4096);
2662 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2664 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]);
2665 names.insert(pieces[i]);
// Second pass over whatever is still held in 'rest'.
2671 vector<string> pieces = splitWhiteSpace(rest);
2672 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
2676 catch(exception& e) {
2677 errorOut(e, "MothurOut", "readAccnos");
2681 //**********************************************************************************************************************
// Reads an accnos file and appends every whitespace-separated piece to 'names'
// in file order, after checkName has rewritten ':' to '_' in each piece.
// Unlike the set-returning overload, duplicates are kept.
// NOTE(review): this listing omits source lines (loop header, return value);
// only visible statements are described.
2682 int MothurOut::readAccnos(string accnosfile, vector<string>& names){
2686 openInputFile(accnosfile, in);
// Abandon the read when the user has interrupted the run.
2693 if (control_pressed) { break; }
2695 in.read(buffer, 4096);
2696 vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
2698 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
// Second pass over whatever is still held in 'rest'.
2703 vector<string> pieces = splitWhiteSpace(rest);
2704 for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
2709 catch(exception& e) {
2710 errorOut(e, "MothurOut", "readAccnos");
2714 /***********************************************************************/
// Scans 'names' character by character looking for commas.
// NOTE(review): the counter, its initial value and the return are on lines
// missing from this listing; presumably the result is the number of
// comma-separated names -- confirm against the full source.
2716 int MothurOut::getNumNames(string names){
2722 for(int i=0;i<names.size();i++){
2723 if(names[i] == ','){
2731 catch(exception& e) {
2732 errorOut(e, "MothurOut", "getNumNames");
2736 /***********************************************************************/
// Walks every character of 'line'.
// NOTE(review): the loop body and the return are on lines missing from this
// listing; presumably this counts occurrences of c in line -- confirm.
2738 int MothurOut::getNumChar(string line, char c){
2743 for(int i=0;i<line.size();i++){
2752 catch(exception& e) {
2753 errorOut(e, "MothurOut", "getNumChar");
2757 /***********************************************************************/
// Reduces a label to its numeric part: the digits of 'label' are collected in
// order, converted to an integer with mothurConvert, and turned back into a
// string with toString.
// NOTE(review): this listing omits source lines (declarations of simple and
// num1, the return); only visible statements are described.
2758 string MothurOut::getSimpleLabel(string label){
2762 //remove OTU or phylo tag
2763 string newLabel1 = "";
2764 for (int i = 0; i < label.length(); i++) {
2765 if(label[i]>47 && label[i]<58) { //is a digit
2766 newLabel1 += label[i];
2771 mothurConvert(newLabel1, num1);
2773 simple = toString(num1);
2777 catch(exception& e) {
// Report this function's own name; the handler previously named isLabelEquivalent.
2778 errorOut(e, "MothurOut", "getSimpleLabel");
2782 /***********************************************************************/
// Compares two labels by their digits only: the digits of each label are
// collected in order, converted with mothurConvert, and 'same' is set when the
// two numbers are equal. Non-digit text in either label is ignored.
// NOTE(review): this listing omits source lines (declarations of same, num1
// and num2, the return); only visible statements are described.
2784 bool MothurOut::isLabelEquivalent(string label1, string label2){
2788 //remove OTU or phylo tag
2789 string newLabel1 = "";
2790 for (int i = 0; i < label1.length(); i++) {
2791 if(label1[i]>47 && label1[i]<58) { //is a digit
2792 newLabel1 += label1[i];
2796 string newLabel2 = "";
2797 for (int i = 0; i < label2.length(); i++) {
2798 if(label2[i]>47 && label2[i]<58) { //is a digit
2799 newLabel2 += label2[i];
2804 mothurConvert(newLabel1, num1);
2805 mothurConvert(newLabel2, num2);
2807 if (num1 == num2) { same = true; }
2811 catch(exception& e) {
2812 errorOut(e, "MothurOut", "isLabelEquivalent");
2816 //**********************************************************************************************************************
// Tests whether every element of 'subset' also occurs in 'bigset', using a
// nested linear scan.
// The size check rejects any 'subset' longer than 'bigset' up front, so a
// subset that repeats elements can be rejected even if each element is present.
// NOTE(review): the declaration of 'match' and the final return are on lines
// missing from this listing.
2817 bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
2821 if (subset.size() > bigset.size()) { return false; }
2823 //check if each guy in subset is also in bigset
2824 for (int i = 0; i < subset.size(); i++) {
2826 for (int j = 0; j < bigset.size(); j++) {
2827 if (subset[i] == bigset[j]) { match = true; break; }
2830 //you have a guy in subset that had no match in bigset
2831 if (match == false) { return false; }
2837 catch(exception& e) {
2838 errorOut(e, "MothurOut", "isSubset");
2842 /***********************************************************************/
// Deletes a file: the name is first expanded with getFullPathName, then passed
// to remove(); the result of remove() is stored in 'error'.
// The commented-out lines are a disabled diagnostic that would have reported
// failures other than "file does not exist".
// NOTE(review): the return is on a line missing from this listing; presumably
// 'error' is what is returned -- confirm.
2843 int MothurOut::mothurRemove(string filename){
2845 filename = getFullPathName(filename);
2846 int error = remove(filename.c_str());
2848 // if (errno != ENOENT) { //ENOENT == file does not exist
2849 // string message = "Error deleting file " + filename;
2850 // perror(message.c_str());
2855 catch(exception& e) {
2856 errorOut(e, "MothurOut", "mothurRemove");
2860 /***********************************************************************/
// Converts 'item' to an int after screening it with isNumeric1.
// On the failure path an error is printed and commandInputsConvertError is set.
// NOTE(review): the conversion call, the branch separating success from
// failure, and the return are on lines missing from this listing; the meaning
// of the returned bool cannot be stated from what is visible.
2861 bool MothurOut::mothurConvert(string item, int& num){
2865 if (isNumeric1(item)) {
2870 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2871 commandInputsConvertError = true;
2876 catch(exception& e) {
2877 errorOut(e, "MothurOut", "mothurConvert");
2881 /***********************************************************************/
// Converts 'item' to an intDist after screening it with isNumeric1.
// On the failure path an error is printed and commandInputsConvertError is set.
// NOTE(review): the conversion call, the branch separating success from
// failure, and the return are on lines missing from this listing; the meaning
// of the returned bool cannot be stated from what is visible.
2882 bool MothurOut::mothurConvert(string item, intDist& num){
2886 if (isNumeric1(item)) {
2891 mothurOut("[ERROR]: cannot convert " + item + " to an integer."); mothurOutEndLine();
2892 commandInputsConvertError = true;
2897 catch(exception& e) {
2898 errorOut(e, "MothurOut", "mothurConvert");
2903 /***********************************************************************/
// Character-set screen used before numeric conversion: 'numeric' becomes true
// when stringToCheck contains nothing other than the characters 0-9, '.' and '-'.
// NOTE(review): this checks characters only, not number syntax. An empty
// string, or text such as "1-2" or "..", also passes the test.
// NOTE(review): the return is on a line missing from this listing.
2904 bool MothurOut::isNumeric1(string stringToCheck){
2906 bool numeric = false;
2908 if(stringToCheck.find_first_not_of("0123456789.-") == string::npos) { numeric = true; }
2912 catch(exception& e) {
2913 errorOut(e, "MothurOut", "isNumeric1");
2918 /***********************************************************************/
// Converts 'item' to a float after screening it with isNumeric1.
// On the failure path an error is printed and commandInputsConvertError is set.
// NOTE(review): the conversion call, the branch separating success from
// failure, and the return are on lines missing from this listing; the meaning
// of the returned bool cannot be stated from what is visible.
2919 bool MothurOut::mothurConvert(string item, float& num){
2923 if (isNumeric1(item)) {
2928 mothurOut("[ERROR]: cannot convert " + item + " to a float."); mothurOutEndLine();
2929 commandInputsConvertError = true;
2934 catch(exception& e) {
2935 errorOut(e, "MothurOut", "mothurConvert");
2939 /***********************************************************************/
// Converts 'item' to a double after screening it with isNumeric1.
// On the failure path an error is printed and commandInputsConvertError is set.
// NOTE(review): the conversion call, the branch separating success from
// failure, and the return are on lines missing from this listing; the meaning
// of the returned bool cannot be stated from what is visible.
2940 bool MothurOut::mothurConvert(string item, double& num){
2944 if (isNumeric1(item)) {
2949 mothurOut("[ERROR]: cannot convert " + item + " to a double."); mothurOutEndLine();
2950 commandInputsConvertError = true;
2955 catch(exception& e) {
2956 errorOut(e, "MothurOut", "mothurConvert");
2960 /**************************************************************************************************/
// Builds a (maxOrder+1) x (maxOrder+1) table of binomial coefficients, filling
// rows 2..maxOrder from the previous row with the recurrence
// binomial[i][j] = binomial[i-1][j-1] + binomial[i-1][j].
// NOTE(review): the seeding of the first rows/columns, the body of the loop at
// 2976 and the return are on lines missing from this listing.
2962 vector<vector<double> > MothurOut::binomial(int maxOrder){
2964 vector<vector<double> > binomial(maxOrder+1);
2966 for(int i=0;i<=maxOrder;i++){
2967 binomial[i].resize(maxOrder+1);
2976 for(int i=2;i<=maxOrder;i++){
2980 for(int i=2;i<=maxOrder;i++){
2981 for(int j=1;j<=maxOrder;j++){
// The i==j test is not chained to the next if/else, so for i==j the else
// branch below still runs and overwrites the 1 with the recurrence value.
2982 if(i==j){ binomial[i][j]=1; }
2983 if(j>i) { binomial[i][j]=0; }
2984 else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; }
2991 catch(exception& e) {
2992 errorOut(e, "MothurOut", "binomial");
2996 /**************************************************************************************************/
// Decodes a base-36 string into an unsigned int by folding left to right:
// num = 36 * num + value-of-character, with the character values looked up in
// the 'converts' map.
// NOTE(review): the lines that fill 'converts', declare i and c, advance the
// loop and return are missing from this listing. operator[] on the map yields
// 0 for a character that was never inserted.
2997 unsigned int MothurOut::fromBase36(string base36){
2999 unsigned int num = 0;
3001 map<char, int> converts;
3066 while (i < base36.length()) {
3068 num = 36 * num + converts[c];
3075 catch(exception& e) {
3076 errorOut(e, "MothurOut", "fromBase36");
3080 /***********************************************************************/
// Detects the byte order of the host by viewing the two bytes {1,0} as a short.
// On a little-endian host the first byte is the least significant one, so the
// short reads as 1; on a big-endian host it reads as 256.
// The labels were previously swapped (x == 1 was reported as BIG_ENDIAN).
// NOTE(review): callers are not visible here; confirm none of them compensated
// for the swapped labels.
// NOTE(review): the return is on a line missing from this listing.
3081 string MothurOut::findEdianness() {
3083 // find real endian type
3084 unsigned char EndianTest[2] = {1,0};
3085 short x = *(short *)EndianTest;
3087 string endianType = "unknown";
3088 if(x == 1) { endianType = "LITTLE_ENDIAN"; }
3089 else { endianType = "BIG_ENDIAN"; }
3093 catch(exception& e) {
3094 errorOut(e, "MothurOut", "findEdianness");
3098 /***********************************************************************/
// Computes the median of x. The vector is taken by value, so sorting it here
// does not disturb the caller's copy.
// Even length: mean of the two middle elements. Odd length: the middle element.
// NOTE(review): the declaration of 'value', the branch structure around the
// empty-input check and the return are on lines missing from this listing;
// confirm that the empty case skips the indexing below.
3099 double MothurOut::median(vector<double> x) {
3103 if (x.size() == 0) { } //error
3105 //For example, if a < b < c, then the median of the list {a, b, c} is b, and, if a < b < c < d, then the median of the list {a, b, c, d} is the mean of b and c; i.e., it is (b + c)/2.
3106 sort(x.begin(), x.end());
3108 if ((x.size()%2) == 0) { //size() is even. median = average of 2 midpoints
3109 int midIndex1 = (x.size()/2)-1;
3110 int midIndex2 = (x.size()/2);
3111 value = (x[midIndex1]+ x[midIndex2]) / 2.0;
3113 int midIndex = (x.size()/2);
3114 value = x[midIndex];
3119 catch(exception& e) {
3120 errorOut(e, "MothurOut", "median");
3124 /***********************************************************************/
// Iterates i from 1 to num inclusive.
// NOTE(review): the accumulator, the loop body and the return are on lines
// missing from this listing; presumably this multiplies up num! -- confirm.
// The result type is int, so large inputs would overflow it.
3125 int MothurOut::factorial(int num){
3129 for (int i = 1; i <= num; i++) {
3135 catch(exception& e) {
3136 errorOut(e, "MothurOut", "factorial");
3140 /***********************************************************************/
// Counts the '>' characters from the stream's current position to end of file.
// Every '>' byte is counted, not only those that start a line, and the stream
// is consumed in the process.
// NOTE(review): the return is on a line missing from this listing.
3142 int MothurOut::getNumSeqs(ifstream& file){
3144 int numSeqs = count(istreambuf_iterator<char>(file),istreambuf_iterator<char>(), '>');
3148 catch(exception& e) {
3149 errorOut(e, "MothurOut", "getNumSeqs");
3153 /***********************************************************************/
// Line-based variant: reads lines with the project's getline(file) helper and
// increments numSeqs for each non-empty line whose first character is '>'.
// NOTE(review): the declaration of 'input', any initialisation of numSeqs and
// the loop header are on lines missing from this listing.
3154 void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
3159 input = getline(file);
3160 if (input.length() != 0) {
3161 if(input[0] == '>'){ numSeqs++; }
3165 catch(exception& e) {
3166 errorOut(e, "MothurOut", "getNumSeqs");
3170 /***********************************************************************/
// Tries to locate 'filename' by attempting to open it in a series of places:
// the full path of the name as given, then inputDir, then the default path,
// then the directory of the mothur executable. Each fallback is announced with
// a message. openInputFile is called with "noerror", and a result of 1 is
// treated as "could not open". Returns false when every location fails.
// NOTE(review): this listing omits source lines (stream declarations, closes,
// what happens after a fallback succeeds, the success return). 'filename' is a
// non-const reference; presumably it is updated to the path that worked -- confirm.
3171 bool MothurOut::checkLocations(string& filename, string inputDir){
3173 filename = getFullPathName(filename);
3177 ableToOpen = openInputFile(filename, in, "noerror");
3180 //if you can't open it, try input location
3181 if (ableToOpen == 1) {
3182 if (inputDir != "") { //default path is set
3183 string tryPath = inputDir + getSimpleName(filename);
3184 mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine();
3186 ableToOpen = openInputFile(tryPath, in2, "noerror");
3192 //if you can't open it, try default location
3193 if (ableToOpen == 1) {
3194 if (getDefaultPath() != "") { //default path is set
3195 string tryPath = getDefaultPath() + getSimpleName(filename);
3196 mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine();
3198 ableToOpen = openInputFile(tryPath, in2, "noerror");
3204 //if you can't open it, it's not in the current working directory or inputDir; try mothur's executable location
3205 if (ableToOpen == 1) {
// Derive the executable's directory from argv: the path is lower-cased into
// tempPath and exepath is cut at the position of the last 'm' found there.
// Presumably this strips a trailing "mothur" from the path -- confirm.
3206 string exepath = argv;
3207 string tempPath = exepath;
3208 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
3209 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
3211 string tryPath = getFullPathName(exepath) + getSimpleName(filename);
3212 mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + tryPath); mothurOutEndLine();
3214 ableToOpen = openInputFile(tryPath, in2, "noerror");
3219 if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; }
3223 catch(exception& e) {
3224 errorOut(e, "MothurOut", "checkLocations");
3228 /***********************************************************************/
3230 //Splits estim at every occurrence of symbol and appends the pieces to container.
// A '-' delimiter is delegated to splitAtDash. For any other symbol the text is
// scanned once: each delimiter pushes the piece built so far, and the final
// piece is pushed after the loop (so an input without the delimiter yields one piece).
// NOTE(review): the lines that reset 'individual' after a push and the else
// that guards the append are missing from this listing.
3231 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
3234 if (symbol == '-') { splitAtDash(estim, container); return; }
3236 string individual = "";
3237 int estimLength = estim.size();
3238 for(int i=0;i<estimLength;i++){
3239 if(estim[i] == symbol){
3240 container.push_back(individual);
3244 individual += estim[i];
3247 container.push_back(individual);
3250 catch(exception& e) {
3251 errorOut(e, "MothurOut", "splitAtChar");
3256 /***********************************************************************/
3258 //Splits estim at unescaped dashes and appends the pieces to container.
// A dash preceded by a backslash is kept as a literal '-' and the backslash is
// dropped; a backslash before any other character, or at the very end of the
// string, is kept as-is. The final piece is pushed after the loop.
// NOTE(review): this listing omits source lines, including the one that resets
// 'individual' after a push.
3259 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
3261 string individual = "";
3262 int estimLength = estim.size();
3263 bool prevEscape = false;
// The block comment opened on the next line holds an older version of the
// loop; its closing delimiter is not on a visible line (presumably it sits
// just before the live loop at 3286).
3264 /*for(int i=0;i<estimLength;i++){
3266 individual += estim[i];
3270 if(estim[i] == '\\'){
3273 else if(estim[i] == '-'){
3274 container.push_back(individual);
3279 individual += estim[i];
3286 for(int i=0;i<estimLength;i++){
3287 if(estim[i] == '-'){
3288 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3290 container.push_back(individual);
3293 }else if(estim[i] == '\\'){
3294 if (i < estimLength-1) {
3295 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3296 else { individual += estim[i]; prevEscape = false; } //if no, add in
3297 }else { individual += estim[i]; }
3299 individual += estim[i];
3305 container.push_back(individual);
3307 catch(exception& e) {
3308 errorOut(e, "MothurOut", "splitAtDash");
3313 /***********************************************************************/
3314 //Splits estim at unescaped dashes and inserts the pieces into a set of labels.
// A dash preceded by a backslash is kept as a literal '-' and the backslash is
// dropped; a backslash before any other character, or at the very end of the
// string, is kept as-is. The final piece is inserted after the loop.
// NOTE(review): this listing omits source lines. The first loop below (3321)
// looks like the older version that is commented out in the vector overload;
// confirm in the full source whether it is live code here.
3315 void MothurOut::splitAtDash(string& estim, set<string>& container) {
3317 string individual = "";
3318 int estimLength = estim.size();
3319 bool prevEscape = false;
3321 for(int i=0;i<estimLength;i++){
3323 individual += estim[i];
3327 if(estim[i] == '\\'){
3330 else if(estim[i] == '-'){
3331 container.insert(individual);
3336 individual += estim[i];
3343 for(int i=0;i<estimLength;i++){
3344 if(estim[i] == '-'){
3345 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3347 container.insert(individual);
3350 }else if(estim[i] == '\\'){
3351 if (i < estimLength-1) {
3352 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3353 else { individual += estim[i]; prevEscape = false; } //if no, add in
3354 }else { individual += estim[i]; }
3356 individual += estim[i];
3359 container.insert(individual);
3362 catch(exception& e) {
3363 errorOut(e, "MothurOut", "splitAtDash");
3367 /***********************************************************************/
3368 //Splits estim at unescaped dashes, converts each piece to an int and inserts it into a set of line numbers.
// Escape handling matches the other splitAtDash overloads: a backslash before
// a dash keeps the dash and drops the backslash. Each piece goes through
// convert() before being inserted; the final piece is handled after the loop.
// NOTE(review): this listing omits source lines (declaration of lineNum,
// resets of 'individual'). The first loop below (3376) looks like the older
// version that is commented out in the vector overload; confirm in the full
// source whether it is live code here.
3369 void MothurOut::splitAtDash(string& estim, set<int>& container) {
3371 string individual = "";
3373 int estimLength = estim.size();
3374 bool prevEscape = false;
3376 for(int i=0;i<estimLength;i++){
3378 individual += estim[i];
3382 if(estim[i] == '\\'){
3385 else if(estim[i] == '-'){
3386 convert(individual, lineNum); //convert the string to int
3387 container.insert(lineNum);
3392 individual += estim[i];
3398 for(int i=0;i<estimLength;i++){
3399 if(estim[i] == '-'){
3400 if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
3402 convert(individual, lineNum); //convert the string to int
3403 container.insert(lineNum);
3406 }else if(estim[i] == '\\'){
3407 if (i < estimLength-1) {
3408 if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
3409 else { individual += estim[i]; prevEscape = false; } //if no, add in
3410 }else { individual += estim[i]; }
3412 individual += estim[i];
3416 convert(individual, lineNum); //convert the string to int
3417 container.insert(lineNum);
3419 catch(exception& e) {
3420 errorOut(e, "MothurOut", "splitAtDash");
3425 /***********************************************************************/
// Joins 'names' into a single comma-separated string with no trailing comma.
// The empty-input guard must stay ahead of the loop: names.size()-1 is
// unsigned and would wrap around for an empty vector.
// NOTE(review): the declaration of 'list' and the final return are on lines
// missing from this listing.
3426 string MothurOut::makeList(vector<string>& names) {
3430 if (names.size() == 0) { return list; }
3432 for (int i = 0; i < names.size()-1; i++) { list += names[i] + ","; }
3435 list += names[names.size()-1];
3439 catch(exception& e) {
3440 errorOut(e, "MothurOut", "makeList");
3445 /***********************************************************************/
3446 //This function parses a string and puts the comma-separated pieces in a vector
// The text is scanned once: each comma pushes the piece built so far, and the
// final piece is pushed after the loop.
// NOTE(review): the lines that reset 'individual' after a push and the else
// that guards the append are missing from this listing.
3447 void MothurOut::splitAtComma(string& estim, vector<string>& container) {
3449 string individual = "";
3450 int estimLength = estim.size();
3451 for(int i=0;i<estimLength;i++){
3452 if(estim[i] == ','){
3453 container.push_back(individual);
3457 individual += estim[i];
3460 container.push_back(individual);
// The commented-out lines below are an older substr-based implementation.
3465 // string individual;
3467 // while (estim.find_first_of(',') != -1) {
3468 // individual = estim.substr(0,estim.find_first_of(','));
3469 // if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
3470 // estim = estim.substr(estim.find_first_of(',')+1, estim.length());
3471 // container.push_back(individual);
3475 // container.push_back(estim);
3477 catch(exception& e) {
3478 errorOut(e, "MothurOut", "splitAtComma");
3482 /***********************************************************************/
3483 //Peels the text before the first occurrence of c off the front of suffix and returns it in prefix.
// When at least one character follows the delimiter, suffix becomes the
// remainder after it with leading spaces removed; otherwise suffix becomes "".
// NOTE(review): if c does not occur, find_first_of returns npos and npos+2
// wraps around, so a non-empty suffix is left unchanged and prefix receives
// the same whole string.
// NOTE(review): suffix.at(0) throws std::out_of_range if the remainder
// consists only of spaces; presumably that lands in the handler below -- confirm.
3484 void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
3486 prefix = suffix.substr(0,suffix.find_first_of(c));
3487 if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure the delimiter is not the last character of the string
3488 suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
3490 while(suffix.at(0) == ' ')
3491 suffix = suffix.substr(1, suffix.length());
3492 }else { suffix = ""; }
3495 catch(exception& e) {
3496 errorOut(e, "MothurOut", "splitAtChar");
3501 /***********************************************************************/
3503 //Peels the text before the first comma off the front of suffix and returns it in prefix.
// When at least one character follows the comma, suffix becomes the remainder
// after it with leading spaces removed; otherwise suffix becomes "".
// NOTE(review): if there is no comma, find_first_of returns npos and npos+2
// wraps around, so a non-empty suffix is left unchanged and prefix receives
// the same whole string.
// NOTE(review): suffix.at(0) throws std::out_of_range if the remainder
// consists only of spaces; presumably that lands in the handler below -- confirm.
3504 void MothurOut::splitAtComma(string& prefix, string& suffix){
3506 prefix = suffix.substr(0,suffix.find_first_of(','));
3507 if ((suffix.find_first_of(',')+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
3508 suffix = suffix.substr(suffix.find_first_of(',')+1, suffix.length());
3510 while(suffix.at(0) == ' ')
3511 suffix = suffix.substr(1, suffix.length());
3512 }else { suffix = ""; }
3515 catch(exception& e) {
3516 errorOut(e, "MothurOut", "splitAtComma");
3520 /***********************************************************************/
3522 //This function separates the key from the option value, e.g. dist=96_...
// When value contains '=', key receives the text before the first '=' and
// value is replaced by the text after it. The comparison with -1 relies on -1
// converting to string::npos.
// NOTE(review): the branch taken when no '=' is present is on lines missing
// from this listing.
3523 void MothurOut::splitAtEquals(string& key, string& value){
3525 if(value.find_first_of('=') != -1){
3526 key = value.substr(0,value.find_first_of('='));
3527 if ((value.find_first_of('=')+1) <= value.length()) {
3528 value = value.substr(value.find_first_of('=')+1, value.length());
3535 catch(exception& e) {
3536 errorOut(e, "MothurOut", "splitAtEquals");
3541 /**************************************************************************************************/
// Returns true when groupname equals one of the entries of Groups (linear scan).
// NOTE(review): the not-found return is on a line missing from this listing.
3543 bool MothurOut::inUsersGroups(string groupname, vector<string> Groups) {
3545 for (int i = 0; i < Groups.size(); i++) {
3546 if (groupname == Groups[i]) { return true; }
3550 catch(exception& e) {
3551 errorOut(e, "MothurOut", "inUsersGroups");
3555 /**************************************************************************************************/
// Returns true when the vector 'set' compares equal (same elements in the same
// order) to one of the vectors in 'sets'.
// NOTE(review): the not-found return is on a line missing from this listing.
3557 bool MothurOut::inUsersGroups(vector<int> set, vector< vector<int> > sets) {
3559 for (int i = 0; i < sets.size(); i++) {
3560 if (set == sets[i]) { return true; }
3564 catch(exception& e) {
3565 errorOut(e, "MothurOut", "inUsersGroups");
3569 /**************************************************************************************************/
// Returns true when groupname equals one of the entries of Groups (linear scan).
// NOTE(review): the not-found return is on a line missing from this listing.
3571 bool MothurOut::inUsersGroups(int groupname, vector<int> Groups) {
3573 for (int i = 0; i < Groups.size(); i++) {
3574 if (groupname == Groups[i]) { return true; }
3578 catch(exception& e) {
3579 errorOut(e, "MothurOut", "inUsersGroups");
3584 /**************************************************************************************************/
3585 //returns true if any of the strings in the first vector are in the second vector
// Each name is checked with the single-name inUsersGroups overload, stopping
// at the first hit.
// NOTE(review): the not-found return is on a line missing from this listing.
3586 bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups) {
3589 for (int i = 0; i < groupnames.size(); i++) {
3590 if (inUsersGroups(groupnames[i], Groups)) { return true; }
3594 catch(exception& e) {
3595 errorOut(e, "MothurOut", "inUsersGroups");
3599 /***********************************************************************/
3600 //this function determines if the user has given us labels that are smaller than the given label.
3601 //if so then it returns true so that the calling function can run the previous valid distance.
3602 //it's a "smart" distance function. It also checks for invalid labels.
// userLabels is modified: labels that cannot be converted to a float are
// erased, and so are labels found to be smaller than 'label'.
// Messages are printed only when errorOff is the empty string.
// NOTE(review): this listing omits source lines (declarations of labelFloat,
// temp and s, iterator advances, where 'smaller' is set, the returns); the
// comments describe only the statements that are visible.
3603 bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string errorOff) {
3606 set<string>::iterator it;
3607 vector<float> orderFloat;
3608 map<string, float> userMap; //the conversion process removes trailing 0's which we need to put back
3609 map<string, float>::iterator it2;
3611 bool smaller = false;
3613 //unique is the smallest line
3614 if (label == "unique") { return false; }
3616 if (convertTestFloat(label, labelFloat)) {
3617 convert(label, labelFloat);
3618 }else { //cant convert
3623 //go through users set and make them floats
// The for statement has no increment clause because entries may be erased
// inside the loop; the invalid-label path advances with erase(it++).
3624 for(it = userLabels.begin(); it != userLabels.end();) {
3627 if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
3629 orderFloat.push_back(temp);
3630 userMap[*it] = temp;
3632 }else if (*it == "unique") {
// "unique" is given the value -1 so that it sorts ahead of numeric labels.
3633 orderFloat.push_back(-1.0);
3634 userMap["unique"] = -1.0;
3637 if (errorOff == "") { mothurOut(*it + " is not a valid label."); mothurOutEndLine(); }
3638 userLabels.erase(it++);
3643 sort(orderFloat.begin(), orderFloat.end());
3645 /*************************************************/
3646 //is this label bigger than any of the users labels
3647 /*************************************************/
3649 //loop through order until you find a label greater than label
3650 for (int i = 0; i < orderFloat.size(); i++) {
3651 if (orderFloat[i] < labelFloat) {
3653 if (orderFloat[i] == -1) {
3654 if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
3655 userLabels.erase("unique");
3658 if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
// Look up the user's original spelling of this value in userMap, so the
// label can be erased and reported as the user typed it.
3660 for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {
3661 if (it2->second == orderFloat[i]) {
3663 //remove small labels
3664 userLabels.erase(s);
3668 if (errorOff == "") {mothurOut( s + ". I will use the next smallest distance. "); mothurOutEndLine(); }
3670 //since they are sorted once you find a bigger one stop looking
3677 catch(exception& e) {
3678 errorOut(e, "MothurOut", "anyLabelsToProcess");
3683 /**************************************************************************************************/
// Checks the version stamp on the first line of 'file' against 'version'.
// The first line must start with '#'; the rest of it and 'version' are both
// split on '.', and the components are compared numerically in order.
// The file is judged not good when the stamp is absent, the component counts
// differ, or a component of 'version' is greater than the file's.
// A stream judged not good is closed; otherwise it is rewound to the start.
// NOTE(review): this listing omits source lines (declarations of good, num1
// and num2, the branches that skip the comparison after an early failure, the
// return); presumably 'good' is what is returned -- confirm.
3685 bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
3689 string line = getline(file);
3691 //before we added this check
3692 if (line[0] != '#') { good = false; }
// Drop the leading '#' before parsing the version components.
3695 line = line.substr(1);
3697 vector<string> versionVector;
3698 splitAtChar(version, versionVector, '.');
3700 //check file version
3701 vector<string> linesVector;
3702 splitAtChar(line, linesVector, '.');
3704 if (versionVector.size() != linesVector.size()) { good = false; }
3706 for (int j = 0; j < versionVector.size(); j++) {
3708 convert(versionVector[j], num1);
3709 convert(linesVector[j], num2);
3711 //if mothurs version is newer than this files version, then we want to remake it
3712 if (num1 > num2) { good = false; break; }
3718 if (!good) { file.close(); }
3719 else { file.seekg(0); }
3723 catch(exception& e) {
3724 errorOut(e, "MothurOut", "checkReleaseVersion");
3728 /**************************************************************************************************/
// Column-wise mean: dists holds one row per iteration, and the result has one
// entry per column (sized from dists[0]) equal to the column sum divided by
// the number of rows.
// dists[0] is read unconditionally on the visible lines, so 'dists' must not
// be empty.
// NOTE(review): the return is on a line missing from this listing.
3729 vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
3731 vector<double> averages; //averages.resize(numComp, 0.0);
3732 for (int i = 0; i < dists[0].size(); i++) { averages.push_back(0.0); }
3734 for (int thisIter = 0; thisIter < dists.size(); thisIter++) {
3735 for (int i = 0; i < dists[thisIter].size(); i++) {
3736 averages[i] += dists[thisIter][i];
3741 for (int i = 0; i < averages.size(); i++) { averages[i] /= (double) dists.size(); }
3745 catch(exception& e) {
3746 errorOut(e, "MothurOut", "getAverages");
3750 /**************************************************************************************************/
// Arithmetic mean of dists: the elements are summed and the sum is divided by
// the element count.
// NOTE(review): no empty-input guard is visible; with an empty vector the
// division is by zero. The declaration of 'average' and the return are on
// lines missing from this listing.
3751 double MothurOut::getAverage(vector<double> dists) {
3755 for (int i = 0; i < dists.size(); i++) {
3756 average += dists[i];
3760 average /= (double) dists.size();
3764 catch(exception& e) {
3765 errorOut(e, "MothurOut", "getAverage");
3770 /**************************************************************************************************/
// Column-wise standard deviation: dists holds one row per iteration. The
// column means come from getAverages(dists); for each column the squared
// deviations from the mean are summed, divided by the number of rows (N, not
// N-1), and square-rooted.
// dists[0] is read unconditionally on the visible lines, so 'dists' must not
// be empty.
// NOTE(review): the return is on a line missing from this listing.
3771 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
3774 vector<double> averages = getAverages(dists);
3776 //find standard deviation
3777 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3778 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3780 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3781 for (int j = 0; j < dists[thisIter].size(); j++) {
3782 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3785 for (int i = 0; i < stdDev.size(); i++) {
3786 stdDev[i] /= (double) dists.size();
3787 stdDev[i] = sqrt(stdDev[i]);
3792 catch(exception& e) {
// Report this function's own name; the handler previously named getAverages.
3793 errorOut(e, "MothurOut", "getStandardDeviation");
3797 /**************************************************************************************************/
// Column-wise standard deviation using caller-supplied column means: for each
// column the squared deviations from averages[j] are summed over all rows,
// divided by the number of rows (N, not N-1), and square-rooted.
// dists[0] is read unconditionally on the visible lines, so 'dists' must not
// be empty; 'averages' is indexed by column and is assumed to be at least as
// long as each row -- confirm at the call sites.
// NOTE(review): the return is on a line missing from this listing.
3798 vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists, vector<double>& averages) {
3800 //find standard deviation
3801 vector<double> stdDev; //stdDev.resize(numComp, 0.0);
3802 for (int i = 0; i < dists[0].size(); i++) { stdDev.push_back(0.0); }
3804 for (int thisIter = 0; thisIter < dists.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3805 for (int j = 0; j < dists[thisIter].size(); j++) {
3806 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
3809 for (int i = 0; i < stdDev.size(); i++) {
3810 stdDev[i] /= (double) dists.size();
3811 stdDev[i] = sqrt(stdDev[i]);
3816 catch(exception& e) {
3817 errorOut(e, "MothurOut", "getStandardDeviation");
3821 /**************************************************************************************************/
// Summarises calcDistsTotals[iteration][calculator][comparison] across
// iterations. The result has the shape of calcDistsTotals[0], with seq1/seq2
// copied from iteration 0 and dist set to the summary value.
// mode == "average": dist is the mean over iterations.
// any other mode: the per-iteration values are sorted and the element at index
// size/2 is taken. For an even number of iterations that is the upper of the
// two middle values, not their mean.
// calcDistsTotals[0] is read unconditionally, so the input must not be empty.
// NOTE(review): the declaration of tempDist and some closers are on lines
// missing from this listing.
3822 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
3825 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3826 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3827 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3828 vector<seqDist> temp;
3829 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3831 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3832 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3833 tempDist.dist = 0.0;
3834 temp.push_back(tempDist);
3836 calcAverages.push_back(temp);
3839 if (mode == "average") {
3840 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3841 for (int i = 0; i < calcAverages.size(); i++) { //add this iteration's dists to the running sums.
3842 for (int j = 0; j < calcAverages[i].size(); j++) {
3843 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3848 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3849 for (int j = 0; j < calcAverages[i].size(); j++) {
3850 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3853 }else { //find median
3854 for (int i = 0; i < calcAverages.size(); i++) { //for each calc
3855 for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
3856 vector<double> dists;
3857 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
3858 dists.push_back(calcDistsTotals[thisIter][i][j].dist);
3860 sort(dists.begin(), dists.end());
3861 calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
3866 return calcAverages;
3868 catch(exception& e) {
3869 errorOut(e, "MothurOut", "getAverages");
3873 /**************************************************************************************************/
// Averages calcDistsTotals[iteration][calculator][comparison] across
// iterations. The result has the shape of calcDistsTotals[0], with seq1/seq2
// copied from iteration 0 and dist set to the mean over iterations.
// calcDistsTotals[0] is read unconditionally, so the input must not be empty.
// NOTE(review): the declaration of tempDist and some closers are on lines
// missing from this listing.
3874 vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3877 vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
3878 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3879 //calcAverages[i].resize(calcDistsTotals[0][i].size());
3880 vector<seqDist> temp;
3881 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3883 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3884 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3885 tempDist.dist = 0.0;
3886 temp.push_back(tempDist);
3888 calcAverages.push_back(temp);
3892 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
3893 for (int i = 0; i < calcAverages.size(); i++) { //add this iteration's dists to the running sums.
3894 for (int j = 0; j < calcAverages[i].size(); j++) {
3895 calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
3900 for (int i = 0; i < calcAverages.size(); i++) { //finds average.
3901 for (int j = 0; j < calcAverages[i].size(); j++) {
3902 calcAverages[i][j].dist /= (float) calcDistsTotals.size();
3906 return calcAverages;
3908 catch(exception& e) {
3909 errorOut(e, "MothurOut", "getAverages");
3913 /**************************************************************************************************/
3914 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
3917 vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
3919 //find standard deviation
3920 vector< vector<seqDist> > stdDev;
3921 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3922 vector<seqDist> temp;
3923 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3925 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3926 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3927 tempDist.dist = 0.0;
3928 temp.push_back(tempDist);
3930 stdDev.push_back(temp);
3933 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3934 for (int i = 0; i < stdDev.size(); i++) {
3935 for (int j = 0; j < stdDev[i].size(); j++) {
3936 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3941 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3942 for (int j = 0; j < stdDev[i].size(); j++) {
3943 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3944 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3950 catch(exception& e) {
3951 errorOut(e, "MothurOut", "getAverages");
3955 /**************************************************************************************************/
3956 vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
3958 //find standard deviation
3959 vector< vector<seqDist> > stdDev;
3960 for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
3961 vector<seqDist> temp;
3962 for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
3964 tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
3965 tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
3966 tempDist.dist = 0.0;
3967 temp.push_back(tempDist);
3969 stdDev.push_back(temp);
3972 for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
3973 for (int i = 0; i < stdDev.size(); i++) {
3974 for (int j = 0; j < stdDev[i].size(); j++) {
3975 stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
3980 for (int i = 0; i < stdDev.size(); i++) { //finds average.
3981 for (int j = 0; j < stdDev[i].size(); j++) {
3982 stdDev[i][j].dist /= (float) calcDistsTotals.size();
3983 stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
3989 catch(exception& e) {
3990 errorOut(e, "MothurOut", "getAverages");
3995 /**************************************************************************************************/
3996 bool MothurOut::isContainingOnlyDigits(string input) {
3999 //are you a digit in ascii code
4000 for (int i = 0;i < input.length(); i++){
4001 if( input[i]>47 && input[i]<58){}
4002 else { return false; }
4007 catch(exception& e) {
4008 errorOut(e, "MothurOut", "isContainingOnlyDigits");
4012 /**************************************************************************************************/
4013 int MothurOut::removeConfidences(string& tax) {
4019 while (tax.find_first_of(';') != -1) {
4021 if (control_pressed) { return 0; }
4024 taxon = tax.substr(0,tax.find_first_of(';'));
4026 int pos = taxon.find_last_of('(');
4029 int pos2 = taxon.find_last_of(')');
4031 string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
4032 if (isNumeric1(confidenceScore)) {
4033 taxon = taxon.substr(0, pos); //rip off confidence
4039 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
4047 catch(exception& e) {
4048 errorOut(e, "MothurOut", "removeConfidences");
4052 /**************************************************************************************************/
4053 string MothurOut::removeQuotes(string tax) {
4059 for (int i = 0; i < tax.length(); i++) {
4061 if (control_pressed) { return newTax; }
4063 if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
4069 catch(exception& e) {
4070 errorOut(e, "MothurOut", "removeQuotes");
4074 /**************************************************************************************************/
4075 // function for calculating standard deviation
4076 double MothurOut::getStandardDeviation(vector<int>& featureVector){
4080 for (int i = 0; i < featureVector.size(); i++) { average += featureVector[i]; }
4081 average /= (double) featureVector.size();
4083 //find standard deviation
4085 for (int i = 0; i < featureVector.size(); i++) { //compute the difference of each dist from the mean, and square the result of each
4086 stdDev += ((featureVector[i] - average) * (featureVector[i] - average));
4089 stdDev /= (double) featureVector.size();
4090 stdDev = sqrt(stdDev);
4094 catch(exception& e) {
4095 errorOut(e, "MothurOut", "getStandardDeviation");
4099 /**************************************************************************************************/
4100 // returns largest value in vector
4101 double MothurOut::max(vector<double>& featureVector){
4103 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4106 double largest = featureVector[0];
4107 for (int i = 1; i < featureVector.size(); i++) {
4108 if (featureVector[i] > largest) { largest = featureVector[i]; }
4113 catch(exception& e) {
4114 errorOut(e, "MothurOut", "max");
4118 /**************************************************************************************************/
4119 // returns smallest value in vector
4120 double MothurOut::min(vector<double>& featureVector){
4122 if (featureVector.size() == 0) { mothurOut("[ERROR]: vector size = 0!\n"); control_pressed=true; return 0.0; }
4125 double smallest = featureVector[0];
4126 for (int i = 1; i < featureVector.size(); i++) {
4127 if (featureVector[i] < smallest) { smallest = featureVector[i]; }
4132 catch(exception& e) {
4133 errorOut(e, "MothurOut", "min");
4137 /**************************************************************************************************/