]> git.donarmstrong.com Git - mothur.git/blobdiff - countseqscommand.h
changing command name classify.shared to classifyrf.shared
[mothur.git] / countseqscommand.h
index 555d6a74fa36fa034b6d0aafeb5f07bd0c4e7bbc..a5326d401e870c1878b5d8e7f4c2708bc0c48f04 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include "command.hpp"
+#include "groupmap.h"
 
 class CountSeqsCommand : public Command {
        
@@ -23,8 +24,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "count.seqs";                          }
        string getCommandCategory()             { return "Sequence Processing";         }
-       string getOutputFileNameTag(string, string);
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; }
        string getDescription()         { return "counts the number of sequences represented by each unique sequence in a namesfile"; }
 
@@ -33,17 +35,137 @@ public:
        
        
 private:
+    
+    struct linePair {
+               unsigned long long start;
+               unsigned long long end;
+               linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+       };
+    
        string namefile, groupfile, outputDir, groups;
        bool abort, large;
        vector<string> Groups, outputNames;
+    int processors;
     
     int processSmall(string);
     int processLarge(string);
     map<int, string> processNameFile(string);
     map<int, string> getGroupNames(string, set<string>&);
     
+    int createProcesses(GroupMap*&, string);
+    int driver(unsigned long long, unsigned long long, string, GroupMap*&);
+    
+};
+
+/***********************************************************************/
+struct countData {
+    unsigned long long start;
+       unsigned long long end;
+       MothurOut* m;
+    string outputFileName, namefile, groupfile;
+    GroupMap* groupMap;
+    int total;
+    vector<string> Groups;
+    
+       countData(){}
+       countData(string fn, GroupMap* g, MothurOut* mout, unsigned long long st, unsigned long long en, string gfn, string nfn, vector<string> gr) {
+        m = mout;
+               start = st;
+               end = en;
+        groupMap = g;
+        groupfile = gfn;
+        namefile = nfn;
+        outputFileName = fn;
+        Groups = gr;
+        total = 0;
+       }
 };
 
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyCountThreadFunction(LPVOID lpParam){
+       countData* pDataArray;
+       pDataArray = (countData*)lpParam;
+       try {
+        ofstream out;
+        pDataArray->m->openOutputFile(pDataArray->outputFileName, out);
+        
+        ifstream in;
+               pDataArray->m->openInputFile(pDataArray->namefile, in);
+               in.seekg(pDataArray->start);
+        
+        //print header if you are process 0
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+                       in.seekg(0);
+               }else { //this accounts for the difference in line endings.
+                       in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+               }
+        
+        pDataArray->total = 0;
+               for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+            
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string firstCol, secondCol;
+                       in >> firstCol; pDataArray->m->gobble(in); in >> secondCol; pDataArray->m->gobble(in);
+            //cout << firstCol << '\t' << secondCol << endl;
+            pDataArray->m->checkName(firstCol);
+            pDataArray->m->checkName(secondCol);
+            
+                       vector<string> names;
+                       pDataArray->m->splitAtChar(secondCol, names, ',');
+                       
+                       if (pDataArray->groupfile != "") {
+                               //set to 0
+                               map<string, int> groupCounts;
+                               int total = 0;
+                               for (int i = 0; i < pDataArray->Groups.size(); i++) { groupCounts[pDataArray->Groups[i]] = 0; }
+                               
+                               //get counts for each of the users groups
+                               for (int i = 0; i < names.size(); i++) {
+                                       string group = pDataArray->groupMap->getGroup(names[i]);
+                                       
+                                       if (group == "not found") { pDataArray->m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); pDataArray->m->mothurOutEndLine(); }
+                                       else {
+                                               map<string, int>::iterator it = groupCounts.find(group);
+                                               
+                                               //if not found, then this sequence is not from a group we care about
+                                               if (it != groupCounts.end()) {
+                                                       it->second++;
+                                                       total++;
+                                               }
+                                       }
+                               }
+                               
+                               if (total != 0) {
+                                       out << firstCol << '\t' << total << '\t';
+                                       for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
+                                               out << it->second << '\t';
+                                       }
+                                       out << endl;
+                               }
+                       }else {
+                               out << firstCol << '\t' << names.size() << endl;
+                       }
+                       
+                       pDataArray->total += names.size();
+               }
+               in.close();
+        out.close();
+
+                
+        return 0;
+    }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "CountSeqsCommand", "MyCountThreadFunction");
+               exit(1);
+       }
+}
+#endif
+
+
+
 #endif