From 1fd93f3d6e283e5b69a187b9ece80e6ea554390a Mon Sep 17 00:00:00 2001 From: Daniel Jones Date: Fri, 18 Mar 2011 12:12:45 -0700 Subject: [PATCH] a little documentation --- AUTHORS | 6 +++++ COPYING | 22 +++++++++++++++++ README | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ TODO | 51 +++++++++++++++++++++++++++++++++++++++ src/hash.c | 8 +++++++ src/hash.h | 10 ++++++++ src/parse.h | 5 +--- src/sw.h | 2 +- 8 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 README create mode 100644 TODO diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..6d957f9 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,6 @@ + +fastq-tools is written and maintained by: + +Daniel C. Jones + + diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..dee5a7b --- /dev/null +++ b/COPYING @@ -0,0 +1,22 @@ + +Copyright (C) 2011 by Daniel C. Jones + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ + diff --git a/README b/README new file mode 100644 index 0000000..ad14efe --- /dev/null +++ b/README @@ -0,0 +1,69 @@ + +fastq-tools +=========== + +This package provides a number of small and efficient programs to perform common +tasks with high throughput sequencing data in the FASTQ format. All of the +programs work with typical FASTQ files as well as gzipped FASTQ files. + + +index +----- + +The following programs are provided. See the individual man pages for more +information. + + +*fastq-grep* : match sequences against regular expressions + +*fastq-kmers* : count k-mer occurrences + +*fastq-match* : (smith-waterman) local sequence alignment + +*fastq-uniq* : count duplicate reads + + +install +------- + +On most systems, this is a simple `./configure && make install`. + +The only external dependencies are PCRE (http://www.pcre.org/) and zlib +(http://zlib.net/). + + +contribute +---------- + +If you have written any small but useful programs to deal with FASTQ files, +please consider submitting them for inclusion in fastq-tools. Check out the +Github page (https://github.com/dcjones/fastq-tools) or send mail to the author +(dcjones@cs.washington.edu). + + +copying +------- + +This package is provided under a permissive MIT-style license. In particular: + +Copyright (C) 2011 by Daniel C. Jones + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + + diff --git a/TODO b/TODO new file mode 100644 index 0000000..4afc59a --- /dev/null +++ b/TODO @@ -0,0 +1,51 @@ + +general +======= + +Man pages! + + +I would like to incorporate some domain-specific compression algorithm. Options +I know of: + * G-SQZ (http://www.tgen.org/research/gsqueez.cfm) : not open source (i.e. not an option) + * fqzcomp (http://seqanswers.com/forums/archive/index.php/t-6349.html) + * DSRC (http://bioinformatics.oxfordjournals.org/cgi/content/short/27/6/860?rss=1) : + Not yet available? + * Invent my own. + + + + + + +program specific +================ + + +fastq-grep +---------- + + + +fastq-kmers +----------- + + + +fastq-match +----------- + +Specifying the score matrix, rather than using a hardcoded one. + +The implementation of the Smith-Waterman algorithm could be more efficient. We +might look to Phil Green's cross_match implementation for ideas. + +An option to return the sequences that match past a certain threshold would be +useful. + + + +fastq-uniq +---------- + + diff --git a/src/hash.c b/src/hash.c index 9b1a043..25202a4 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,4 +1,12 @@ +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. Jones + * + */ + + #include "hash.h" #include "common.h" #include diff --git a/src/hash.h b/src/hash.h index f101824..e2386a1 100644 --- a/src/hash.h +++ b/src/hash.h @@ -1,3 +1,13 @@ +/* + * This file is part of fastq-tools. + * + * Copyright (c) 2011 by Daniel C. 
Jones + * + * hash : + * A quick and simple all-purpose hash table. + * + */ + #ifndef FASTQ_TOOLS_HASH_H #define FASTQ_TOOLS_HASH_H diff --git a/src/parse.h b/src/parse.h index 64d3726..d2e2612 100644 --- a/src/parse.h +++ b/src/parse.h @@ -3,12 +3,9 @@ * * Copyright (c) 2011 by Daniel C. Jones * - * fastq-parse : + * parse : * A parser for FASTQ files. * - * This parser is mostly derivative of Heng Li's. - * See: http://lh3lh3.users.sourceforge.net/kseq.shtml - * */ #ifndef FASTQ_TOOLS_PARSE_H diff --git a/src/sw.h b/src/sw.h index 6e35a77..9da384b 100644 --- a/src/sw.h +++ b/src/sw.h @@ -3,7 +3,7 @@ * * Copyright (c) 2011 by Daniel C. Jones * - * fastq-sw : + * sw : * Local alignments of nucleotide sequences via Smith-Waterman. * */ -- 2.39.5