/* bgzip.c -- from samtools.git (git.donarmstrong.com gitweb blob view).
 * Commit subject: "Index files should be opened in binary mode, not text mode."
 */
1 /* The MIT License
2
3    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
4
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <errno.h>
30 #include "bgzf.h"
31
/* Size in bytes (64 KiB) of the buffer handed to each read/write pass. */
enum { WINDOW_SIZE = 64 * 1024 };
33
/*
 * Print the command-line help text on standard output.
 *
 * Always returns 0, so callers can write `return bgzip_main_usage();`.
 * The option parser also accepts -l, but no listing mode is implemented,
 * so it is deliberately not advertised here.
 */
static int bgzip_main_usage(void)
{
        fputs("\n"
              "Usage:   bgzip [options] [file] ...\n\n"
              "Options: -c      write on standard output, keep original files unchanged\n"
              "         -d      decompress\n"
              "         -f      overwrite files without asking\n"
              "         -b INT  decompress at virtual file pointer INT\n"
              "         -s INT  decompress INT bytes in the uncompressed file\n"
              "         -h      give this help\n"
              "\n",
              stdout);
        return 0;
}
47
/*
 * Open `fn` for writing (creating or truncating it) and return the descriptor.
 *
 * When `is_forced` is zero the file is first created exclusively; if it
 * already exists the user is asked whether to overwrite it. Any answer other
 * than 'y'/'Y' -- including end of input or a read error -- terminates the
 * program with status 1. If the file cannot be opened the program also exits
 * with status 1, so a value returned to the caller is always a valid
 * descriptor.
 */
static int write_open(const char *fn, int is_forced)
{
        int fd = -1;

        if (!is_forced) {
                fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666);
                if (fd < 0 && errno == EEXIST) {
                        char answer = 'n';

                        printf("bgzip: %s already exists; do you wish to overwrite (y or n)? ", fn);
                        /* The prompt has no newline; make sure it is visible before blocking on input. */
                        fflush(stdout);
                        /* scanf() leaves `answer` untouched on EOF/error: treat that as "no". */
                        if (scanf("%c", &answer) != 1 || (answer != 'Y' && answer != 'y')) {
                                printf("bgzip: not overwritten\n");
                                exit(1);
                        }
                }
        }
        if (fd < 0) {
                /* Forced, confirmed by the user, or the exclusive create failed for another reason. */
                fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666);
                if (fd < 0) {
                        fprintf(stderr, "bgzip: %s: Fail to write\n", fn);
                        exit(1);
                }
        }
        return fd;
}
70
71 static
72 void
73 fail(BGZF* fp)
74 {
75     printf("Error: %s\n", fp->error);
76     exit(1);
77 }
78
79 int main(int argc, char **argv)
80 {
81         int c, compress, pstdout, is_forced;
82         BGZF *rz;
83         void *buffer;
84         long start, end, size;
85
86         compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
87         while((c  = getopt(argc, argv, "cdlhfb:s:")) >= 0){
88                 switch(c){
89                 case 'h': return bgzip_main_usage();
90                 case 'd': compress = 0; break;
91                 case 'c': pstdout = 1; break;
92                 // case 'l': compress = 2; break;
93                 case 'b': start = atol(optarg); break;
94                 case 's': size = atol(optarg); break;
95                 case 'f': is_forced = 1; break;
96                 }
97         }
98         if (size >= 0) end = start + size;
99         if(end >= 0 && end < start){
100                 fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end);
101                 return 1;
102         }
103         if(compress == 1){
104                 int f_src, f_dst = -1;
105                 if(argc > optind){
106                         if((f_src = open(argv[optind], O_RDONLY)) < 0){
107                                 fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]);
108                                 return 1;
109                         }
110                         if(pstdout){
111                                 f_dst = fileno(stdout);
112                         } else {
113                                 char *name = malloc(sizeof(strlen(argv[optind]) + 5));
114                                 strcpy(name, argv[optind]);
115                                 strcat(name, ".gz");
116                                 f_dst = write_open(name, is_forced);
117                                 if (f_dst < 0) return 1;
118                                 free(name);
119                         }
120                 } else if(pstdout){ 
121                         f_src = fileno(stdin);
122                         f_dst = fileno(stdout);
123                 } else return bgzip_main_usage();
124                 rz = bgzf_fdopen(f_dst, "w");
125                 buffer = malloc(WINDOW_SIZE);
126                 while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) {
127                   if (bgzf_write(rz, buffer, c) < 0) {
128                     fail(rz);
129                   }
130                 }
131                 // f_dst will be closed here
132                 if (bgzf_close(rz) < 0) {
133                   fail(rz);
134                 }
135                 if (argc > optind) unlink(argv[optind]);
136                 free(buffer);
137                 close(f_src);
138                 return 0;
139         } else {
140                 if(argc <= optind) return bgzip_main_usage();
141                 int f_dst;
142                 if (argc > optind && !pstdout) {
143                   char *name;
144                   if (strstr(argv[optind], ".gz") - argv[optind] != strlen(argv[optind]) - 3) {
145                     printf("bgzip: %s: unknown suffix -- ignored\n", argv[optind]);
146                     return 1;
147                   }
148                   name = strdup(argv[optind]);
149                   name[strlen(name) - 3] = '\0';
150                   f_dst = write_open(name, is_forced);
151                   free(name);
152                 } else f_dst = fileno(stdout);
153                 rz = bgzf_open(argv[optind], "r");
154                 if (rz == NULL) {
155                   printf("Could not open file: %s\n", argv[optind]);
156                   return 1;
157                 }
158                 buffer = malloc(WINDOW_SIZE);
159                 if (bgzf_seek(rz, start, SEEK_SET) < 0) {
160                   fail(rz);
161                 }
162                 while(1){
163                   if(end < 0) c = bgzf_read(rz, buffer, WINDOW_SIZE);
164                   else c = bgzf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
165                   if(c == 0) break;
166                   if (c < 0) fail(rz);
167                   start += c;
168                   write(f_dst, buffer, c);
169                   if(end >= 0 && start >= end) break;
170                 }
171                 free(buffer);
172                 if (bgzf_close(rz) < 0) {
173                   fail(rz);
174                 }
175                 if (!pstdout) unlink(argv[optind]);
176                 return 0;
177         }
178 }
179