Biomedical Image Analysis Library
The Biomedical Image Analysis Library is a powerful tool for developers, physicians, researchers, engineers, and so on.
gzappend.c
Go to the documentation of this file.
1 /* gzappend -- command to append to a gzip file
2 
3  Copyright (C) 2003, 2012 Mark Adler, all rights reserved
4  version 1.2, 11 Oct 2012
5 
6  This software is provided 'as-is', without any express or implied
7  warranty. In no event will the author be held liable for any damages
8  arising from the use of this software.
9 
10  Permission is granted to anyone to use this software for any purpose,
11  including commercial applications, and to alter it and redistribute it
12  freely, subject to the following restrictions:
13 
14  1. The origin of this software must not be misrepresented; you must not
15  claim that you wrote the original software. If you use this software
16  in a product, an acknowledgment in the product documentation would be
17  appreciated but is not required.
18  2. Altered source versions must be plainly marked as such, and must not be
19  misrepresented as being the original software.
20  3. This notice may not be removed or altered from any source distribution.
21 
22  Mark Adler madler@alumni.caltech.edu
23  */
24 
25 /*
26  * Change history:
27  *
28  * 1.0 19 Oct 2003 - First version
29  * 1.1 4 Nov 2003 - Expand and clarify some comments and notes
30  * - Add version and copyright to help
31  * - Send help to stdout instead of stderr
32  * - Add some preemptive typecasts
33  * - Add L to constants in lseek() calls
34  * - Remove some debugging information in error messages
35  * - Use new data_type definition for zlib 1.2.1
36  * - Simplify and unify file operations
37  * - Finish off gzip file in gztack()
38  * - Use deflatePrime() instead of adding empty blocks
39  * - Keep gzip file clean on appended file read errors
40  * - Use in-place rotate instead of auxiliary buffer
41  * (Why you ask? Because it was fun to write!)
42  * 1.2 11 Oct 2012 - Fix for proper z_const usage
43  * - Check for input buffer malloc failure
44  */
45 
46 /*
47  gzappend takes a gzip file and appends to it, compressing files from the
48  command line or data from stdin. The gzip file is written to directly, to
49  avoid copying that file, in case it's large. Note that this results in the
50  unfriendly behavior that if gzappend fails, the gzip file is corrupted.
51 
52  This program was written to illustrate the use of the new Z_BLOCK option of
53  zlib 1.2.x's inflate() function. This option returns from inflate() at each
54  block boundary to facilitate locating and modifying the last block bit at
55  the start of the final deflate block. Also whether using Z_BLOCK or not,
56  another required feature of zlib 1.2.x is that inflate() now provides the
57  number of unused bits in the last input byte used. gzappend will not work
58  with versions of zlib earlier than 1.2.1.
59 
60  gzappend first decompresses the gzip file internally, discarding all but
61  the last 32K of uncompressed data, and noting the location of the last block
62  bit and the number of unused bits in the last byte of the compressed data.
63  The gzip trailer containing the CRC-32 and length of the uncompressed data
64  is verified. This trailer will be later overwritten.
65 
66  Then the last block bit is cleared by seeking back in the file and rewriting
67  the byte that contains it. Seeking forward, the last byte of the compressed
68  data is saved along with the number of unused bits to initialize deflate.
69 
70  A deflate process is initialized, using the last 32K of the uncompressed
71  data from the gzip file to initialize the dictionary. If the total
72  uncompressed data was less than 32K, then all of it is used to initialize
73  the dictionary. The deflate output bit buffer is also initialized with the
74  last bits from the original deflate stream. From here on, the data to
75  append is simply compressed using deflate, and written to the gzip file.
76  When that is complete, the new CRC-32 and uncompressed length are written
77  as the trailer of the gzip file.
78  */
79 
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <fcntl.h>
84 #include <unistd.h>
85 #include "zlib.h"
86 
87 #define local static
88 #define LGCHUNK 14
89 #define CHUNK (1U << LGCHUNK)
90 #define DSIZE 32768U
91 
/* report a fatal error on stderr in the form "gzappend error: <msg1><msg2>"
   and terminate the program with exit status 1 -- this does not return */
local void bye(char *msg1, char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}
98 
/* return the greatest common divisor of a and b -- this is Euclid's
   algorithm by subtraction, except that each step subtracts the largest
   power-of-two multiple of the smaller value that still leaves at least that
   much behind, which keeps it fast when the operands differ greatly in size;
   if either argument is zero, the other argument is returned */
local unsigned gcd(unsigned a, unsigned b)
{
    while (a != 0 && b != 0) {
        /* reduce whichever operand is larger (b when they are equal) */
        unsigned *big = a > b ? &a : &b;
        unsigned step = a > b ? b : a;

        /* double the subtrahend while twice its value still fits */
        while (*big - step >= step)
            step <<= 1;
        *big -= step;
    }

    /* one of the two is now zero, so the sum is the survivor */
    return a + b;
}
121 
/* rotate list[0..len-1] left by rot positions, in place

   list  bytes to rotate, modified in place
   len   number of bytes in list
   rot   left-rotation amount; it is reduced modulo len, so any value works

   Nothing is done when len < 2 or when the reduced rot is zero.  A rotation
   by one position in either direction is done with a single block move.  All
   other rotations are done as gcd(len, rot) independent cycles, so that no
   auxiliary buffer is needed. */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned start, to, from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* do simple left shift by one -- the source and destination overlap, so
       memmove() is required (memcpy() is undefined for overlapping regions) */
    if (rot == 1) {
        tmp = list[0];
        memmove(list, list + 1, len - 1);
        list[len - 1] = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = list[len - 1];
        memmove(list + 1, list, len - 1);
        list[0] = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place -- indices are used
       instead of pointers so that nothing is ever formed outside of the
       array, and the wrap is tested before adding so the index can't
       overflow */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = cycles;          /* start index is arbitrary */
        tmp = list[from];               /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */
            if (from >= len - rot)      /* go right rot positions, */
                from -= len - rot;      /*   wrapping around the end */
            else
                from += rot;
            if (from == start) break;   /* all but one shifted */
            list[to] = list[from];      /* shift left */
        }
        list[to] = tmp;                 /* complete the circle */
    } while (--cycles);
}
168 
169 /* structure for gzip file read operations */
170 typedef struct {
171  int fd; /* file descriptor */
172  int size; /* 1 << size is bytes in buf */
173  unsigned left; /* bytes available at next */
174  unsigned char *buf; /* buffer */
175  z_const unsigned char *next; /* next byte in buffer */
176  char *name; /* file name for error messages */
177 } file;
178 
179 /* reload buffer */
181 {
182  int len;
183 
184  len = read(in->fd, in->buf, 1 << in->size);
185  if (len == -1) bye("error reading ", in->name);
186  in->left = (unsigned)len;
187  in->next = in->buf;
188  return len;
189 }
190 
191 /* read from file in, exit if end-of-file */
193 {
194  if (readin(in) == 0) bye("unexpected end of ", in->name);
195  return 0;
196 }
197 
/* get the next byte from file in, reloading the buffer with readmore() first
   if it is empty -- in is evaluated more than once, so it must not have side
   effects; it is parenthesized so that any pointer expression can be used */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
200 
201 /* skip over n bytes of in */
202 local void skip(file *in, unsigned n)
203 {
204  unsigned bypass;
205 
206  if (n > in->left) {
207  n -= in->left;
208  bypass = n & ~((1U << in->size) - 1);
209  if (bypass) {
210  if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
211  bye("seeking ", in->name);
212  n -= bypass;
213  }
214  readmore(in);
215  if (n > in->left)
216  bye("unexpected end of ", in->name);
217  }
218  in->left -= n;
219  in->next += n;
220 }
221 
222 /* read a four-byte unsigned integer, little-endian, from in */
223 unsigned long read4(file *in)
224 {
225  unsigned long val;
226 
227  val = read1(in);
228  val += (unsigned)read1(in) << 8;
229  val += (unsigned long)read1(in) << 16;
230  val += (unsigned long)read1(in) << 24;
231  return val;
232 }
233 
234 /* skip over gzip header */
236 {
237  int flags;
238  unsigned n;
239 
240  if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
241  if (read1(in) != 8) bye("unknown compression method in", in->name);
242  flags = read1(in);
243  if (flags & 0xe0) bye("unknown header flags set in", in->name);
244  skip(in, 6);
245  if (flags & 4) {
246  n = read1(in);
247  n += (unsigned)(read1(in)) << 8;
248  skip(in, n);
249  }
250  if (flags & 8) while (read1(in) != 0) ;
251  if (flags & 16) while (read1(in) != 0) ;
252  if (flags & 2) skip(in, 2);
253 }
254 
255 /* decompress gzip file "name", return strm with a deflate stream ready to
256  continue compression of the data in the gzip file, and return a file
257  descriptor pointing to where to write the compressed data -- the deflate
258  stream is initialized to compress using level "level" */
259 local int gzscan(char *name, z_stream *strm, int level)
260 {
261  int ret, lastbit, left, full;
262  unsigned have;
263  unsigned long crc, tot;
264  unsigned char *window;
265  off_t lastoff, end;
266  file gz;
267 
268  /* open gzip file */
269  gz.name = name;
270  gz.fd = open(name, O_RDWR, 0);
271  if (gz.fd == -1) bye("cannot open ", name);
272  gz.buf = malloc(CHUNK);
273  if (gz.buf == NULL) bye("out of memory", "");
274  gz.size = LGCHUNK;
275  gz.left = 0;
276 
277  /* skip gzip header */
278  gzheader(&gz);
279 
280  /* prepare to decompress */
281  window = malloc(DSIZE);
282  if (window == NULL) bye("out of memory", "");
283  strm->zalloc = Z_NULL;
284  strm->zfree = Z_NULL;
285  strm->opaque = Z_NULL;
286  ret = inflateInit2(strm, -15);
287  if (ret != Z_OK) bye("out of memory", " or library mismatch");
288 
289  /* decompress the deflate stream, saving append information */
290  lastbit = 0;
291  lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
292  left = 0;
293  strm->avail_in = gz.left;
294  strm->next_in = gz.next;
295  crc = crc32(0L, Z_NULL, 0);
296  have = full = 0;
297  do {
298  /* if needed, get more input */
299  if (strm->avail_in == 0) {
300  readmore(&gz);
301  strm->avail_in = gz.left;
302  strm->next_in = gz.next;
303  }
304 
305  /* set up output to next available section of sliding window */
306  strm->avail_out = DSIZE - have;
307  strm->next_out = window + have;
308 
309  /* inflate and check for errors */
310  ret = inflate(strm, Z_BLOCK);
311  if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
312  if (ret == Z_MEM_ERROR) bye("out of memory", "");
313  if (ret == Z_DATA_ERROR)
314  bye("invalid compressed data--format violated in", name);
315 
316  /* update crc and sliding window pointer */
317  crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
318  if (strm->avail_out)
319  have = DSIZE - strm->avail_out;
320  else {
321  have = 0;
322  full = 1;
323  }
324 
325  /* process end of block */
326  if (strm->data_type & 128) {
327  if (strm->data_type & 64)
328  left = strm->data_type & 0x1f;
329  else {
330  lastbit = strm->data_type & 0x1f;
331  lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
332  }
333  }
334  } while (ret != Z_STREAM_END);
335  inflateEnd(strm);
336  gz.left = strm->avail_in;
337  gz.next = strm->next_in;
338 
339  /* save the location of the end of the compressed data */
340  end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
341 
342  /* check gzip trailer and save total for deflate */
343  if (crc != read4(&gz))
344  bye("invalid compressed data--crc mismatch in ", name);
345  tot = strm->total_out;
346  if ((tot & 0xffffffffUL) != read4(&gz))
347  bye("invalid compressed data--length mismatch in", name);
348 
349  /* if not at end of file, warn */
350  if (gz.left || readin(&gz))
351  fprintf(stderr,
352  "gzappend warning: junk at end of gzip file overwritten\n");
353 
354  /* clear last block bit */
355  lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
356  if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
357  *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
358  lseek(gz.fd, -1L, SEEK_CUR);
359  if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
360 
361  /* if window wrapped, build dictionary from window by rotating */
362  if (full) {
363  rotate(window, DSIZE, have);
364  have = DSIZE;
365  }
366 
367  /* set up deflate stream with window, crc, total_in, and leftover bits */
368  ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
369  if (ret != Z_OK) bye("out of memory", "");
370  deflateSetDictionary(strm, window, have);
371  strm->adler = crc;
372  strm->total_in = tot;
373  if (left) {
374  lseek(gz.fd, --end, SEEK_SET);
375  if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
376  deflatePrime(strm, 8 - left, *gz.buf);
377  }
378  lseek(gz.fd, end, SEEK_SET);
379 
380  /* clean up and return */
381  free(window);
382  free(gz.buf);
383  return gz.fd;
384 }
385 
386 /* append file "name" to gzip file gd using deflate stream strm -- if last
387  is true, then finish off the deflate stream at the end */
388 local void gztack(char *name, int gd, z_stream *strm, int last)
389 {
390  int fd, len, ret;
391  unsigned left;
392  unsigned char *in, *out;
393 
394  /* open file to compress and append */
395  fd = 0;
396  if (name != NULL) {
397  fd = open(name, O_RDONLY, 0);
398  if (fd == -1)
399  fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
400  name);
401  }
402 
403  /* allocate buffers */
404  in = malloc(CHUNK);
405  out = malloc(CHUNK);
406  if (in == NULL || out == NULL) bye("out of memory", "");
407 
408  /* compress input file and append to gzip file */
409  do {
410  /* get more input */
411  len = read(fd, in, CHUNK);
412  if (len == -1) {
413  fprintf(stderr,
414  "gzappend warning: error reading %s, skipping rest ...\n",
415  name);
416  len = 0;
417  }
418  strm->avail_in = (unsigned)len;
419  strm->next_in = in;
420  if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
421 
422  /* compress and write all available output */
423  do {
424  strm->avail_out = CHUNK;
425  strm->next_out = out;
426  ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
427  left = CHUNK - strm->avail_out;
428  while (left) {
429  len = write(gd, out + CHUNK - strm->avail_out - left, left);
430  if (len == -1) bye("writing gzip file", "");
431  left -= (unsigned)len;
432  }
433  } while (strm->avail_out == 0 && ret != Z_STREAM_END);
434  } while (len != 0);
435 
436  /* write trailer after last entry */
437  if (last) {
438  deflateEnd(strm);
439  out[0] = (unsigned char)(strm->adler);
440  out[1] = (unsigned char)(strm->adler >> 8);
441  out[2] = (unsigned char)(strm->adler >> 16);
442  out[3] = (unsigned char)(strm->adler >> 24);
443  out[4] = (unsigned char)(strm->total_in);
444  out[5] = (unsigned char)(strm->total_in >> 8);
445  out[6] = (unsigned char)(strm->total_in >> 16);
446  out[7] = (unsigned char)(strm->total_in >> 24);
447  len = 8;
448  do {
449  ret = write(gd, out + 8 - len, len);
450  if (ret == -1) bye("writing gzip file", "");
451  len -= ret;
452  } while (len);
453  close(gd);
454  }
455 
456  /* clean up and return */
457  free(out);
458  free(in);
459  if (fd > 0) close(fd);
460 }
461 
462 /* process the compression level option if present, scan the gzip file, and
463  append the specified files, or append the data from stdin if no other file
464  names are provided on the command line -- the gzip file must be writable
465  and seekable */
466 int main(int argc, char **argv)
467 {
468  int gd, level;
469  z_stream strm;
470 
471  /* ignore command name */
472  argc--; argv++;
473 
474  /* provide usage if no arguments */
475  if (*argv == NULL) {
476  printf(
477  "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n"
478  );
479  printf(
480  "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
481  return 0;
482  }
483 
484  /* set compression level */
485  level = Z_DEFAULT_COMPRESSION;
486  if (argv[0][0] == '-') {
487  if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
488  bye("invalid compression level", "");
489  level = argv[0][1] - '0';
490  if (*++argv == NULL) bye("no gzip file name after options", "");
491  }
492 
493  /* prepare to append to gzip file */
494  gd = gzscan(*argv++, &strm, level);
495 
496  /* append files on command line, or from stdin if none */
497  if (*argv == NULL)
498  gztack(NULL, gd, &strm, 1);
499  else
500  do {
501  gztack(*argv, gd, &strm, argv[1] == NULL);
502  } while (*++argv != NULL);
503  return 0;
504 }
#define Z_BLOCK
Definition: zlib.h:169
voidpf opaque
Definition: zlib.h:99
int deflateSetDictionary(z_streamp strm, Bytef *dictionary, uInt dictLength)
Definition: deflate.c:323
uLong adler
Definition: zlib.h:102
AdjacencyIterator end(const Adjacency &adj, const Vector< D > &vct, size_t pixel_index)
Returns an iterator to position after the end of elements.
#define deflateInit2(strm, level, method, windowBits, memLevel, strategy)
Definition: zlib.h:1651
static void gzheader(file *in)
Definition: gzappend.c:235
#define Z_NO_FLUSH
Definition: zlib.h:164
int deflateEnd(z_streamp strm)
Definition: deflate.c:979
unsigned long read4(file *in)
Definition: gzappend.c:223
free_func zfree
Definition: zlib.h:98
unsigned char * buf
Definition: gzappend.c:174
static unsigned gcd(unsigned a, unsigned b)
Definition: gzappend.c:102
static void rotate(unsigned char *list, unsigned len, unsigned rot)
Definition: gzappend.c:123
static void bye(char *msg1, char *msg2)
Definition: gzappend.c:93
#define DSIZE
Definition: gzappend.c:90
int data_type
Definition: zlib.h:101
int size
Definition: gzappend.c:172
static void skip(file *in, unsigned n)
Definition: gzappend.c:202
uLong total_in
Definition: zlib.h:88
void free()
#define Z_STREAM_ERROR
Definition: zlib.h:177
alloc_func zalloc
Definition: zlib.h:97
unsigned long crc32(unsigned long crc, unsigned char *buf, uInt len)
Definition: crc32.c:204
#define z_const
Definition: zconf.h:224
Bytef * next_in
Definition: zlib.h:86
#define read1(in)
Definition: gzappend.c:198
#define inflateInit2(strm, windowBits)
Definition: zlib.h:1654
#define Z_FINISH
Definition: zlib.h:168
int write(ozstream &zs, const T *x, Items items)
Definition: zstream.h:264
#define LGCHUNK
Definition: gzappend.c:88
int deflatePrime(z_streamp strm, int bits, int value)
Definition: deflate.c:464
#define Z_DEFLATED
Definition: zlib.h:205
#define Z_DATA_ERROR
Definition: zlib.h:178
static int out(void *out_desc, unsigned char *buf, unsigned len)
Definition: gun.c:131
static int gzscan(char *name, z_stream *strm, int level)
Definition: gzappend.c:259
#define SEEK_CUR
Definition: zip.c:80
Definition: zlib.h:85
#define Z_STREAM_END
Definition: zlib.h:174
int read(izstream &zs, T *x, Items items)
Definition: zstream.h:115
#define Z_MEM_ERROR
Definition: zlib.h:179
z_const unsigned char * next
Definition: gzappend.c:175
uInt avail_out
Definition: zlib.h:91
#define SEEK_SET
Definition: zip.c:88
#define local
Definition: gzappend.c:87
unsigned left
Definition: gzappend.c:173
static unsigned in(void *in_desc, z_const unsigned char **buf)
Definition: gun.c:89
char * name
Definition: gzappend.c:176
#define Z_OK
Definition: zlib.h:173
int inflateEnd(z_streamp strm)
Definition: inflate.c:1254
int inflate(z_streamp strm, int flush)
Definition: inflate.c:605
Bytef * next_out
Definition: zlib.h:90
#define CHUNK
Definition: gzappend.c:89
uLong total_out
Definition: zlib.h:92
#define Z_DEFAULT_STRATEGY
Definition: zlib.h:196
#define Z_NULL
Definition: zlib.h:208
static int readin(file *in)
Definition: gzappend.c:180
uInt avail_in
Definition: zlib.h:87
static int readmore(file *in)
Definition: gzappend.c:192
Definition: gzappend.c:170
int main(int argc, char **argv)
Definition: gzappend.c:466
int fd
Definition: gzappend.c:171
int deflate(z_streamp strm, int flush)
Definition: deflate.c:665
voidp malloc()
#define Z_DEFAULT_COMPRESSION
Definition: zlib.h:189
static void gztack(char *name, int gd, z_stream *strm, int last)
Definition: gzappend.c:388