1 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Reid Spencer and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the llvm::Compressor class, an abstraction for memory
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Config/config.h"
16 #include "llvm/Support/Compressor.h"
17 #include "llvm/ADT/StringExtras.h"
31 inline int getdata(char*& buffer, unsigned& size,
32 llvm::Compressor::OutputDataCallback* cb, void* context) {
35 int result = (*cb)(buffer, size, context);
36 assert(buffer != 0 && "Invalid result from Compressor callback");
37 assert(size != 0 && "Invalid result from Compressor callback");
41 //===----------------------------------------------------------------------===//
42 //=== NULLCOMP - a compression like set of routines that just copies data
43 //=== without doing any compression. This is provided so that if the
44 //=== configured environment doesn't have a compression library the
45 //=== program can still work, albeit using more data/memory.
46 //===----------------------------------------------------------------------===//
48 struct NULLCOMP_stream {
49 // User provided fields
56 uint64_t output_count; // Total count of output bytes
59 void NULLCOMP_init(NULLCOMP_stream* s) {
63 bool NULLCOMP_compress(NULLCOMP_stream* s) {
64 assert(s && "Invalid NULLCOMP_stream");
65 assert(s->next_in != 0);
66 assert(s->next_out != 0);
67 assert(s->avail_in >= 1);
68 assert(s->avail_out >= 1);
70 if (s->avail_out >= s->avail_in) {
71 ::memcpy(s->next_out, s->next_in, s->avail_in);
72 s->output_count += s->avail_in;
73 s->avail_out -= s->avail_in;
74 s->next_in += s->avail_in;
78 ::memcpy(s->next_out, s->next_in, s->avail_out);
79 s->output_count += s->avail_out;
80 s->avail_in -= s->avail_out;
81 s->next_in += s->avail_out;
87 bool NULLCOMP_decompress(NULLCOMP_stream* s) {
88 assert(s && "Invalid NULLCOMP_stream");
89 assert(s->next_in != 0);
90 assert(s->next_out != 0);
91 assert(s->avail_in >= 1);
92 assert(s->avail_out >= 1);
94 if (s->avail_out >= s->avail_in) {
95 ::memcpy(s->next_out, s->next_in, s->avail_in);
96 s->output_count += s->avail_in;
97 s->avail_out -= s->avail_in;
98 s->next_in += s->avail_in;
102 ::memcpy(s->next_out, s->next_in, s->avail_out);
103 s->output_count += s->avail_out;
104 s->avail_in -= s->avail_out;
105 s->next_in += s->avail_out;
111 void NULLCOMP_end(NULLCOMP_stream* strm) {
114 /// This structure is only used when a bytecode file is compressed.
115 /// As bytecode is being decompressed, the memory buffer might need
116 /// to be reallocated. The buffer allocation is handled in a callback
117 /// and this structure is needed to retain information across calls
119 /// @brief An internal buffer object used for handling decompression
120 struct BufferContext {
123 BufferContext(unsigned compressedSize ) {
124 // Null to indicate malloc of a new block
127 // Compute the initial length of the uncompression buffer. Note that this
128 // is twice the length of the compressed buffer and will be doubled again
129 // in the callback for an initial allocation of 4x compressedSize. This
130 // calculation is based on the typical compression ratio of bzip2 on LLVM
131 // bytecode files which typically ranges in the 50%-75% range. Since we
132 // tyipcally get at least 50%, doubling is insufficient. By using a 4x
133 // multiplier on the first allocation, we minimize the impact of having to
134 // copy the buffer on reallocation.
135 size = compressedSize*2;
138 /// This function handles allocation of the buffer used for decompression of
139 /// compressed bytecode files. It is called by Compressor::decompress which is
140 /// called by BytecodeReader::ParseBytecode.
141 static unsigned callback(char*&buff, unsigned& sz, void* ctxt){
142 // Case the context variable to our BufferContext
143 BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
145 // Compute the new, doubled, size of the block
146 unsigned new_size = bc->size * 2;
148 // Extend or allocate the block (realloc(0,n) == malloc(n))
149 char* new_buff = (char*) ::realloc(bc->buff, new_size);
151 // Figure out what to return to the Compressor. If this is the first call,
152 // then bc->buff will be null. In this case we want to return the entire
153 // buffer because there was no previous allocation. Otherwise, when the
154 // buffer is reallocated, we save the new base pointer in the BufferContext.buff
155 // field but return the address of only the extension, mid-way through the
156 // buffer (since its size was doubled). Furthermore, the sz result must be
157 // 1/2 the total size of the buffer.
158 if (bc->buff == 0 ) {
159 buff = bc->buff = new_buff;
163 buff = new_buff + bc->size;
167 // Retain the size of the allocated block
170 // Make sure we fail (return 1) if we didn't get any memory.
171 return (bc->buff == 0 ? 1 : 0);
175 // This structure retains the context when compressing the bytecode file. The
176 // WriteCompressedData function below uses it to keep track of the previously
177 // filled chunk of memory (which it writes) and how many bytes have been
179 struct WriterContext {
180 // Initialize the context
181 WriterContext(std::ostream*OS, unsigned CS)
182 : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
184 // Make sure we clean up memory
191 void write(unsigned size = 0) {
192 unsigned write_size = (size == 0 ? sz : size);
193 Out->write(chunk,write_size);
194 written += write_size;
200 // This function is a callback used by the Compressor::compress function to
201 // allocate memory for the compression buffer. This function fulfills that
202 // responsibility but also writes the previous (now filled) buffer out to the
204 static unsigned callback(char*& buffer, unsigned& size, void* context) {
205 // Cast the context to the structure it must point to.
206 WriterContext* ctxt =
207 reinterpret_cast<WriterContext*>(context);
209 // If there's a previously allocated chunk, it must now be filled with
210 // compressed data, so we write it out and deallocate it.
211 if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
215 // Compute the size of the next chunk to allocate. We attempt to allocate
216 // enough memory to handle the compression in a single memory allocation. In
217 // general, the worst we do on compression of bytecode is about 50% so we
218 // conservatively estimate compSize / 2 as the size needed for the
219 // compression buffer. compSize is the size of the compressed data, provided
220 // by WriteBytecodeToFile.
221 size = ctxt->sz = ctxt->compSize / 2;
223 // Allocate the chunks
224 buffer = ctxt->chunk = new char [size];
226 // We must return 1 if the allocation failed so that the Compressor knows
227 // not to use the buffer pointer.
228 return (ctxt->chunk == 0 ? 1 : 0);
231 char* chunk; // pointer to the chunk of memory filled by compression
232 unsigned sz; // size of chunk
233 unsigned written; // aggregate total of bytes written in all chunks
234 unsigned compSize; // size of the uncompressed buffer
235 std::ostream* Out; // The stream we write the data to.
242 // Compress in one of three ways
243 uint64_t Compressor::compress(const char* in, unsigned size,
244 OutputDataCallback* cb, Algorithm hint, void* context ) {
245 assert(in && "Can't compress null buffer");
246 assert(size && "Can't compress empty buffer");
247 assert(cb && "Can't compress without a callback function");
252 case COMP_TYPE_BZIP2: {
253 #if defined(HAVE_BZIP2)
254 // Set up the bz_stream
259 bzdata.next_in = (char*)in;
260 bzdata.avail_in = size;
262 bzdata.avail_out = 0;
263 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
264 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
265 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
266 case BZ_MEM_ERROR: throw std::string("Out of memory");
272 // Get a block of memory
273 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
274 BZ2_bzCompressEnd(&bzdata);
275 throw std::string("Can't allocate output buffer");
278 // Put compression code in first byte
279 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
283 int bzerr = BZ_FINISH_OK;
284 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
285 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
286 BZ2_bzCompressEnd(&bzdata);
287 throw std::string("Can't allocate output buffer");
291 case BZ_SEQUENCE_ERROR:
292 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
294 case BZ_STREAM_END: break;
295 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
299 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
300 bzdata.total_out_lo32 + 1;
302 BZ2_bzCompressEnd(&bzdata);
309 case COMP_TYPE_ZLIB: {
310 #if defined(HAVE_ZLIB)
312 zdata.zalloc = Z_NULL;
313 zdata.zfree = Z_NULL;
314 zdata.opaque = Z_NULL;
315 zdata.next_in = (Bytef*)in;
316 zdata.avail_in = size;
317 if (Z_OK != deflateInit(&zdata,6))
318 throw std::string(zdata.msg ? zdata.msg : "zlib error");
320 if (0 != getdata((char*&)(zdata.next_out), zdata.avail_out,cb,context)) {
322 throw std::string("Can't allocate output buffer");
325 (*zdata.next_out++) = COMP_TYPE_ZLIB;
329 while ( Z_OK == deflate(&zdata,0) && zdata.avail_out == 0) {
330 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb,context)) {
332 throw std::string("Can't allocate output buffer");
336 while ( Z_STREAM_END != deflate(&zdata, Z_FINISH)) {
337 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb,context)) {
339 throw std::string("Can't allocate output buffer");
343 result = static_cast<uint64_t>(zdata.total_out) + 1;
352 case COMP_TYPE_SIMPLE: {
353 NULLCOMP_stream sdata;
354 sdata.next_in = (char*)in;
355 sdata.avail_in = size;
356 NULLCOMP_init(&sdata);
358 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
359 throw std::string("Can't allocate output buffer");
362 *(sdata.next_out++) = COMP_TYPE_SIMPLE;
365 while (!NULLCOMP_compress(&sdata)) {
366 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
367 throw std::string("Can't allocate output buffer");
371 result = sdata.output_count + 1;
372 NULLCOMP_end(&sdata);
376 throw std::string("Invalid compression type hint");
382 Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out,
384 BufferContext bc(size);
385 unsigned result = compress(in,size,BufferContext::callback,hint,(void*)&bc);
391 Compressor::compressToStream(const char*in, unsigned size, std::ostream& out,
393 // Set up the context and writer
394 WriterContext ctxt(&out,size / 2);
396 // Compress everything after the magic number (which we'll alter)
397 uint64_t zipSize = Compressor::compress(in,size,
398 WriterContext::callback, hint, (void*)&ctxt);
401 ctxt.write(zipSize - ctxt.written);
406 // Decompress in one of three ways
407 uint64_t Compressor::decompress(const char *in, unsigned size,
408 OutputDataCallback* cb, void* context) {
409 assert(in && "Can't decompress null buffer");
410 assert(size > 1 && "Can't decompress empty buffer");
411 assert(cb && "Can't decompress without a callback function");
416 case COMP_TYPE_BZIP2: {
417 #if !defined(HAVE_BZIP2)
418 throw std::string("Can't decompress BZIP2 data");
420 // Set up the bz_stream
425 bzdata.next_in = (char*)in;
426 bzdata.avail_in = size - 1;
428 bzdata.avail_out = 0;
429 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
430 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
431 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
432 case BZ_MEM_ERROR: throw std::string("Out of memory");
438 // Get a block of memory
439 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
440 BZ2_bzDecompressEnd(&bzdata);
441 throw std::string("Can't allocate output buffer");
446 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
447 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
448 BZ2_bzDecompressEnd(&bzdata);
449 throw std::string("Can't allocate output buffer");
454 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
455 case BZ_MEM_ERROR: throw std::string("Out of memory");
456 case BZ_DATA_ERROR: throw std::string("Data integrity error");
457 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
458 default: throw("Ooops");
464 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
465 bzdata.total_out_lo32;
466 BZ2_bzDecompressEnd(&bzdata);
471 case COMP_TYPE_ZLIB: {
472 #if !defined(HAVE_ZLIB)
473 throw std::string("Can't decompress ZLIB data");
476 zdata.zalloc = Z_NULL;
477 zdata.zfree = Z_NULL;
478 zdata.opaque = Z_NULL;
479 zdata.next_in = (Bytef*)(in);
480 zdata.avail_in = size - 1;
481 if ( Z_OK != inflateInit(&zdata))
482 throw std::string(zdata.msg ? zdata.msg : "zlib error");
484 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb,context)) {
486 throw std::string("Can't allocate output buffer");
490 while (Z_OK == (zerr = inflate(&zdata,0))) {
491 if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb,context)) {
493 throw std::string("Can't allocate output buffer");
497 if (zerr != Z_STREAM_END)
498 throw std::string(zdata.msg?zdata.msg:"zlib error");
500 result = static_cast<uint64_t>(zdata.total_out);
506 case COMP_TYPE_SIMPLE: {
507 NULLCOMP_stream sdata;
508 sdata.next_in = (char*)in;
509 sdata.avail_in = size - 1;
510 NULLCOMP_init(&sdata);
512 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
513 throw std::string("Can't allocate output buffer");
516 while (!NULLCOMP_decompress(&sdata)) {
517 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
518 throw std::string("Can't allocate output buffer");
522 result = sdata.output_count;
523 NULLCOMP_end(&sdata);
528 throw std::string("Unknown type of compressed data");
535 Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
536 BufferContext bc(size);
537 unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
543 Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
544 // Set up the context and writer
545 WriterContext ctxt(&out,size / 2);
547 // Compress everything after the magic number (which we'll alter)
548 uint64_t zipSize = Compressor::decompress(in,size,
549 WriterContext::callback, (void*)&ctxt);
552 ctxt.write(zipSize - ctxt.written);