1 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Reid Spencer and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the llvm::Compressor class, an abstraction for memory
13 //===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include "bzip2/bzlib.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <new>
#include <ostream>
#include <string>
/// One-byte tag written in front of every block produced by
/// Compressor::compress. Compressor::decompress reads it back to decide how
/// the rest of the block must be decoded.
enum CompressionTypes {
  // NOTE(review): COMP_TYPE_NONE is referenced by the compress/decompress
  // code in this file, but its value is part of the stored data format.
  // '0' is assumed here -- confirm against existing compressed files.
  COMP_TYPE_NONE  = '0',  ///< Payload is an uncompressed copy of the input
  COMP_TYPE_BZIP2 = '2'   ///< Payload is a bzip2 stream
};
29 static int getdata(char*& buffer, size_t &size,
30 llvm::Compressor::OutputDataCallback* cb, void* context) {
33 int result = (*cb)(buffer, size, context);
34 assert(buffer != 0 && "Invalid result from Compressor callback");
35 assert(size != 0 && "Invalid result from Compressor callback");
39 static int getdata_uns(char*& buffer, unsigned &size,
40 llvm::Compressor::OutputDataCallback* cb, void* context) {
42 int Res = getdata(buffer, SizeOut, cb, context);
47 //===----------------------------------------------------------------------===//
48 //=== NULLCOMP - a compression like set of routines that just copies data
49 //=== without doing any compression. This is provided so that if the
50 //=== configured environment doesn't have a compression library the
51 //=== program can still work, albeit using more data/memory.
52 //===----------------------------------------------------------------------===//
/// Stream state for the NULLCOMP routines. The field names mirror the ones
/// this file uses on bz_stream, so both code paths are driven the same way.
struct NULLCOMP_stream {
  // User provided fields
  char*  next_in;      // Next input byte to consume
  size_t avail_in;     // Number of input bytes remaining at next_in
  char*  next_out;     // Start of the current output block
  size_t avail_out;    // Free space remaining in the current output block

  // Information fields
  size_t output_count; // Total count of output bytes
};

/// Prepare a stream for use by resetting its running output total. The user
/// provided fields are left untouched.
static void NULLCOMP_init(NULLCOMP_stream* s) {
  s->output_count = 0;
}

/// Shared body of NULLCOMP_compress and NULLCOMP_decompress: move as many
/// bytes from next_in to next_out as the current output block can hold.
///
/// next_in, avail_in, avail_out and output_count are updated by the number of
/// bytes moved. next_out is NOT advanced; callers in this file install a fresh
/// block before calling again.
///
/// @returns true when all remaining input has been copied, false when the
///          output block ran out first and another one is needed.
static bool NULLCOMP_copy(NULLCOMP_stream* s) {
  assert(s && "Invalid NULLCOMP_stream");
  assert(s->next_in != 0);
  assert(s->next_out != 0);
  assert(s->avail_in >= 1);
  // avail_out may legitimately be zero: the one-byte type tag can use up the
  // whole of a one-byte block. That simply means "need another block".

  const bool everythingFits = s->avail_out >= s->avail_in;
  const size_t count = everythingFits ? s->avail_in : s->avail_out;

  if (count != 0)
    ::memcpy(s->next_out, s->next_in, count);

  s->output_count += count;
  s->next_in += count;
  s->avail_in -= count;
  s->avail_out -= count;
  return everythingFits;
}

/// "Compress" by copying input to output unchanged.
/// @returns true once all input is consumed, false if more output is needed.
static bool NULLCOMP_compress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// "Decompress" by copying input to output unchanged.
/// @returns true once all input is consumed, false if more output is needed.
static bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// Counterpart of NULLCOMP_init. The stream owns nothing, so there is nothing
/// to release.
static void NULLCOMP_end(NULLCOMP_stream* strm) {
  (void)strm;
}
namespace {

/// Growable output buffer handed to Compressor::compress/decompress through
/// BufferContext::callback. The amount of output is not known up front, so
/// the block is enlarged on demand and this structure carries the block and
/// its size from one callback invocation to the next.
///
/// The block is allocated with realloc. The structure has no destructor:
/// whoever takes \c buff is responsible for calling free() on it.
/// @brief An internal buffer object used for handling decompression
struct BufferContext {
  char* buff;   ///< Base of the allocated block; null before the first callback
  size_t size;  ///< Block size in bytes; before the first callback this is
                ///< half of what that first callback will allocate

  explicit BufferContext(size_t compressedSize) {
    // Null to indicate malloc of a new block
    buff = 0;

    // Seed the size with twice the input length. The callback doubles it
    // again, so the first allocation is 4x the input, which keeps the number
    // of reallocations (and the copying they imply) low. Saturate rather than
    // wrap around for absurdly large inputs; the callback then fails cleanly.
    const size_t MaxSize = ~static_cast<size_t>(0);
    size = (compressedSize <= MaxSize / 2) ? compressedSize * 2 : MaxSize;
  }

  /// trimTo - Reduce the size of the buffer down to the specified amount.
  /// This is useful once the final output length is known, to give back the
  /// unused tail. If the block cannot be resized it is left as it was.
  void trimTo(size_t NewSize) {
    if (NewSize == 0) {
      // realloc(p, 0) is not portable; release the block explicitly.
      ::free(buff);
      buff = 0;
      size = 0;
      return;
    }
    char* resized = static_cast<char*>(::realloc(buff, NewSize));
    if (resized != 0) {
      buff = resized;
      size = NewSize;
    }
    // On failure the old block is still valid, so keep it rather than
    // overwriting the only pointer to it with null.
  }

  /// Output callback for Compressor::compress/decompress: doubles the block
  /// and hands the newly added space to the caller.
  ///
  /// @param buff Receives the start of the space to fill. On the first call
  ///             that is the whole block; afterwards it is the second half of
  ///             the enlarged block.
  /// @param sz   Receives the number of bytes available at \p buff.
  /// @param ctxt The BufferContext to operate on.
  /// @returns 0 on success. On failure returns 1, releases the block and
  ///          leaves BufferContext::buff null.
  static size_t callback(char*&buff, size_t &sz, void* ctxt){
    BufferContext* bc = static_cast<BufferContext*>(ctxt);

    // Compute the new, doubled, size of the block, refusing sizes that would
    // overflow or that would yield an unusable zero-byte block.
    const size_t MaxSize = ~static_cast<size_t>(0);
    size_t new_size = 0;
    char* new_buff = 0;
    if (bc->size != 0 && bc->size <= MaxSize / 2) {
      new_size = bc->size * 2;
      // Extend or allocate the block (realloc(0,n) == malloc(n))
      new_buff = static_cast<char*>(::realloc(bc->buff, new_size));
    }

    if (new_buff == 0) {
      // realloc leaves the old block allocated when it fails. The caller is
      // going to abandon the operation, so release it here instead of leaking.
      ::free(bc->buff);
      bc->buff = 0;
      buff = 0;
      sz = 0;
      return 1;
    }

    if (bc->buff == 0) {
      // First allocation: the caller gets the entire block.
      buff = new_buff;
      sz = new_size;
    } else {
      // Growth: the first half already holds output, so the caller gets only
      // the extension, which is exactly as large as the old block was.
      buff = new_buff + bc->size;
      sz = bc->size;
    }

    // Retain the base and size of the allocated block
    bc->buff = new_buff;
    bc->size = new_size;
    return 0;
  }
};

} // end anonymous namespace
namespace {

// This structure retains the context when writing compressor output to a
// stream. Its callback hands out one chunk of memory at a time; each time a
// new chunk is requested, the previous (now filled) one is written to the
// stream and released. The structure also tracks how many bytes have been
// written so far, so the caller can emit the partially filled final chunk.
struct WriterContext {
  // Initialize the context. CS is the size estimate the chunk size is
  // derived from; OS is the stream that receives the data.
  WriterContext(std::ostream*OS, size_t CS)
    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}

  // Make sure we clean up memory
  ~WriterContext() {
    delete [] chunk;
  }

  // Write the first `size` bytes of the current chunk to the stream and
  // release the chunk. A size of 0 means "the whole chunk". Does nothing when
  // there is no current chunk.
  void write(size_t size = 0) {
    if (chunk == 0)
      return;
    size_t write_size = (size == 0 ? sz : size);
    assert(write_size <= sz && "Can't write more than the chunk holds");
    Out->write(chunk,write_size);
    written += write_size;
    delete [] chunk;
    chunk = 0;
    sz = 0;
  }

  // This function is a callback used by the Compressor to obtain output
  // memory. It fulfills that responsibility but also writes the previous
  // (now filled) chunk out to the stream.
  //
  // Returns 0 on success with buffer/size describing the new chunk. Returns 1
  // if no memory could be obtained, in which case buffer is null and size is
  // 0 so that the Compressor knows not to use the buffer pointer.
  static size_t callback(char*& buffer, size_t &size, void* context) {
    // Cast the context to the structure it must point to.
    WriterContext* ctxt = static_cast<WriterContext*>(context);

    // If there's a previously allocated chunk, it must now be filled with
    // output data, so we write it out and deallocate it.
    if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
      ctxt->write();
    }
    delete [] ctxt->chunk;   // only non-null here for a zero-sized chunk
    ctxt->chunk = 0;
    ctxt->sz = 0;

    // Size the chunk at half of the estimate so that the common case needs
    // only a couple of allocations. Never go below a small minimum: for tiny
    // inputs compSize / 2 is 0 or 1, and the Compressor needs room for its
    // one-byte type tag plus at least one byte of payload in every chunk.
    const size_t MinChunkSize = 64;
    size_t wanted = ctxt->compSize / 2;
    if (wanted < MinChunkSize)
      wanted = MinChunkSize;

    // Plain new[] throws on failure and would never return null; the nothrow
    // form is what makes the failure return below reachable.
    char* block = new (std::nothrow) char [wanted];
    if (block == 0) {
      buffer = 0;
      size = 0;
      return 1;
    }

    buffer = ctxt->chunk = block;
    size = ctxt->sz = wanted;
    return 0;
  }

  char* chunk;       // pointer to the chunk of memory currently being filled
  size_t sz;         // size of chunk
  size_t written;    // aggregate total of bytes written in all chunks
  size_t compSize;   // size estimate the chunk size is derived from
  std::ostream* Out; // The stream we write the data to.

private:
  // The context owns `chunk`; copying it would free the chunk twice.
  WriterContext(const WriterContext&);
  WriterContext& operator=(const WriterContext&);
};

}  // end anonymous namespace
261 // Compress in one of three ways
262 size_t Compressor::compress(const char* in, size_t size,
263 OutputDataCallback* cb, void* context) {
264 assert(in && "Can't compress null buffer");
265 assert(size && "Can't compress empty buffer");
266 assert(cb && "Can't compress without a callback function");
270 // For small files, we just don't bother compressing. bzip2 isn't very good
271 // with tiny files and can actually make the file larger, so we just avoid
273 if (size > 64*1024) {
274 // Set up the bz_stream
279 bzdata.next_in = (char*)in;
280 bzdata.avail_in = size;
282 bzdata.avail_out = 0;
283 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
284 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
285 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
286 case BZ_MEM_ERROR: throw std::string("Out of memory");
292 // Get a block of memory
293 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
294 BZ2_bzCompressEnd(&bzdata);
295 throw std::string("Can't allocate output buffer");
298 // Put compression code in first byte
299 (*bzdata.next_out++) = COMP_TYPE_BZIP2;
303 int bzerr = BZ_FINISH_OK;
304 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
305 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
306 BZ2_bzCompressEnd(&bzdata);
307 throw std::string("Can't allocate output buffer");
311 case BZ_SEQUENCE_ERROR:
312 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
314 case BZ_STREAM_END: break;
315 default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
319 result = bzdata.total_out_lo32 + 1;
320 if (sizeof(size_t) == sizeof(uint64_t))
321 result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32;
323 BZ2_bzCompressEnd(&bzdata);
325 // Do null compression, for small files
326 NULLCOMP_stream sdata;
327 sdata.next_in = (char*)in;
328 sdata.avail_in = size;
329 NULLCOMP_init(&sdata);
331 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
332 throw std::string("Can't allocate output buffer");
335 *(sdata.next_out++) = COMP_TYPE_NONE;
338 while (!NULLCOMP_compress(&sdata)) {
339 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
340 throw std::string("Can't allocate output buffer");
344 result = sdata.output_count + 1;
345 NULLCOMP_end(&sdata);
350 size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) {
351 BufferContext bc(size);
352 size_t result = compress(in,size,BufferContext::callback,(void*)&bc);
359 Compressor::compressToStream(const char*in, size_t size, std::ostream& out) {
360 // Set up the context and writer
361 WriterContext ctxt(&out, size / 2);
363 // Compress everything after the magic number (which we'll alter).
364 size_t zipSize = Compressor::compress(in,size,
365 WriterContext::callback, (void*)&ctxt);
368 ctxt.write(zipSize - ctxt.written);
373 // Decompress in one of three ways
374 size_t Compressor::decompress(const char *in, size_t size,
375 OutputDataCallback* cb, void* context) {
376 assert(in && "Can't decompress null buffer");
377 assert(size > 1 && "Can't decompress empty buffer");
378 assert(cb && "Can't decompress without a callback function");
383 case COMP_TYPE_BZIP2: {
384 // Set up the bz_stream
389 bzdata.next_in = (char*)in;
390 bzdata.avail_in = size - 1;
392 bzdata.avail_out = 0;
393 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
394 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
395 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
396 case BZ_MEM_ERROR: throw std::string("Out of memory");
402 // Get a block of memory
403 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
404 BZ2_bzDecompressEnd(&bzdata);
405 throw std::string("Can't allocate output buffer");
410 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
411 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) {
412 BZ2_bzDecompressEnd(&bzdata);
413 throw std::string("Can't allocate output buffer");
418 case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
419 case BZ_MEM_ERROR: throw std::string("Out of memory");
420 case BZ_DATA_ERROR: throw std::string("Data integrity error");
421 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
422 default: throw("Ooops");
428 result = bzdata.total_out_lo32;
429 if (sizeof(size_t) == sizeof(uint64_t))
430 result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32);
431 BZ2_bzDecompressEnd(&bzdata);
435 case COMP_TYPE_NONE: {
436 NULLCOMP_stream sdata;
437 sdata.next_in = (char*)in;
438 sdata.avail_in = size - 1;
439 NULLCOMP_init(&sdata);
441 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
442 throw std::string("Can't allocate output buffer");
445 while (!NULLCOMP_decompress(&sdata)) {
446 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
447 throw std::string("Can't allocate output buffer");
451 result = sdata.output_count;
452 NULLCOMP_end(&sdata);
457 throw std::string("Unknown type of compressed data");
464 Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) {
465 BufferContext bc(size);
466 size_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
472 Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){
473 // Set up the context and writer
474 WriterContext ctxt(&out,size / 2);
476 // Compress everything after the magic number (which we'll alter)
477 size_t zipSize = Compressor::decompress(in,size,
478 WriterContext::callback, (void*)&ctxt);
481 ctxt.write(zipSize - ctxt.written);