lib/Support/Compressor.cpp

   1 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Reid Spencer and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the llvm::Compressor class, an abstraction for memory
  11 // block compression.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Config/config.h"
  16 #include "llvm/Support/Compressor.h"
  17 #include "llvm/ADT/StringExtras.h"
  18 #include <cassert>
  19 #include <string>
  20 #include "bzip2/bzlib.h"
  21
  22 namespace {
  23
  24 enum CompressionTypes {
  25   COMP_TYPE_NONE  = '0',
  26   COMP_TYPE_BZIP2 = '2',
  27 };
  28
  29 inline int getdata(char*& buffer, unsigned& size,
  30                    llvm::Compressor::OutputDataCallback* cb, void* context) {
  31   buffer = 0;
  32   size = 0;
  33   int result = (*cb)(buffer, size, context);
  34   assert(buffer != 0 && "Invalid result from Compressor callback");
  35   assert(size != 0 && "Invalid result from Compressor callback");
  36   return result;
  37 }
  38
  39 //===----------------------------------------------------------------------===//
  40 //=== NULLCOMP - a compression like set of routines that just copies data
  41 //===            without doing any compression. This is provided so that if the
  42 //===            configured environment doesn't have a compression library the
  43 //===            program can still work, albeit using more data/memory.
  44 //===----------------------------------------------------------------------===//
  45
  46 struct NULLCOMP_stream {
  47   // User provided fields
  48   char* next_in;
  49   unsigned avail_in;
  50   char* next_out;
  51   unsigned avail_out;
  52
  53   // Information fields
  54   uint64_t output_count; // Total count of output bytes
  55 };
  56
  57 void NULLCOMP_init(NULLCOMP_stream* s) {
  58   s->output_count = 0;
  59 }
  60
  61 bool NULLCOMP_compress(NULLCOMP_stream* s) {
  62   assert(s && "Invalid NULLCOMP_stream");
  63   assert(s->next_in != 0);
  64   assert(s->next_out != 0);
  65   assert(s->avail_in >= 1);
  66   assert(s->avail_out >= 1);
  67
  68   if (s->avail_out >= s->avail_in) {
  69     ::memcpy(s->next_out, s->next_in, s->avail_in);
  70     s->output_count += s->avail_in;
  71     s->avail_out -= s->avail_in;
  72     s->next_in += s->avail_in;
  73     s->avail_in = 0;
  74     return true;
  75   } else {
  76     ::memcpy(s->next_out, s->next_in, s->avail_out);
  77     s->output_count += s->avail_out;
  78     s->avail_in -= s->avail_out;
  79     s->next_in += s->avail_out;
  80     s->avail_out = 0;
  81     return false;
  82   }
  83 }
  84
  85 bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  86   assert(s && "Invalid NULLCOMP_stream");
  87   assert(s->next_in != 0);
  88   assert(s->next_out != 0);
  89   assert(s->avail_in >= 1);
  90   assert(s->avail_out >= 1);
  91
  92   if (s->avail_out >= s->avail_in) {
  93     ::memcpy(s->next_out, s->next_in, s->avail_in);
  94     s->output_count += s->avail_in;
  95     s->avail_out -= s->avail_in;
  96     s->next_in += s->avail_in;
  97     s->avail_in = 0;
  98     return true;
  99   } else {
 100     ::memcpy(s->next_out, s->next_in, s->avail_out);
 101     s->output_count += s->avail_out;
 102     s->avail_in -= s->avail_out;
 103     s->next_in += s->avail_out;
 104     s->avail_out = 0;
 105     return false;
 106   }
 107 }
 108
 109 void NULLCOMP_end(NULLCOMP_stream* strm) {
 110 }
 111
 112 /// This structure is only used when a bytecode file is compressed.
 113 /// As bytecode is being decompressed, the memory buffer might need
 114 /// to be reallocated. The buffer allocation is handled in a callback
 115 /// and this structure is needed to retain information across calls
 116 /// to the callback.
 117 /// @brief An internal buffer object used for handling decompression
 118 struct BufferContext {
 119   char* buff;
 120   unsigned size;
 121   BufferContext(unsigned compressedSize ) {
 122     // Null to indicate malloc of a new block
 123     buff = 0;
 124
 125     // Compute the initial length of the uncompression buffer. Note that this
 126     // is twice the length of the compressed buffer and will be doubled again
 127     // in the callback for an initial allocation of 4x compressedSize.  This
 128     // calculation is based on the typical compression ratio of bzip2 on LLVM
 129     // bytecode files which typically ranges in the 50%-75% range.   Since we
 130     // tyipcally get at least 50%, doubling is insufficient. By using a 4x
 131     // multiplier on the first allocation, we minimize the impact of having to
 132     // copy the buffer on reallocation.
 133     size = compressedSize*2;
 134   }
 135
 136   /// This function handles allocation of the buffer used for decompression of
 137   /// compressed bytecode files. It is called by Compressor::decompress which is
 138   /// called by BytecodeReader::ParseBytecode.
 139   static unsigned callback(char*&buff, unsigned& sz, void* ctxt){
 140     // Case the context variable to our BufferContext
 141     BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
 142
 143     // Compute the new, doubled, size of the block
 144     unsigned new_size = bc->size * 2;
 145
 146     // Extend or allocate the block (realloc(0,n) == malloc(n))
 147     char* new_buff = (char*) ::realloc(bc->buff, new_size);
 148
 149     // Figure out what to return to the Compressor. If this is the first call,
 150     // then bc->buff will be null. In this case we want to return the entire
 151     // buffer because there was no previous allocation.  Otherwise, when the
 152     // buffer is reallocated, we save the new base pointer in the
 153     // BufferContext.buff field but return the address of only the extension,
 154     // mid-way through the buffer (since its size was doubled). Furthermore,
 155     // the sz result must be 1/2 the total size of the buffer.
 156     if (bc->buff == 0 ) {
 157       buff = bc->buff = new_buff;
 158       sz = new_size;
 159     } else {
 160       bc->buff = new_buff;
 161       buff = new_buff + bc->size;
 162       sz = bc->size;
 163     }
 164
 165     // Retain the size of the allocated block
 166     bc->size = new_size;
 167
 168     // Make sure we fail (return 1) if we didn't get any memory.
 169     return (bc->buff == 0 ? 1 : 0);
 170   }
 171 };
 172
 173 // This structure retains the context when compressing the bytecode file. The
 174 // WriteCompressedData function below uses it to keep track of the previously
 175 // filled chunk of memory (which it writes) and how many bytes have been
 176 // written.
 177 struct WriterContext {
 178   // Initialize the context
 179   WriterContext(std::ostream*OS, unsigned CS)
 180     : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
 181
 182   // Make sure we clean up memory
 183   ~WriterContext() {
 184     if (chunk)
 185       delete [] chunk;
 186   }
 187
 188   // Write the chunk
 189   void write(unsigned size = 0) {
 190     unsigned write_size = (size == 0 ? sz : size);
 191     Out->write(chunk,write_size);
 192     written += write_size;
 193     delete [] chunk;
 194     chunk = 0;
 195     sz = 0;
 196   }
 197
 198   // This function is a callback used by the Compressor::compress function to
 199   // allocate memory for the compression buffer. This function fulfills that
 200   // responsibility but also writes the previous (now filled) buffer out to the
 201   // stream.
 202   static unsigned callback(char*& buffer, unsigned& size, void* context) {
 203     // Cast the context to the structure it must point to.
 204     WriterContext* ctxt =
 205       reinterpret_cast<WriterContext*>(context);
 206
 207     // If there's a previously allocated chunk, it must now be filled with
 208     // compressed data, so we write it out and deallocate it.
 209     if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
 210       ctxt->write();
 211     }
 212
 213     // Compute the size of the next chunk to allocate. We attempt to allocate
 214     // enough memory to handle the compression in a single memory allocation. In
 215     // general, the worst we do on compression of bytecode is about 50% so we
 216     // conservatively estimate compSize / 2 as the size needed for the
 217     // compression buffer. compSize is the size of the compressed data, provided
 218     // by WriteBytecodeToFile.
 219     size = ctxt->sz = ctxt->compSize / 2;
 220
 221     // Allocate the chunks
 222     buffer = ctxt->chunk = new char [size];
 223
 224     // We must return 1 if the allocation failed so that the Compressor knows
 225     // not to use the buffer pointer.
 226     return (ctxt->chunk == 0 ? 1 : 0);
 227   }
 228
 229   char* chunk;       // pointer to the chunk of memory filled by compression
 230   unsigned sz;       // size of chunk
 231   unsigned written;  // aggregate total of bytes written in all chunks
 232   unsigned compSize; // size of the uncompressed buffer
 233   std::ostream* Out; // The stream we write the data to.
 234 };
 235
 236 }
 237
 238 namespace llvm {
 239
 240 // Compress in one of three ways
 241 uint64_t Compressor::compress(const char* in, unsigned size,
 242     OutputDataCallback* cb, void* context ) {
 243   assert(in && "Can't compress null buffer");
 244   assert(size && "Can't compress empty buffer");
 245   assert(cb && "Can't compress without a callback function");
 246
 247   uint64_t result = 0;
 248
 249   // For small files, we just don't bother compressing. bzip2 isn't very good
 250   // with tiny files and can actually make the file larger, so we just avoid
 251   // it altogether.
 252   if (size > 64*1024) {
 253     // Set up the bz_stream
 254     bz_stream bzdata;
 255     bzdata.bzalloc = 0;
 256     bzdata.bzfree = 0;
 257     bzdata.opaque = 0;
 258     bzdata.next_in = (char*)in;
 259     bzdata.avail_in = size;
 260     bzdata.next_out = 0;
 261     bzdata.avail_out = 0;
 262     switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) {
 263       case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
 264       case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 265       case BZ_MEM_ERROR:    throw std::string("Out of memory");
 266       case BZ_OK:
 267       default:
 268         break;
 269     }
 270
 271     // Get a block of memory
 272     if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
 273       BZ2_bzCompressEnd(&bzdata);
 274       throw std::string("Can't allocate output buffer");
 275     }
 276
 277     // Put compression code in first byte
 278     (*bzdata.next_out++) = COMP_TYPE_BZIP2;
 279     bzdata.avail_out--;
 280
 281     // Compress it
 282     int bzerr = BZ_FINISH_OK;
 283     while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
 284       if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
 285         BZ2_bzCompressEnd(&bzdata);
 286         throw std::string("Can't allocate output buffer");
 287       }
 288     }
 289     switch (bzerr) {
 290       case BZ_SEQUENCE_ERROR:
 291       case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
 292       case BZ_FINISH_OK:
 293       case BZ_STREAM_END: break;
 294       default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
 295     }
 296
 297     // Finish
 298     result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
 299         bzdata.total_out_lo32 + 1;
 300
 301     BZ2_bzCompressEnd(&bzdata);
 302   } else {
 303     // Do null compression, for small files
 304     NULLCOMP_stream sdata;
 305     sdata.next_in = (char*)in;
 306     sdata.avail_in = size;
 307     NULLCOMP_init(&sdata);
 308
 309     if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 310       throw std::string("Can't allocate output buffer");
 311     }
 312
 313     *(sdata.next_out++) = COMP_TYPE_NONE;
 314     sdata.avail_out--;
 315
 316     while (!NULLCOMP_compress(&sdata)) {
 317       if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 318         throw std::string("Can't allocate output buffer");
 319       }
 320     }
 321
 322     result = sdata.output_count + 1;
 323     NULLCOMP_end(&sdata);
 324   }
 325   return result;
 326 }
 327
 328 uint64_t
 329 Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out) {
 330   BufferContext bc(size);
 331   unsigned result = compress(in,size,BufferContext::callback,(void*)&bc);
 332   out = bc.buff;
 333   return result;
 334 }
 335
 336 uint64_t
 337 Compressor::compressToStream(const char*in, unsigned size, std::ostream& out) {
 338   // Set up the context and writer
 339   WriterContext ctxt(&out,size / 2);
 340
 341   // Compress everything after the magic number (which we'll alter)
 342   uint64_t zipSize = Compressor::compress(in,size,
 343     WriterContext::callback, (void*)&ctxt);
 344
 345   if (ctxt.chunk) {
 346     ctxt.write(zipSize - ctxt.written);
 347   }
 348   return zipSize;
 349 }
 350
 351 // Decompress in one of three ways
 352 uint64_t Compressor::decompress(const char *in, unsigned size,
 353                                 OutputDataCallback* cb, void* context) {
 354   assert(in && "Can't decompress null buffer");
 355   assert(size > 1 && "Can't decompress empty buffer");
 356   assert(cb && "Can't decompress without a callback function");
 357
 358   uint64_t result = 0;
 359
 360   switch (*in++) {
 361     case COMP_TYPE_BZIP2: {
 362       // Set up the bz_stream
 363       bz_stream bzdata;
 364       bzdata.bzalloc = 0;
 365       bzdata.bzfree = 0;
 366       bzdata.opaque = 0;
 367       bzdata.next_in = (char*)in;
 368       bzdata.avail_in = size - 1;
 369       bzdata.next_out = 0;
 370       bzdata.avail_out = 0;
 371       switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
 372         case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
 373         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 374         case BZ_MEM_ERROR:    throw std::string("Out of memory");
 375         case BZ_OK:
 376         default:
 377           break;
 378       }
 379
 380       // Get a block of memory
 381       if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
 382         BZ2_bzDecompressEnd(&bzdata);
 383         throw std::string("Can't allocate output buffer");
 384       }
 385
 386       // Decompress it
 387       int bzerr = BZ_OK;
 388       while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
 389         if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) {
 390           BZ2_bzDecompressEnd(&bzdata);
 391           throw std::string("Can't allocate output buffer");
 392         }
 393       }
 394
 395       switch (bzerr) {
 396         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
 397         case BZ_MEM_ERROR:    throw std::string("Out of memory");
 398         case BZ_DATA_ERROR:   throw std::string("Data integrity error");
 399         case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
 400         default: throw("Ooops");
 401         case BZ_STREAM_END:
 402           break;
 403       }
 404
 405       // Finish
 406       result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
 407         bzdata.total_out_lo32;
 408       BZ2_bzDecompressEnd(&bzdata);
 409       break;
 410     }
 411
 412     case COMP_TYPE_NONE: {
 413       NULLCOMP_stream sdata;
 414       sdata.next_in = (char*)in;
 415       sdata.avail_in = size - 1;
 416       NULLCOMP_init(&sdata);
 417
 418       if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 419         throw std::string("Can't allocate output buffer");
 420       }
 421
 422       while (!NULLCOMP_decompress(&sdata)) {
 423         if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) {
 424           throw std::string("Can't allocate output buffer");
 425         }
 426       }
 427
 428       result = sdata.output_count;
 429       NULLCOMP_end(&sdata);
 430       break;
 431     }
 432
 433     default:
 434       throw std::string("Unknown type of compressed data");
 435   }
 436
 437   return result;
 438 }
 439
 440 uint64_t
 441 Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
 442   BufferContext bc(size);
 443   unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
 444   out = bc.buff;
 445   return result;
 446 }
 447
 448 uint64_t
 449 Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
 450   // Set up the context and writer
 451   WriterContext ctxt(&out,size / 2);
 452
 453   // Compress everything after the magic number (which we'll alter)
 454   uint64_t zipSize = Compressor::decompress(in,size,
 455     WriterContext::callback, (void*)&ctxt);
 456
 457   if (ctxt.chunk) {
 458     ctxt.write(zipSize - ctxt.written);
 459   }
 460   return zipSize;
 461 }
 462
 463 }
 464
 465 // vim: sw=2 ai