2 LZ4 HC - High Compression Mode of LZ4
\r
3 Copyright (C) 2011-2013, Yann Collet.
\r
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
\r
6 Redistribution and use in source and binary forms, with or without
\r
7 modification, are permitted provided that the following conditions are
\r
10 * Redistributions of source code must retain the above copyright
\r
11 notice, this list of conditions and the following disclaimer.
\r
12 * Redistributions in binary form must reproduce the above
\r
13 copyright notice, this list of conditions and the following disclaimer
\r
14 in the documentation and/or other materials provided with the
\r
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
\r
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
\r
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
\r
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
\r
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
\r
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
\r
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
\r
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
29 You can contact the author at :
\r
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
\r
31 - LZ4 source repository : http://code.google.com/p/lz4/
\r
35 Note : this source file requires "lz4hc_encoder.h"
\r
39 //**************************************
\r
41 //**************************************
\r
42 #include <stdlib.h> // calloc, free
\r
43 #define ALLOCATOR(s) calloc(1,s)
\r
44 #define FREEMEM free
\r
45 #include <string.h> // memset, memcpy
\r
46 #define MEM_INIT memset
\r
49 //**************************************
\r
50 // CPU Feature Detection
\r
51 //**************************************
\r
53 #if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
\r
54 || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
\r
55 || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
\r
56 || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode
\r
57 # define LZ4_ARCH64 1
\r
59 # define LZ4_ARCH64 0
\r
62 // Little Endian or Big Endian ?
\r
63 // Overwrite the #define below if you know your architecture endianess
\r
64 #if defined (__GLIBC__)
\r
65 # include <endian.h>
\r
66 # if (__BYTE_ORDER == __BIG_ENDIAN)
\r
67 # define LZ4_BIG_ENDIAN 1
\r
69 #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
\r
70 # define LZ4_BIG_ENDIAN 1
\r
71 #elif defined(__sparc) || defined(__sparc__) \
\r
72 || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
\r
73 || defined(__hpux) || defined(__hppa) \
\r
74 || defined(_MIPSEB) || defined(__s390__)
\r
75 # define LZ4_BIG_ENDIAN 1
\r
77 // Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
\r
80 // Unaligned memory access is automatically enabled for "common" CPU, such as x86.
\r
81 // For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
\r
82 // If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
\r
83 #if defined(__ARM_FEATURE_UNALIGNED)
\r
84 # define LZ4_FORCE_UNALIGNED_ACCESS 1
\r
87 // Define this parameter if your target system or compiler does not support hardware bit count
\r
88 #if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count
\r
89 # define LZ4_FORCE_SW_BITCOUNT
\r
93 //**************************************
\r
95 //**************************************
\r
96 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
\r
97 /* "restrict" is a known keyword */
\r
99 # define restrict // Disable restrict
\r
103 # define forceinline __forceinline
\r
104 # include <intrin.h> // For Visual 2005
\r
105 # if LZ4_ARCH64 // 64-bits
\r
106 # pragma intrinsic(_BitScanForward64) // For Visual 2005
\r
107 # pragma intrinsic(_BitScanReverse64) // For Visual 2005
\r
109 # pragma intrinsic(_BitScanForward) // For Visual 2005
\r
110 # pragma intrinsic(_BitScanReverse) // For Visual 2005
\r
112 # pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
\r
113 # pragma warning(disable : 4701) // disable: C4701: potentially uninitialized local variable used
\r
116 # define forceinline inline __attribute__((always_inline))
\r
118 # define forceinline inline
\r
122 #ifdef _MSC_VER // Visual Studio
\r
123 #define lz4_bswap16(x) _byteswap_ushort(x)
\r
125 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
\r
129 //**************************************
\r
131 //**************************************
\r
136 //**************************************
\r
138 //**************************************
\r
139 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
\r
140 # include <stdint.h>
\r
141 typedef uint8_t BYTE;
\r
142 typedef uint16_t U16;
\r
143 typedef uint32_t U32;
\r
144 typedef int32_t S32;
\r
145 typedef uint64_t U64;
\r
147 typedef unsigned char BYTE;
\r
148 typedef unsigned short U16;
\r
149 typedef unsigned int U32;
\r
150 typedef signed int S32;
\r
151 typedef unsigned long long U64;
\r
154 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
\r
155 # define _PACKED __attribute__ ((packed))
\r
160 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
\r
164 # pragma pack(push, 1)
\r
168 typedef struct _U16_S { U16 v; } _PACKED U16_S;
\r
169 typedef struct _U32_S { U32 v; } _PACKED U32_S;
\r
170 typedef struct _U64_S { U64 v; } _PACKED U64_S;
\r
172 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
\r
176 #define A64(x) (((U64_S *)(x))->v)
\r
177 #define A32(x) (((U32_S *)(x))->v)
\r
178 #define A16(x) (((U16_S *)(x))->v)
\r
181 //**************************************
\r
183 //**************************************
\r
186 #define DICTIONARY_LOGSIZE 16
\r
187 #define MAXD (1<<DICTIONARY_LOGSIZE)
\r
188 #define MAXD_MASK ((U32)(MAXD - 1))
\r
189 #define MAX_DISTANCE (MAXD - 1)
\r
191 #define HASH_LOG (DICTIONARY_LOGSIZE-1)
\r
192 #define HASHTABLESIZE (1 << HASH_LOG)
\r
193 #define HASH_MASK (HASHTABLESIZE - 1)
\r
195 #define MAX_NB_ATTEMPTS 256
\r
198 #define ML_MASK (size_t)((1U<<ML_BITS)-1)
\r
199 #define RUN_BITS (8-ML_BITS)
\r
200 #define RUN_MASK ((1U<<RUN_BITS)-1)
\r
202 #define COPYLENGTH 8
\r
203 #define LASTLITERALS 5
\r
204 #define MFLIMIT (COPYLENGTH+MINMATCH)
\r
205 #define MINLENGTH (MFLIMIT+1)
\r
206 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
\r
208 #define KB *(1U<<10)
\r
209 #define MB *(1U<<20)
\r
210 #define GB *(1U<<30)
\r
213 //**************************************
\r
214 // Architecture-specific macros
\r
215 //**************************************
\r
216 #if LZ4_ARCH64 // 64-bit
\r
217 # define STEPSIZE 8
\r
218 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
\r
219 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
\r
223 # define INITBASE(b,s) const BYTE* const b = s
\r
225 # define STEPSIZE 4
\r
226 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
\r
227 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
\r
230 //# define HTYPE const BYTE*
\r
231 //# define INITBASE(b,s) const int b = 0
\r
233 # define INITBASE(b,s) const BYTE* const b = s
\r
236 #if defined(LZ4_BIG_ENDIAN)
\r
237 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
\r
238 # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
\r
239 #else // Little Endian
\r
240 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
\r
241 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
\r
245 //************************************************************
\r
247 //************************************************************
\r
250 const BYTE* inputBuffer;
\r
253 HTYPE hashTable[HASHTABLESIZE];
\r
254 U16 chainTable[MAXD];
\r
255 const BYTE* nextToUpdate;
\r
256 } LZ4HC_Data_Structure;
\r
259 //**************************************
\r
261 //**************************************
\r
262 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
\r
263 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
\r
264 #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
\r
265 #define HASH_VALUE(p) HASH_FUNCTION(A32(p))
\r
266 #define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base)
\r
267 #define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK]
\r
268 #define GETNEXT(p) ((p) - (size_t)DELTANEXT(p))
\r
271 //**************************************
\r
272 // Private functions
\r
273 //**************************************
\r
276 inline static int LZ4_NbCommonBytes (register U64 val)
\r
278 #if defined(LZ4_BIG_ENDIAN)
\r
279 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
280 unsigned long r = 0;
\r
281 _BitScanReverse64( &r, val );
\r
282 return (int)(r>>3);
\r
283 # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
284 return (__builtin_clzll(val) >> 3);
\r
287 if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
\r
288 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
\r
293 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
294 unsigned long r = 0;
\r
295 _BitScanForward64( &r, val );
\r
296 return (int)(r>>3);
\r
297 # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
298 return (__builtin_ctzll(val) >> 3);
\r
300 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
\r
301 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
\r
308 inline static int LZ4_NbCommonBytes (register U32 val)
\r
310 #if defined(LZ4_BIG_ENDIAN)
\r
311 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
313 _BitScanReverse( &r, val );
\r
314 return (int)(r>>3);
\r
315 # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
316 return (__builtin_clz(val) >> 3);
\r
319 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
\r
324 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
326 _BitScanForward( &r, val );
\r
327 return (int)(r>>3);
\r
328 # elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
\r
329 return (__builtin_ctz(val) >> 3);
\r
331 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
\r
332 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
\r
340 static inline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
\r
342 MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
\r
343 MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
\r
344 hc4->nextToUpdate = base + 1;
\r
346 hc4->inputBuffer = base;
\r
352 void* LZ4_createHC (const char* slidingInputBuffer)
\r
354 void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
\r
355 LZ4_InitHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)slidingInputBuffer);
\r
// Release a context created by LZ4_createHC. Always returns 0.
// (FREEMEM is free(); freeing NULL is a harmless no-op.)
int LZ4_freeHC (void* LZ4HC_Data)
{
    FREEMEM(LZ4HC_Data);
    return (0);
}
367 // Update chains up to ip (excluded)
\r
368 static forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
\r
370 U16* chainTable = hc4->chainTable;
\r
371 HTYPE* HashTable = hc4->hashTable;
\r
372 INITBASE(base,hc4->base);
\r
374 while(hc4->nextToUpdate < ip)
\r
376 const BYTE* const p = hc4->nextToUpdate;
\r
377 size_t delta = (p) - HASH_POINTER(p);
\r
378 if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
\r
379 DELTANEXT(p) = (U16)delta;
\r
380 HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
\r
381 hc4->nextToUpdate++;
\r
386 char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
\r
388 LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
\r
389 U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
\r
390 distance = (distance >> 16) << 16; // Must be a multiple of 64 KB
\r
391 LZ4HC_Insert(hc4, hc4->end - MINMATCH);
\r
392 memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
\r
393 hc4->nextToUpdate -= distance;
\r
394 hc4->base -= distance;
\r
395 if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) // Avoid overflow
\r
399 for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
\r
401 hc4->end -= distance;
\r
402 return (char*)(hc4->end);
\r
406 static forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
\r
408 const BYTE* p1t = p1;
\r
410 while (p1t<matchlimit-(STEPSIZE-1))
\r
412 UARCH diff = AARCH(p2) ^ AARCH(p1t);
\r
413 if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
\r
414 p1t += LZ4_NbCommonBytes(diff);
\r
417 if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
\r
418 if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
\r
419 if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
\r
424 static forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
\r
426 U16* const chainTable = hc4->chainTable;
\r
427 HTYPE* const HashTable = hc4->hashTable;
\r
429 INITBASE(base,hc4->base);
\r
430 int nbAttempts=MAX_NB_ATTEMPTS;
\r
431 size_t repl=0, ml=0;
\r
432 U16 delta=0; // useless assignment, to remove an uninitialization warning
\r
434 // HC4 match finder
\r
435 LZ4HC_Insert(hc4, ip);
\r
436 ref = HASH_POINTER(ip);
\r
438 #define REPEAT_OPTIMIZATION
\r
439 #ifdef REPEAT_OPTIMIZATION
\r
440 // Detect repetitive sequences of length <= 4
\r
441 if ((U32)(ip-ref) <= 4) // potential repetition
\r
443 if (A32(ref) == A32(ip)) // confirmed
\r
445 delta = (U16)(ip-ref);
\r
446 repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
\r
449 ref = GETNEXT(ref);
\r
453 while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
\r
456 if (*(ref+ml) == *(ip+ml))
\r
457 if (A32(ref) == A32(ip))
\r
459 size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
\r
460 if (mlt > ml) { ml = mlt; *matchpos = ref; }
\r
462 ref = GETNEXT(ref);
\r
465 #ifdef REPEAT_OPTIMIZATION
\r
469 const BYTE* ptr = ip;
\r
472 end = ip + repl - (MINMATCH-1);
\r
473 while(ptr < end-delta)
\r
475 DELTANEXT(ptr) = delta; // Pre-Load
\r
480 DELTANEXT(ptr) = delta;
\r
481 HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); // Head of chain
\r
483 } while(ptr < end);
\r
484 hc4->nextToUpdate = end;
\r
492 static forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
\r
494 U16* const chainTable = hc4->chainTable;
\r
495 HTYPE* const HashTable = hc4->hashTable;
\r
496 INITBASE(base,hc4->base);
\r
498 int nbAttempts = MAX_NB_ATTEMPTS;
\r
499 int delta = (int)(ip-startLimit);
\r
502 LZ4HC_Insert(hc4, ip);
\r
503 ref = HASH_POINTER(ip);
\r
505 while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
\r
508 if (*(startLimit + longest) == *(ref - delta + longest))
\r
509 if (A32(ref) == A32(ip))
\r
512 const BYTE* reft = ref+MINMATCH;
\r
513 const BYTE* ipt = ip+MINMATCH;
\r
514 const BYTE* startt = ip;
\r
516 while (ipt<matchlimit-(STEPSIZE-1))
\r
518 UARCH diff = AARCH(reft) ^ AARCH(ipt);
\r
519 if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
\r
520 ipt += LZ4_NbCommonBytes(diff);
\r
523 if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
\r
524 if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
\r
525 if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
\r
529 // Easier for code maintenance, but unfortunately slower too
\r
530 const BYTE* startt = ip;
\r
531 const BYTE* reft = ref;
\r
532 const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
\r
535 while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}
\r
537 if ((ipt-startt) > longest)
\r
539 longest = (int)(ipt-startt);
\r
541 *startpos = startt;
\r
544 ref = GETNEXT(ref);
\r
552 //**************************************
\r
553 // Compression functions
\r
554 //**************************************
\r
557 int LZ4_compressHC(
\r
558 const char* source,
\r
562 Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
\r
563 Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
\r
564 return : the number of bytes written in buffer 'dest'
\r
566 #define FUNCTION_NAME LZ4_compressHC
\r
567 #include "lz4hc_encoder.h"
\r
571 int LZ4_compressHC_limitedOutput(
\r
572 const char* source,
\r
577 Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
\r
578 If it cannot achieve it, compression will stop, and result of the function will be zero.
\r
579 return : the number of bytes written in buffer 'dest', or 0 if the compression fails
\r
581 #define FUNCTION_NAME LZ4_compressHC_limitedOutput
\r
582 #define LIMITED_OUTPUT
\r
583 #include "lz4hc_encoder.h"
\r