PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image
author Bojan Smojver <bojan@rexursive.com>
Thu, 13 Oct 2011 21:58:07 +0000 (23:58 +0200)
committer Rafael J. Wysocki <rjw@sisk.pl>
Sun, 16 Oct 2011 21:30:38 +0000 (23:30 +0200)
Use threads for LZO compression/decompression on hibernate/thaw.
Improve buffering on hibernate/thaw.
Calculate/verify CRC32 of the image pages on hibernate/thaw.

In my testing, this improved write/read speed by a factor of about two.

Signed-off-by: Bojan Smojver <bojan@rexursive.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
kernel/power/Kconfig
kernel/power/hibernate.c
kernel/power/power.h
kernel/power/swap.c

index e01e6899592c6b43b3d983fc300d8f62cfc2cb4f..cedd9982306a227e5ef6f6d3e88e3f0352ca207f 100644 (file)
@@ -27,6 +27,7 @@ config HIBERNATION
        select HIBERNATE_CALLBACKS
        select LZO_COMPRESS
        select LZO_DECOMPRESS
+       select CRC32
        ---help---
          Enable the suspend to disk (STD) functionality, which is usually
          called "hibernation" in user interfaces.  STD checkpoints the
index ea12c8f1bdfdb7e8cd01186ce84cf8f48cf4c6a8..1c53f7fad5f7ba9e4fd20cd85e5021755c71ada8 100644 (file)
@@ -657,6 +657,9 @@ int hibernate(void)
                        flags |= SF_PLATFORM_MODE;
                if (nocompress)
                        flags |= SF_NOCOMPRESS_MODE;
+               else
+                       flags |= SF_CRC32_MODE;
+
                pr_debug("PM: writing image.\n");
                error = swsusp_write(flags);
                swsusp_free();
index e6206397ce67c89022ab6e2e750896623a23fdce..23a2db1ec442352a4e0d7cbebd3c1763f38a2711 100644 (file)
@@ -146,6 +146,7 @@ extern int swsusp_swap_in_use(void);
  */
 #define SF_PLATFORM_MODE       1
 #define SF_NOCOMPRESS_MODE     2
+#define SF_CRC32_MODE          4
 
 /* kernel/power/hibernate.c */
 extern int swsusp_check(void);
index 7c97c3a0eee393ea1e5e879d84bc8c78a6ecabf2..11a594c4ba251227c6fc25b62c307747356fccfc 100644 (file)
 #include <linux/slab.h>
 #include <linux/lzo.h>
 #include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
+#include <linux/crc32.h>
 
 #include "power.h"
 
@@ -43,8 +47,7 @@
  *     allocated and populated one at a time, so we only need one memory
  *     page to set up the entire structure.
  *
- *     During resume we also only need to use one swap_map_page structure
- *     at a time.
+ *     During resume we pick up all swap_map_page structures into a list.
  */
 
 #define MAP_PAGE_ENTRIES       (PAGE_SIZE / sizeof(sector_t) - 1)
@@ -54,6 +57,11 @@ struct swap_map_page {
        sector_t next_swap;
 };
 
+struct swap_map_page_list {
+       struct swap_map_page *map;
+       struct swap_map_page_list *next;
+};
+
 /**
  *     The swap_map_handle structure is used for handling swap in
  *     a file-alike way
@@ -61,13 +69,18 @@ struct swap_map_page {
 
 struct swap_map_handle {
        struct swap_map_page *cur;
+       struct swap_map_page_list *maps;
        sector_t cur_swap;
        sector_t first_sector;
        unsigned int k;
+       unsigned long nr_free_pages, written;
+       u32 crc32;
 };
 
 struct swsusp_header {
-       char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
+       char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
+                     sizeof(u32)];
+       u32     crc32;
        sector_t image;
        unsigned int flags;     /* Flags to pass to the "boot" kernel */
        char    orig_sig[10];
@@ -199,6 +212,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
                memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
                swsusp_header->image = handle->first_sector;
                swsusp_header->flags = flags;
+               if (flags & SF_CRC32_MODE)
+                       swsusp_header->crc32 = handle->crc32;
                error = hib_bio_write_page(swsusp_resume_block,
                                        swsusp_header, NULL);
        } else {
@@ -245,6 +260,7 @@ static int swsusp_swap_check(void)
 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
 {
        void *src;
+       int ret;
 
        if (!offset)
                return -ENOSPC;
@@ -254,9 +270,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
                if (src) {
                        copy_page(src, buf);
                } else {
-                       WARN_ON_ONCE(1);
-                       bio_chain = NULL;       /* Go synchronous */
-                       src = buf;
+                       ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+                       if (ret)
+                               return ret;
+                       src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+                       if (src) {
+                               copy_page(src, buf);
+                       } else {
+                               WARN_ON_ONCE(1);
+                               bio_chain = NULL;       /* Go synchronous */
+                               src = buf;
+                       }
                }
        } else {
                src = buf;
@@ -293,6 +317,8 @@ static int get_swap_writer(struct swap_map_handle *handle)
                goto err_rel;
        }
        handle->k = 0;
+       handle->nr_free_pages = nr_free_pages() >> 1;
+       handle->written = 0;
        handle->first_sector = handle->cur_swap;
        return 0;
 err_rel:
@@ -316,20 +342,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
                return error;
        handle->cur->entries[handle->k++] = offset;
        if (handle->k >= MAP_PAGE_ENTRIES) {
-               error = hib_wait_on_bio_chain(bio_chain);
-               if (error)
-                       goto out;
                offset = alloc_swapdev_block(root_swap);
                if (!offset)
                        return -ENOSPC;
                handle->cur->next_swap = offset;
-               error = write_page(handle->cur, handle->cur_swap, NULL);
+               error = write_page(handle->cur, handle->cur_swap, bio_chain);
                if (error)
                        goto out;
                clear_page(handle->cur);
                handle->cur_swap = offset;
                handle->k = 0;
        }
+       if (bio_chain && ++handle->written > handle->nr_free_pages) {
+               error = hib_wait_on_bio_chain(bio_chain);
+               if (error)
+                       goto out;
+               handle->written = 0;
+       }
  out:
        return error;
 }
@@ -372,6 +401,13 @@ static int swap_writer_finish(struct swap_map_handle *handle,
                                     LZO_HEADER, PAGE_SIZE)
 #define LZO_CMP_SIZE   (LZO_CMP_PAGES * PAGE_SIZE)
 
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS    3
+
+/* Maximum number of pages for read buffering. */
+#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8)
+
+
 /**
  *     save_image - save the suspend image data
  */
@@ -419,6 +455,92 @@ static int save_image(struct swap_map_handle *handle,
        return ret;
 }
 
+/**
+ * Structure used for CRC32.
+ */
+struct crc_data {
+       struct task_struct *thr;                  /* thread */
+       atomic_t ready;                           /* ready to start flag */
+       atomic_t stop;                            /* ready to stop flag */
+       unsigned run_threads;                     /* nr current threads */
+       wait_queue_head_t go;                     /* start crc update */
+       wait_queue_head_t done;                   /* crc update done */
+       u32 *crc32;                               /* points to handle's crc32 */
+       size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
+       unsigned char *unc[LZO_THREADS];          /* uncompressed data */
+};
+
+/**
+ * CRC32 update function that runs in its own thread.
+ */
+static int crc32_threadfn(void *data)
+{
+       struct crc_data *d = data;
+       unsigned i;
+
+       while (1) {
+               wait_event(d->go, atomic_read(&d->ready) ||
+                                 kthread_should_stop());
+               if (kthread_should_stop()) {
+                       d->thr = NULL;
+                       atomic_set(&d->stop, 1);
+                       wake_up(&d->done);
+                       break;
+               }
+               atomic_set(&d->ready, 0);
+
+               for (i = 0; i < d->run_threads; i++)
+                       *d->crc32 = crc32_le(*d->crc32,
+                                            d->unc[i], *d->unc_len[i]);
+               atomic_set(&d->stop, 1);
+               wake_up(&d->done);
+       }
+       return 0;
+}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+       struct task_struct *thr;                  /* thread */
+       atomic_t ready;                           /* ready to start flag */
+       atomic_t stop;                            /* ready to stop flag */
+       int ret;                                  /* return code */
+       wait_queue_head_t go;                     /* start compression */
+       wait_queue_head_t done;                   /* compression done */
+       size_t unc_len;                           /* uncompressed length */
+       size_t cmp_len;                           /* compressed length */
+       unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
+       unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
+       unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+       struct cmp_data *d = data;
+
+       while (1) {
+               wait_event(d->go, atomic_read(&d->ready) ||
+                                 kthread_should_stop());
+               if (kthread_should_stop()) {
+                       d->thr = NULL;
+                       d->ret = -1;
+                       atomic_set(&d->stop, 1);
+                       wake_up(&d->done);
+                       break;
+               }
+               atomic_set(&d->ready, 0);
+
+               d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+                                         d->cmp + LZO_HEADER, &d->cmp_len,
+                                         d->wrk);
+               atomic_set(&d->stop, 1);
+               wake_up(&d->done);
+       }
+       return 0;
+}
 
 /**
  * save_image_lzo - Save the suspend image data compressed with LZO.
@@ -437,42 +559,93 @@ static int save_image_lzo(struct swap_map_handle *handle,
        struct bio *bio;
        struct timeval start;
        struct timeval stop;
-       size_t off, unc_len, cmp_len;
-       unsigned char *unc, *cmp, *wrk, *page;
+       size_t off;
+       unsigned thr, run_threads, nr_threads;
+       unsigned char *page = NULL;
+       struct cmp_data *data = NULL;
+       struct crc_data *crc = NULL;
+
+       /*
+        * We'll limit the number of threads for compression to limit memory
+        * footprint.
+        */
+       nr_threads = num_online_cpus() - 1;
+       nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
 
        page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
        if (!page) {
                printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out_clean;
        }
 
-       wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
-       if (!wrk) {
-               printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
-               free_page((unsigned long)page);
-               return -ENOMEM;
+       data = vmalloc(sizeof(*data) * nr_threads);
+       if (!data) {
+               printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+               ret = -ENOMEM;
+               goto out_clean;
        }
+       for (thr = 0; thr < nr_threads; thr++)
+               memset(&data[thr], 0, offsetof(struct cmp_data, go));
 
-       unc = vmalloc(LZO_UNC_SIZE);
-       if (!unc) {
-               printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-               vfree(wrk);
-               free_page((unsigned long)page);
-               return -ENOMEM;
+       crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+       if (!crc) {
+               printk(KERN_ERR "PM: Failed to allocate crc\n");
+               ret = -ENOMEM;
+               goto out_clean;
+       }
+       memset(crc, 0, offsetof(struct crc_data, go));
+
+       /*
+        * Start the compression threads.
+        */
+       for (thr = 0; thr < nr_threads; thr++) {
+               init_waitqueue_head(&data[thr].go);
+               init_waitqueue_head(&data[thr].done);
+
+               data[thr].thr = kthread_run(lzo_compress_threadfn,
+                                           &data[thr],
+                                           "image_compress/%u", thr);
+               if (IS_ERR(data[thr].thr)) {
+                       data[thr].thr = NULL;
+                       printk(KERN_ERR
+                              "PM: Cannot start compression threads\n");
+                       ret = -ENOMEM;
+                       goto out_clean;
+               }
        }
 
-       cmp = vmalloc(LZO_CMP_SIZE);
-       if (!cmp) {
-               printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
-               vfree(unc);
-               vfree(wrk);
-               free_page((unsigned long)page);
-               return -ENOMEM;
+       /*
+        * Adjust number of free pages after all allocations have been done.
+        * We don't want to run out of pages when writing.
+        */
+       handle->nr_free_pages = nr_free_pages() >> 1;
+
+       /*
+        * Start the CRC32 thread.
+        */
+       init_waitqueue_head(&crc->go);
+       init_waitqueue_head(&crc->done);
+
+       handle->crc32 = 0;
+       crc->crc32 = &handle->crc32;
+       for (thr = 0; thr < nr_threads; thr++) {
+               crc->unc[thr] = data[thr].unc;
+               crc->unc_len[thr] = &data[thr].unc_len;
+       }
+
+       crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+       if (IS_ERR(crc->thr)) {
+               crc->thr = NULL;
+               printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+               ret = -ENOMEM;
+               goto out_clean;
        }
 
        printk(KERN_INFO
+               "PM: Using %u thread(s) for compression.\n"
                "PM: Compressing and saving image data (%u pages) ...     ",
-               nr_to_write);
+               nr_threads, nr_to_write);
        m = nr_to_write / 100;
        if (!m)
                m = 1;
@@ -480,55 +653,83 @@ static int save_image_lzo(struct swap_map_handle *handle,
        bio = NULL;
        do_gettimeofday(&start);
        for (;;) {
-               for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
-                       ret = snapshot_read_next(snapshot);
-                       if (ret < 0)
-                               goto out_finish;
-
-                       if (!ret)
+               for (thr = 0; thr < nr_threads; thr++) {
+                       for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+                               ret = snapshot_read_next(snapshot);
+                               if (ret < 0)
+                                       goto out_finish;
+
+                               if (!ret)
+                                       break;
+
+                               memcpy(data[thr].unc + off,
+                                      data_of(*snapshot), PAGE_SIZE);
+
+                               if (!(nr_pages % m))
+                                       printk(KERN_CONT "\b\b\b\b%3d%%",
+                                              nr_pages / m);
+                               nr_pages++;
+                       }
+                       if (!off)
                                break;
 
-                       memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+                       data[thr].unc_len = off;
 
-                       if (!(nr_pages % m))
-                               printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
-                       nr_pages++;
+                       atomic_set(&data[thr].ready, 1);
+                       wake_up(&data[thr].go);
                }
 
-               if (!off)
+               if (!thr)
                        break;
 
-               unc_len = off;
-               ret = lzo1x_1_compress(unc, unc_len,
-                                      cmp + LZO_HEADER, &cmp_len, wrk);
-               if (ret < 0) {
-                       printk(KERN_ERR "PM: LZO compression failed\n");
-                       break;
-               }
+               crc->run_threads = thr;
+               atomic_set(&crc->ready, 1);
+               wake_up(&crc->go);
 
-               if (unlikely(!cmp_len ||
-                            cmp_len > lzo1x_worst_compress(unc_len))) {
-                       printk(KERN_ERR "PM: Invalid LZO compressed length\n");
-                       ret = -1;
-                       break;
-               }
+               for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+                       wait_event(data[thr].done,
+                                  atomic_read(&data[thr].stop));
+                       atomic_set(&data[thr].stop, 0);
 
-               *(size_t *)cmp = cmp_len;
+                       ret = data[thr].ret;
 
-               /*
-                * Given we are writing one page at a time to disk, we copy
-                * that much from the buffer, although the last bit will likely
-                * be smaller than full page. This is OK - we saved the length
-                * of the compressed data, so any garbage at the end will be
-                * discarded when we read it.
-                */
-               for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
-                       memcpy(page, cmp + off, PAGE_SIZE);
+                       if (ret < 0) {
+                               printk(KERN_ERR "PM: LZO compression failed\n");
+                               goto out_finish;
+                       }
 
-                       ret = swap_write_page(handle, page, &bio);
-                       if (ret)
+                       if (unlikely(!data[thr].cmp_len ||
+                                    data[thr].cmp_len >
+                                    lzo1x_worst_compress(data[thr].unc_len))) {
+                               printk(KERN_ERR
+                                      "PM: Invalid LZO compressed length\n");
+                               ret = -1;
                                goto out_finish;
+                       }
+
+                       *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+                       /*
+                        * Given we are writing one page at a time to disk, we
+                        * copy that much from the buffer, although the last
+                        * bit will likely be smaller than full page. This is
+                        * OK - we saved the length of the compressed data, so
+                        * any garbage at the end will be discarded when we
+                        * read it.
+                        */
+                       for (off = 0;
+                            off < LZO_HEADER + data[thr].cmp_len;
+                            off += PAGE_SIZE) {
+                               memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+                               ret = swap_write_page(handle, page, &bio);
+                               if (ret)
+                                       goto out_finish;
+                       }
                }
+
+               wait_event(crc->done, atomic_read(&crc->stop));
+               atomic_set(&crc->stop, 0);
        }
 
 out_finish:
@@ -536,16 +737,25 @@ out_finish:
        do_gettimeofday(&stop);
        if (!ret)
                ret = err2;
-       if (!ret)
+       if (!ret) {
                printk(KERN_CONT "\b\b\b\bdone\n");
-       else
+       } else {
                printk(KERN_CONT "\n");
+       }
        swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
-       vfree(cmp);
-       vfree(unc);
-       vfree(wrk);
-       free_page((unsigned long)page);
+out_clean:
+       if (crc) {
+               if (crc->thr)
+                       kthread_stop(crc->thr);
+               kfree(crc);
+       }
+       if (data) {
+               for (thr = 0; thr < nr_threads; thr++)
+                       if (data[thr].thr)
+                               kthread_stop(data[thr].thr);
+               vfree(data);
+       }
+       if (page) free_page((unsigned long)page);
 
        return ret;
 }
@@ -625,8 +835,15 @@ out_finish:
 
 static void release_swap_reader(struct swap_map_handle *handle)
 {
-       if (handle->cur)
-               free_page((unsigned long)handle->cur);
+       struct swap_map_page_list *tmp;
+
+       while (handle->maps) {
+               if (handle->maps->map)
+                       free_page((unsigned long)handle->maps->map);
+               tmp = handle->maps;
+               handle->maps = handle->maps->next;
+               kfree(tmp);
+       }
        handle->cur = NULL;
 }
 
@@ -634,22 +851,46 @@ static int get_swap_reader(struct swap_map_handle *handle,
                unsigned int *flags_p)
 {
        int error;
+       struct swap_map_page_list *tmp, *last;
+       sector_t offset;
 
        *flags_p = swsusp_header->flags;
 
        if (!swsusp_header->image) /* how can this happen? */
                return -EINVAL;
 
-       handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
-       if (!handle->cur)
-               return -ENOMEM;
+       handle->cur = NULL;
+       last = handle->maps = NULL;
+       offset = swsusp_header->image;
+       while (offset) {
+               tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
+               if (!tmp) {
+                       release_swap_reader(handle);
+                       return -ENOMEM;
+               }
+               memset(tmp, 0, sizeof(*tmp));
+               if (!handle->maps)
+                       handle->maps = tmp;
+               if (last)
+                       last->next = tmp;
+               last = tmp;
+
+               tmp->map = (struct swap_map_page *)
+                          __get_free_page(__GFP_WAIT | __GFP_HIGH);
+               if (!tmp->map) {
+                       release_swap_reader(handle);
+                       return -ENOMEM;
+               }
 
-       error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
-       if (error) {
-               release_swap_reader(handle);
-               return error;
+               error = hib_bio_read_page(offset, tmp->map, NULL);
+               if (error) {
+                       release_swap_reader(handle);
+                       return error;
+               }
+               offset = tmp->map->next_swap;
        }
        handle->k = 0;
+       handle->cur = handle->maps->map;
        return 0;
 }
 
@@ -658,6 +899,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
 {
        sector_t offset;
        int error;
+       struct swap_map_page_list *tmp;
 
        if (!handle->cur)
                return -EINVAL;
@@ -668,13 +910,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
        if (error)
                return error;
        if (++handle->k >= MAP_PAGE_ENTRIES) {
-               error = hib_wait_on_bio_chain(bio_chain);
                handle->k = 0;
-               offset = handle->cur->next_swap;
-               if (!offset)
+               free_page((unsigned long)handle->maps->map);
+               tmp = handle->maps;
+               handle->maps = handle->maps->next;
+               kfree(tmp);
+               if (!handle->maps)
                        release_swap_reader(handle);
-               else if (!error)
-                       error = hib_bio_read_page(offset, handle->cur, NULL);
+               else
+                       handle->cur = handle->maps->map;
        }
        return error;
 }
@@ -697,7 +941,7 @@ static int load_image(struct swap_map_handle *handle,
                       unsigned int nr_to_read)
 {
        unsigned int m;
-       int error = 0;
+       int ret = 0;
        struct timeval start;
        struct timeval stop;
        struct bio *bio;
@@ -713,15 +957,15 @@ static int load_image(struct swap_map_handle *handle,
        bio = NULL;
        do_gettimeofday(&start);
        for ( ; ; ) {
-               error = snapshot_write_next(snapshot);
-               if (error <= 0)
+               ret = snapshot_write_next(snapshot);
+               if (ret <= 0)
                        break;
-               error = swap_read_page(handle, data_of(*snapshot), &bio);
-               if (error)
+               ret = swap_read_page(handle, data_of(*snapshot), &bio);
+               if (ret)
                        break;
                if (snapshot->sync_read)
-                       error = hib_wait_on_bio_chain(&bio);
-               if (error)
+                       ret = hib_wait_on_bio_chain(&bio);
+               if (ret)
                        break;
                if (!(nr_pages % m))
                        printk("\b\b\b\b%3d%%", nr_pages / m);
@@ -729,17 +973,61 @@ static int load_image(struct swap_map_handle *handle,
        }
        err2 = hib_wait_on_bio_chain(&bio);
        do_gettimeofday(&stop);
-       if (!error)
-               error = err2;
-       if (!error) {
+       if (!ret)
+               ret = err2;
+       if (!ret) {
                printk("\b\b\b\bdone\n");
                snapshot_write_finalize(snapshot);
                if (!snapshot_image_loaded(snapshot))
-                       error = -ENODATA;
+                       ret = -ENODATA;
        } else
                printk("\n");
        swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-       return error;
+       return ret;
+}
+
+/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+       struct task_struct *thr;                  /* thread */
+       atomic_t ready;                           /* ready to start flag */
+       atomic_t stop;                            /* ready to stop flag */
+       int ret;                                  /* return code */
+       wait_queue_head_t go;                     /* start decompression */
+       wait_queue_head_t done;                   /* decompression done */
+       size_t unc_len;                           /* uncompressed length */
+       size_t cmp_len;                           /* compressed length */
+       unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
+       unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+       struct dec_data *d = data;
+
+       while (1) {
+               wait_event(d->go, atomic_read(&d->ready) ||
+                                 kthread_should_stop());
+               if (kthread_should_stop()) {
+                       d->thr = NULL;
+                       d->ret = -1;
+                       atomic_set(&d->stop, 1);
+                       wake_up(&d->done);
+                       break;
+               }
+               atomic_set(&d->ready, 0);
+
+               d->unc_len = LZO_UNC_SIZE;
+               d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+                                              d->unc, &d->unc_len);
+               atomic_set(&d->stop, 1);
+               wake_up(&d->done);
+       }
+       return 0;
 }
 
 /**
@@ -753,50 +1041,120 @@ static int load_image_lzo(struct swap_map_handle *handle,
                           unsigned int nr_to_read)
 {
        unsigned int m;
-       int error = 0;
+       int ret = 0;
+       int eof = 0;
        struct bio *bio;
        struct timeval start;
        struct timeval stop;
        unsigned nr_pages;
-       size_t i, off, unc_len, cmp_len;
-       unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
-       for (i = 0; i < LZO_CMP_PAGES; i++) {
-               page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
-               if (!page[i]) {
-                       printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+       size_t off;
+       unsigned i, thr, run_threads, nr_threads;
+       unsigned ring = 0, pg = 0, ring_size = 0,
+                have = 0, want, need, asked = 0;
+       unsigned long read_pages;
+       unsigned char **page = NULL;
+       struct dec_data *data = NULL;
+       struct crc_data *crc = NULL;
+
+       /*
+        * We'll limit the number of threads for decompression to limit memory
+        * footprint.
+        */
+       nr_threads = num_online_cpus() - 1;
+       nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+
+       page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
+       if (!page) {
+               printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+               ret = -ENOMEM;
+               goto out_clean;
+       }
 
-                       while (i)
-                               free_page((unsigned long)page[--i]);
+       data = vmalloc(sizeof(*data) * nr_threads);
+       if (!data) {
+               printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+               ret = -ENOMEM;
+               goto out_clean;
+       }
+       for (thr = 0; thr < nr_threads; thr++)
+               memset(&data[thr], 0, offsetof(struct dec_data, go));
 
-                       return -ENOMEM;
+       crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+       if (!crc) {
+               printk(KERN_ERR "PM: Failed to allocate crc\n");
+               ret = -ENOMEM;
+               goto out_clean;
+       }
+       memset(crc, 0, offsetof(struct crc_data, go));
+
+       /*
+        * Start the decompression threads.
+        */
+       for (thr = 0; thr < nr_threads; thr++) {
+               init_waitqueue_head(&data[thr].go);
+               init_waitqueue_head(&data[thr].done);
+
+               data[thr].thr = kthread_run(lzo_decompress_threadfn,
+                                           &data[thr],
+                                           "image_decompress/%u", thr);
+               if (IS_ERR(data[thr].thr)) {
+                       data[thr].thr = NULL;
+                       printk(KERN_ERR
+                              "PM: Cannot start decompression threads\n");
+                       ret = -ENOMEM;
+                       goto out_clean;
                }
        }
 
-       unc = vmalloc(LZO_UNC_SIZE);
-       if (!unc) {
-               printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
-               for (i = 0; i < LZO_CMP_PAGES; i++)
-                       free_page((unsigned long)page[i]);
-
-               return -ENOMEM;
+       /*
+        * Start the CRC32 thread.
+        */
+       init_waitqueue_head(&crc->go);
+       init_waitqueue_head(&crc->done);
+
+       handle->crc32 = 0;
+       crc->crc32 = &handle->crc32;
+       for (thr = 0; thr < nr_threads; thr++) {
+               crc->unc[thr] = data[thr].unc;
+               crc->unc_len[thr] = &data[thr].unc_len;
        }
 
-       cmp = vmalloc(LZO_CMP_SIZE);
-       if (!cmp) {
-               printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+       crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+       if (IS_ERR(crc->thr)) {
+               crc->thr = NULL;
+               printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+               ret = -ENOMEM;
+               goto out_clean;
+       }
 
-               vfree(unc);
-               for (i = 0; i < LZO_CMP_PAGES; i++)
-                       free_page((unsigned long)page[i]);
+       /*
+        * Adjust number of pages for read buffering, in case we are short.
+        */
+       read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
+       read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
 
-               return -ENOMEM;
+       for (i = 0; i < read_pages; i++) {
+               page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
+                                                 __GFP_WAIT | __GFP_HIGH :
+                                                 __GFP_WAIT);
+               if (!page[i]) {
+                       if (i < LZO_CMP_PAGES) {
+                               ring_size = i;
+                               printk(KERN_ERR
+                                      "PM: Failed to allocate LZO pages\n");
+                               ret = -ENOMEM;
+                               goto out_clean;
+                       } else {
+                               break;
+                       }
+               }
        }
+       want = ring_size = i;
 
        printk(KERN_INFO
+               "PM: Using %u thread(s) for decompression.\n"
                "PM: Loading and decompressing image data (%u pages) ...     ",
-               nr_to_read);
+               nr_threads, nr_to_read);
        m = nr_to_read / 100;
        if (!m)
                m = 1;
@@ -804,85 +1162,189 @@ static int load_image_lzo(struct swap_map_handle *handle,
        bio = NULL;
        do_gettimeofday(&start);
 
-       error = snapshot_write_next(snapshot);
-       if (error <= 0)
+       ret = snapshot_write_next(snapshot);
+       if (ret <= 0)
                goto out_finish;
 
-       for (;;) {
-               error = swap_read_page(handle, page[0], NULL); /* sync */
-               if (error)
-                       break;
-
-               cmp_len = *(size_t *)page[0];
-               if (unlikely(!cmp_len ||
-                            cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
-                       printk(KERN_ERR "PM: Invalid LZO compressed length\n");
-                       error = -1;
-                       break;
+       for(;;) {
+               for (i = 0; !eof && i < want; i++) {
+                       ret = swap_read_page(handle, page[ring], &bio);
+                       if (ret) {
+                               /*
+                                * On real read error, finish. On end of data,
+                                * set EOF flag and just exit the read loop.
+                                */
+                               if (handle->cur &&
+                                   handle->cur->entries[handle->k]) {
+                                       goto out_finish;
+                               } else {
+                                       eof = 1;
+                                       break;
+                               }
+                       }
+                       if (++ring >= ring_size)
+                               ring = 0;
                }
+               asked += i;
+               want -= i;
 
-               for (off = PAGE_SIZE, i = 1;
-                    off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
-                       error = swap_read_page(handle, page[i], &bio);
-                       if (error)
+               /*
+                * We are out of data, wait for some more.
+                */
+               if (!have) {
+                       if (!asked)
+                               break;
+
+                       ret = hib_wait_on_bio_chain(&bio);
+                       if (ret)
                                goto out_finish;
+                       have += asked;
+                       asked = 0;
+                       if (eof)
+                               eof = 2;
                }
 
-               error = hib_wait_on_bio_chain(&bio); /* need all data now */
-               if (error)
-                       goto out_finish;
-
-               for (off = 0, i = 0;
-                    off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
-                       memcpy(cmp + off, page[i], PAGE_SIZE);
+               if (crc->run_threads) {
+                       wait_event(crc->done, atomic_read(&crc->stop));
+                       atomic_set(&crc->stop, 0);
+                       crc->run_threads = 0;
                }
 
-               unc_len = LZO_UNC_SIZE;
-               error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
-                                             unc, &unc_len);
-               if (error < 0) {
-                       printk(KERN_ERR "PM: LZO decompression failed\n");
-                       break;
+               for (thr = 0; have && thr < nr_threads; thr++) {
+                       data[thr].cmp_len = *(size_t *)page[pg];
+                       if (unlikely(!data[thr].cmp_len ||
+                                    data[thr].cmp_len >
+                                    lzo1x_worst_compress(LZO_UNC_SIZE))) {
+                               printk(KERN_ERR
+                                      "PM: Invalid LZO compressed length\n");
+                               ret = -1;
+                               goto out_finish;
+                       }
+
+                       need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+                                           PAGE_SIZE);
+                       if (need > have) {
+                               if (eof > 1) {
+                                       ret = -1;
+                                       goto out_finish;
+                               }
+                               break;
+                       }
+
+                       for (off = 0;
+                            off < LZO_HEADER + data[thr].cmp_len;
+                            off += PAGE_SIZE) {
+                               memcpy(data[thr].cmp + off,
+                                      page[pg], PAGE_SIZE);
+                               have--;
+                               want++;
+                               if (++pg >= ring_size)
+                                       pg = 0;
+                       }
+
+                       atomic_set(&data[thr].ready, 1);
+                       wake_up(&data[thr].go);
                }
 
-               if (unlikely(!unc_len ||
-                            unc_len > LZO_UNC_SIZE ||
-                            unc_len & (PAGE_SIZE - 1))) {
-                       printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
-                       error = -1;
-                       break;
+               /*
+                * Wait for more data while we are decompressing.
+                */
+               if (have < LZO_CMP_PAGES && asked) {
+                       ret = hib_wait_on_bio_chain(&bio);
+                       if (ret)
+                               goto out_finish;
+                       have += asked;
+                       asked = 0;
+                       if (eof)
+                               eof = 2;
                }
 
-               for (off = 0; off < unc_len; off += PAGE_SIZE) {
-                       memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+               for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+                       wait_event(data[thr].done,
+                                  atomic_read(&data[thr].stop));
+                       atomic_set(&data[thr].stop, 0);
+
+                       ret = data[thr].ret;
 
-                       if (!(nr_pages % m))
-                               printk("\b\b\b\b%3d%%", nr_pages / m);
-                       nr_pages++;
+                       if (ret < 0) {
+                               printk(KERN_ERR
+                                      "PM: LZO decompression failed\n");
+                               goto out_finish;
+                       }
 
-                       error = snapshot_write_next(snapshot);
-                       if (error <= 0)
+                       if (unlikely(!data[thr].unc_len ||
+                                    data[thr].unc_len > LZO_UNC_SIZE ||
+                                    data[thr].unc_len & (PAGE_SIZE - 1))) {
+                               printk(KERN_ERR
+                                      "PM: Invalid LZO uncompressed length\n");
+                               ret = -1;
                                goto out_finish;
+                       }
+
+                       for (off = 0;
+                            off < data[thr].unc_len; off += PAGE_SIZE) {
+                               memcpy(data_of(*snapshot),
+                                      data[thr].unc + off, PAGE_SIZE);
+
+                               if (!(nr_pages % m))
+                                       printk("\b\b\b\b%3d%%", nr_pages / m);
+                               nr_pages++;
+
+                               ret = snapshot_write_next(snapshot);
+                               if (ret <= 0) {
+                                       crc->run_threads = thr + 1;
+                                       atomic_set(&crc->ready, 1);
+                                       wake_up(&crc->go);
+                                       goto out_finish;
+                               }
+                       }
                }
+
+               crc->run_threads = thr;
+               atomic_set(&crc->ready, 1);
+               wake_up(&crc->go);
        }
 
 out_finish:
+       if (crc->run_threads) {
+               wait_event(crc->done, atomic_read(&crc->stop));
+               atomic_set(&crc->stop, 0);
+       }
        do_gettimeofday(&stop);
-       if (!error) {
+       if (!ret) {
                printk("\b\b\b\bdone\n");
                snapshot_write_finalize(snapshot);
                if (!snapshot_image_loaded(snapshot))
-                       error = -ENODATA;
+                       ret = -ENODATA;
+               if (!ret) {
+                       if (swsusp_header->flags & SF_CRC32_MODE) {
+                               if(handle->crc32 != swsusp_header->crc32) {
+                                       printk(KERN_ERR
+                                              "PM: Invalid image CRC32!\n");
+                                       ret = -ENODATA;
+                               }
+                       }
+               }
        } else
                printk("\n");
        swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
-       vfree(cmp);
-       vfree(unc);
-       for (i = 0; i < LZO_CMP_PAGES; i++)
+out_clean:
+       for (i = 0; i < ring_size; i++)
                free_page((unsigned long)page[i]);
+       if (crc) {
+               if (crc->thr)
+                       kthread_stop(crc->thr);
+               kfree(crc);
+       }
+       if (data) {
+               for (thr = 0; thr < nr_threads; thr++)
+                       if (data[thr].thr)
+                               kthread_stop(data[thr].thr);
+               vfree(data);
+       }
+       if (page) vfree(page);
 
-       return error;
+       return ret;
 }
 
 /**