2 * c 2001 PPC 64 Team, IBM Corp
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * /dev/nvram driver for PPC64
11 * This perhaps should live in drivers/char
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/init.h>
18 #include <linux/spinlock.h>
19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h>
21 #include <linux/pstore.h>
22 #include <linux/ctype.h>
23 #include <linux/zlib.h>
24 #include <asm/uaccess.h>
25 #include <asm/nvram.h>
28 #include <asm/machdep.h>
30 /* Max bytes to read/write in one go */
34 * Set oops header version to distinguish between old and new format header.
35 * lnx,oops-log partition max size is 4000, header version > 4000 will
36 * help in identifying new header.
38 #define OOPS_HDR_VERSION 5000
40 static unsigned int nvram_size;
41 static int nvram_fetch, nvram_store;
42 static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
43 static DEFINE_SPINLOCK(nvram_lock);
50 struct nvram_os_partition {
52 int req_size; /* desired size, in bytes */
53 int min_size; /* minimum acceptable size (0 means req_size) */
54 long size; /* size of data portion (excluding err_log_info) */
55 long index; /* offset of data portion of partition */
56 bool os_partition; /* partition initialized by OS, not FW */
59 static struct nvram_os_partition rtas_log_partition = {
60 .name = "ibm,rtas-log",
67 static struct nvram_os_partition oops_log_partition = {
68 .name = "lnx,oops-log",
75 static const char *pseries_nvram_os_partitions[] = {
81 struct oops_log_info {
85 } __attribute__((packed));
87 static void oops_to_nvram(struct kmsg_dumper *dumper,
88 enum kmsg_dump_reason reason);
90 static struct kmsg_dumper nvram_kmsg_dumper = {
94 /* See clobbering_unread_rtas_event() */
95 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
96 static unsigned long last_unread_rtas_event; /* timestamp */
99 * For capturing and compressing an oops or panic report...
101 * big_oops_buf[] holds the uncompressed text we're capturing.
103 * oops_buf[] holds the compressed text, preceded by an oops header.
104 * oops header has u16 holding the version of oops header (to differentiate
105 * between old and new format header) followed by u16 holding the length of
106 * the compressed* text (*Or uncompressed, if compression fails.) and u64
107 * holding the timestamp. oops_buf[] gets written to NVRAM.
109 * oops_log_info points to the header. oops_data points to the compressed text.
114 * +-----------+-----------+-----------+------------------------+
115 * | version | length | timestamp | text |
116 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
117 * +-----------+-----------+-----------+------------------------+
121 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
123 static size_t big_oops_buf_sz;
124 static char *big_oops_buf, *oops_buf;
125 static char *oops_data;
126 static size_t oops_data_sz;
128 /* Compression parameters */
129 #define COMPR_LEVEL 6
130 #define WINDOW_BITS 12
132 static struct z_stream_s stream;
135 static struct nvram_os_partition of_config_partition = {
138 .os_partition = false
141 static struct nvram_os_partition common_partition = {
144 .os_partition = false
147 static enum pstore_type_id nvram_type_ids[] = {
149 PSTORE_TYPE_PPC_RTAS,
151 PSTORE_TYPE_PPC_COMMON,
154 static int read_type;
155 static unsigned long last_rtas_event;
158 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
167 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
170 if (*index >= nvram_size)
174 if (i + count > nvram_size)
175 count = nvram_size - i;
177 spin_lock_irqsave(&nvram_lock, flags);
179 for (; count != 0; count -= len) {
184 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
185 len) != 0) || len != done) {
186 spin_unlock_irqrestore(&nvram_lock, flags);
190 memcpy(p, nvram_buf, len);
196 spin_unlock_irqrestore(&nvram_lock, flags);
202 static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
210 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
213 if (*index >= nvram_size)
217 if (i + count > nvram_size)
218 count = nvram_size - i;
220 spin_lock_irqsave(&nvram_lock, flags);
222 for (; count != 0; count -= len) {
227 memcpy(nvram_buf, p, len);
229 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
230 len) != 0) || len != done) {
231 spin_unlock_irqrestore(&nvram_lock, flags);
238 spin_unlock_irqrestore(&nvram_lock, flags);
244 static ssize_t pSeries_nvram_get_size(void)
246 return nvram_size ? nvram_size : -ENODEV;
250 /* nvram_write_os_partition, nvram_write_error_log
252 * We need to buffer the error logs into nvram to ensure that we have
253 * the failure information to decode. If we have a severe error there
254 * is no way to guarantee that the OS or the machine is in a state to
255 * get back to user land and write the error to disk. For example if
256 * the SCSI device driver causes a Machine Check by writing to a bad
257 * IO address, there is no way of guaranteeing that the device driver
258 * is in any state that it would also be able to write the error data
259 * captured to disk, thus we buffer it in NVRAM for analysis on the
262 * In NVRAM the partition containing the error log buffer will look like:
264 * +-----------+----------+--------+------------+------------------+
265 * | signature | checksum | length | name | data |
266 * |0 |1 |2 3|4 15|16 length-1|
267 * +-----------+----------+--------+------------+------------------+
269 * The 'data' section would look like (in bytes):
270 * +--------------+------------+-----------------------------------+
271 * | event_logged | sequence # | error log |
272 * |0 3|4 7|8 error_log_size-1|
273 * +--------------+------------+-----------------------------------+
275 * event_logged: 0 if event has not been logged to syslog, 1 if it has
276 * sequence #: The unique sequence # for each event. (until it wraps)
277 * error log: The error log from event_scan
279 int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
280 int length, unsigned int err_type, unsigned int error_log_cnt)
284 struct err_log_info info;
286 if (part->index == -1) {
290 if (length > part->size) {
294 info.error_type = err_type;
295 info.seq_num = error_log_cnt;
297 tmp_index = part->index;
299 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
301 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
305 rc = ppc_md.nvram_write(buff, length, &tmp_index);
307 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
314 int nvram_write_error_log(char * buff, int length,
315 unsigned int err_type, unsigned int error_log_cnt)
317 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
318 err_type, error_log_cnt);
320 last_unread_rtas_event = get_seconds();
322 last_rtas_event = get_seconds();
329 /* nvram_read_partition
331 * Reads nvram partition for at most 'length'
333 int nvram_read_partition(struct nvram_os_partition *part, char *buff,
334 int length, unsigned int *err_type,
335 unsigned int *error_log_cnt)
339 struct err_log_info info;
341 if (part->index == -1)
344 if (length > part->size)
347 tmp_index = part->index;
349 if (part->os_partition) {
350 rc = ppc_md.nvram_read((char *)&info,
351 sizeof(struct err_log_info),
354 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__,
360 rc = ppc_md.nvram_read(buff, length, &tmp_index);
362 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc);
366 if (part->os_partition) {
367 *error_log_cnt = info.seq_num;
368 *err_type = info.error_type;
374 /* nvram_read_error_log
376 * Reads nvram for error log for at most 'length'
378 int nvram_read_error_log(char *buff, int length,
379 unsigned int *err_type, unsigned int *error_log_cnt)
381 return nvram_read_partition(&rtas_log_partition, buff, length,
382 err_type, error_log_cnt);
385 /* This doesn't actually zero anything, but it sets the event_logged
386 * word to tell that this event is safely in syslog.
388 int nvram_clear_error_log(void)
391 int clear_word = ERR_FLAG_ALREADY_LOGGED;
394 if (rtas_log_partition.index == -1)
397 tmp_index = rtas_log_partition.index;
399 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
401 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
404 last_unread_rtas_event = 0;
409 /* pseries_nvram_init_os_partition
411 * This sets up a partition with an "OS" signature.
413 * The general strategy is the following:
414 * 1.) If a partition with the indicated name already exists...
415 * - If it's large enough, use it.
416 * - Otherwise, recycle it and keep going.
417 * 2.) Search for a free partition that is large enough.
418 * 3.) If there's not a free partition large enough, recycle any obsolete
419 * OS partitions and try again.
420 * 4.) Will first try getting a chunk that will satisfy the requested size.
421 * 5.) If a chunk of the requested size cannot be allocated, then try finding
422 * a chunk that will satisfy the minimum needed.
424 * Returns 0 on success, else -1.
426 static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
432 /* Scan nvram for partitions */
433 nvram_scan_partitions();
436 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
438 /* Found one but too small, remove it */
439 if (p && size < part->min_size) {
440 pr_info("nvram: Found too small %s partition,"
441 " removing it...\n", part->name);
442 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
446 /* Create one if we didn't find one */
448 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
449 part->req_size, part->min_size);
451 pr_info("nvram: No room to create %s partition, "
452 "deleting any obsolete OS partitions...\n",
454 nvram_remove_partition(NULL, NVRAM_SIG_OS,
455 pseries_nvram_os_partitions);
456 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
457 part->req_size, part->min_size);
462 pr_err("nvram: Failed to find or create %s"
463 " partition, err %d\n", part->name, (int)p);
468 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
474 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
475 * would logging this oops/panic overwrite an RTAS event that rtas_errd
476 * hasn't had a chance to read and process? Return 1 if so, else 0.
478 * We assume that if rtas_errd hasn't read the RTAS event in
479 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
481 static int clobbering_unread_rtas_event(void)
483 return (oops_log_partition.index == rtas_log_partition.index
484 && last_unread_rtas_event
485 && get_seconds() - last_unread_rtas_event <=
486 NVRAM_RTAS_READ_TIMEOUT);
489 /* Derived from logfs_compress() */
490 static int nvram_compress(const void *in, void *out, size_t inlen,
496 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
497 MEM_LEVEL, Z_DEFAULT_STRATEGY);
502 stream.avail_in = inlen;
504 stream.next_out = out;
505 stream.avail_out = outlen;
506 stream.total_out = 0;
508 err = zlib_deflate(&stream, Z_FINISH);
509 if (err != Z_STREAM_END)
512 err = zlib_deflateEnd(&stream);
516 if (stream.total_out >= stream.total_in)
519 ret = stream.total_out;
524 /* Compress the text from big_oops_buf into oops_buf. */
525 static int zip_oops(size_t text_len)
527 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
528 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
530 if (zipped_len < 0) {
531 pr_err("nvram: compression failed; returned %d\n", zipped_len);
532 pr_err("nvram: logging uncompressed oops/panic report\n");
535 oops_hdr->version = OOPS_HDR_VERSION;
536 oops_hdr->report_length = (u16) zipped_len;
537 oops_hdr->timestamp = get_seconds();
542 /* Derived from logfs_uncompress */
543 int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen)
548 err = zlib_inflateInit(&stream);
553 stream.avail_in = inlen;
555 stream.next_out = out;
556 stream.avail_out = outlen;
557 stream.total_out = 0;
559 err = zlib_inflate(&stream, Z_FINISH);
560 if (err != Z_STREAM_END)
563 err = zlib_inflateEnd(&stream);
567 ret = stream.total_out;
572 static int unzip_oops(char *oops_buf, char *big_buf)
574 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
575 u64 timestamp = oops_hdr->timestamp;
576 char *big_oops_data = NULL;
577 char *oops_data_buf = NULL;
578 size_t big_oops_data_sz;
581 big_oops_data = big_buf + sizeof(struct oops_log_info);
582 big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info);
583 oops_data_buf = oops_buf + sizeof(struct oops_log_info);
585 unzipped_len = nvram_decompress(oops_data_buf, big_oops_data,
586 oops_hdr->report_length,
589 if (unzipped_len < 0) {
590 pr_err("nvram: decompression failed; returned %d\n",
594 oops_hdr = (struct oops_log_info *)big_buf;
595 oops_hdr->version = OOPS_HDR_VERSION;
596 oops_hdr->report_length = (u16) unzipped_len;
597 oops_hdr->timestamp = timestamp;
601 static int nvram_pstore_open(struct pstore_info *psi)
603 /* Reset the iterator to start reading partitions again */
609 * nvram_pstore_write - pstore write callback for nvram
610 * @type: Type of message logged
611 * @reason: reason behind dump (oops/panic)
612 * @id: identifier to indicate the write performed
613 * @part: pstore writes data to registered buffer in parts,
614 * part number will indicate the same.
615 * @count: Indicates oops count
616 * @hsize: Size of header added by pstore
617 * @size: number of bytes written to the registered buffer
618 * @psi: registered pstore_info structure
620 * Called by pstore_dump() when an oops or panic report is logged in the
622 * Returns 0 on successful write.
624 static int nvram_pstore_write(enum pstore_type_id type,
625 enum kmsg_dump_reason reason,
626 u64 *id, unsigned int part, int count,
627 size_t hsize, size_t size,
628 struct pstore_info *psi)
631 unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
632 struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
634 /* part 1 has the recent messages from printk buffer */
635 if (part > 1 || type != PSTORE_TYPE_DMESG ||
636 clobbering_unread_rtas_event())
639 oops_hdr->version = OOPS_HDR_VERSION;
640 oops_hdr->report_length = (u16) size;
641 oops_hdr->timestamp = get_seconds();
646 * If compression fails copy recent log messages from
647 * big_oops_buf to oops_data.
650 size_t diff = size - oops_data_sz + hsize;
652 if (size > oops_data_sz) {
653 memcpy(oops_data, big_oops_buf, hsize);
654 memcpy(oops_data + hsize, big_oops_buf + diff,
655 oops_data_sz - hsize);
657 oops_hdr->report_length = (u16) oops_data_sz;
659 memcpy(oops_data, big_oops_buf, size);
661 err_type = ERR_TYPE_KERNEL_PANIC_GZ;
664 rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
665 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
676 * Reads the oops/panic report, rtas, of-config and common partition.
677 * Returns the length of the data we read from each partition.
678 * Returns 0 if we've been called before.
680 static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
681 int *count, struct timespec *time, char **buf,
682 struct pstore_info *psi)
684 struct oops_log_info *oops_hdr;
685 unsigned int err_type, id_no, size = 0;
686 struct nvram_os_partition *part = NULL;
687 char *buff = NULL, *big_buff = NULL;
694 switch (nvram_type_ids[read_type]) {
695 case PSTORE_TYPE_DMESG:
696 part = &oops_log_partition;
697 *type = PSTORE_TYPE_DMESG;
699 case PSTORE_TYPE_PPC_RTAS:
700 part = &rtas_log_partition;
701 *type = PSTORE_TYPE_PPC_RTAS;
702 time->tv_sec = last_rtas_event;
705 case PSTORE_TYPE_PPC_OF:
707 part = &of_config_partition;
708 *type = PSTORE_TYPE_PPC_OF;
709 *id = PSTORE_TYPE_PPC_OF;
713 case PSTORE_TYPE_PPC_COMMON:
715 part = &common_partition;
716 *type = PSTORE_TYPE_PPC_COMMON;
717 *id = PSTORE_TYPE_PPC_COMMON;
725 if (!part->os_partition) {
726 p = nvram_find_partition(part->name, sig, &size);
728 pr_err("nvram: Failed to find partition %s, "
729 "err %d\n", part->name, (int)p);
736 buff = kmalloc(part->size, GFP_KERNEL);
741 if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
748 if (part->os_partition)
751 if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
752 oops_hdr = (struct oops_log_info *)buff;
753 *buf = buff + sizeof(*oops_hdr);
755 if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) {
756 big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL);
760 rc = unzip_oops(buff, big_buff);
768 oops_hdr = (struct oops_log_info *)big_buff;
769 *buf = big_buff + sizeof(*oops_hdr);
773 time->tv_sec = oops_hdr->timestamp;
775 return oops_hdr->report_length;
782 static struct pstore_info nvram_pstore_info = {
783 .owner = THIS_MODULE,
785 .open = nvram_pstore_open,
786 .read = nvram_pstore_read,
787 .write = nvram_pstore_write,
790 static int nvram_pstore_init(void)
795 nvram_pstore_info.buf = big_oops_buf;
796 nvram_pstore_info.bufsize = big_oops_buf_sz;
798 nvram_pstore_info.buf = oops_data;
799 nvram_pstore_info.bufsize = oops_data_sz;
802 rc = pstore_register(&nvram_pstore_info);
804 pr_err("nvram: pstore_register() failed, defaults to "
805 "kmsg_dump; returned %d\n", rc);
810 static int nvram_pstore_init(void)
816 static void __init nvram_init_oops_partition(int rtas_partition_exists)
820 rc = pseries_nvram_init_os_partition(&oops_log_partition);
822 if (!rtas_partition_exists)
824 pr_notice("nvram: Using %s partition to log both"
825 " RTAS errors and oops/panic reports\n",
826 rtas_log_partition.name);
827 memcpy(&oops_log_partition, &rtas_log_partition,
828 sizeof(rtas_log_partition));
830 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
832 pr_err("nvram: No memory for %s partition\n",
833 oops_log_partition.name);
836 oops_data = oops_buf + sizeof(struct oops_log_info);
837 oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
840 * Figure compression (preceded by elimination of each line's <n>
841 * severity prefix) will reduce the oops/panic report to at most
842 * 45% of its original size.
844 big_oops_buf_sz = (oops_data_sz * 100) / 45;
845 big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
847 stream.workspace = kmalloc(zlib_deflate_workspacesize(
848 WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
849 if (!stream.workspace) {
850 pr_err("nvram: No memory for compression workspace; "
851 "skipping compression of %s partition data\n",
852 oops_log_partition.name);
857 pr_err("No memory for uncompressed %s data; "
858 "skipping compression\n", oops_log_partition.name);
859 stream.workspace = NULL;
862 rc = nvram_pstore_init();
867 rc = kmsg_dump_register(&nvram_kmsg_dumper);
869 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
872 kfree(stream.workspace);
876 static int __init pseries_nvram_init_log_partitions(void)
880 rc = pseries_nvram_init_os_partition(&rtas_log_partition);
881 nvram_init_oops_partition(rc == 0);
884 machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
886 int __init pSeries_nvram_init(void)
888 struct device_node *nvram;
889 const unsigned int *nbytes_p;
890 unsigned int proplen;
892 nvram = of_find_node_by_type(NULL, "nvram");
896 nbytes_p = of_get_property(nvram, "#bytes", &proplen);
897 if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
902 nvram_size = *nbytes_p;
904 nvram_fetch = rtas_token("nvram-fetch");
905 nvram_store = rtas_token("nvram-store");
906 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
909 ppc_md.nvram_read = pSeries_nvram_read;
910 ppc_md.nvram_write = pSeries_nvram_write;
911 ppc_md.nvram_size = pSeries_nvram_get_size;
918 * This is our kmsg_dump callback, called after an oops or panic report
919 * has been written to the printk buffer. We want to capture as much
920 * of the printk buffer as possible. First, capture as much as we can
921 * that we think will compress sufficiently to fit in the lnx,oops-log
922 * partition. If that's too much, go back and capture uncompressed text.
924 static void oops_to_nvram(struct kmsg_dumper *dumper,
925 enum kmsg_dump_reason reason)
927 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
928 static unsigned int oops_count = 0;
929 static bool panicking = false;
930 static DEFINE_SPINLOCK(lock);
933 unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
937 case KMSG_DUMP_RESTART:
939 case KMSG_DUMP_POWEROFF:
940 /* These are almost always orderly shutdowns. */
944 case KMSG_DUMP_PANIC:
947 case KMSG_DUMP_EMERG:
949 /* Panic report already captured. */
953 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
954 __FUNCTION__, (int) reason);
958 if (clobbering_unread_rtas_event())
961 if (!spin_trylock_irqsave(&lock, flags))
965 kmsg_dump_get_buffer(dumper, false,
966 big_oops_buf, big_oops_buf_sz, &text_len);
967 rc = zip_oops(text_len);
970 kmsg_dump_rewind(dumper);
971 kmsg_dump_get_buffer(dumper, false,
972 oops_data, oops_data_sz, &text_len);
973 err_type = ERR_TYPE_KERNEL_PANIC;
974 oops_hdr->version = OOPS_HDR_VERSION;
975 oops_hdr->report_length = (u16) text_len;
976 oops_hdr->timestamp = get_seconds();
979 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
980 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
983 spin_unlock_irqrestore(&lock, flags);