/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt)	"opal: " fmt
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"
/* /sys/firmware/opal */
struct kobject *opal_kobj;
struct opal {
	u64 base;
	u64 entry;
	u64 size;
} opal;

struct mcheck_recoverable_range {
	u64 start_addr;
	u64 end_addr;
	u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;
struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
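
/*
 * Notifier bookkeeping (descriptive note): last_notified_mask remembers the
 * event bits handed to subscribers last time, so opal_do_notifier() can give
 * callbacks both the current events and the bits that changed; the
 * opal_notifier_hold counter lets opal_notifier_disable() defer delivery
 * until opal_notifier_enable() re-polls for pending events.
 */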
static void opal_reinit_cores(void)
{
	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
	 *
	 * It will preserve non volatile GPRs and HSPRG0/1. It will
	 * also restore HIDs and other SPRs to their original value
	 * but it might clobber a bunch.
	 */
#ifdef __BIG_ENDIAN__
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
#else
	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
#endif
}
int __init early_init_dt_scan_opal(unsigned long node,
				   const char *uname, int depth, void *data)
{
	const void *basep, *entryp, *sizep;
	int basesz, entrysz, runtimesz;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

	if (!basep || !entryp || !sizep)
		return 1;

	opal.base = of_read_number(basep, basesz/4);
	opal.entry = of_read_number(entryp, entrysz/4);
	opal.size = of_read_number(sizep, runtimesz/4);

	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
		 opal.base, basep, basesz);
	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
		 opal.entry, entryp, entrysz);
	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
		 opal.size, sizep, runtimesz);

	powerpc_firmware_features |= FW_FEATURE_OPAL;
	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		powerpc_firmware_features |= FW_FEATURE_OPALv3;
		pr_info("OPAL V3 detected !\n");
	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
		powerpc_firmware_features |= FW_FEATURE_OPALv2;
		pr_info("OPAL V2 detected !\n");
	} else {
		pr_info("OPAL V1 detected !\n");
	}

	/* Reinit all cores with the right endian */
	opal_reinit_cores();

	/* Restore some bits */
	if (cur_cpu_spec->cpu_restore)
		cur_cpu_spec->cpu_restore();

	return 1;
}
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
				   const char *uname, int depth, void *data)
{
	int i, psize, size;
	const __be32 *prop;

	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
	if (!prop)
		return 1;

	pr_debug("Found machine check recoverable ranges.\n");

	/*
	 * Calculate number of available entries.
	 *
	 * Each recoverable address range entry is (start address, len,
	 * recovery address), 2 cells each for start and recovery address,
	 * 1 cell for len, totalling 5 cells per entry.
	 */
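	/*
	 * Illustrative layout of one entry, as described above (five
	 * 32-bit cells):
	 *   cells 0-1: start address (hi, lo)
	 *   cell  2  : length
	 *   cells 3-4: recovery address (hi, lo)
	 */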
	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

	/* Sanity check */
	if (!mc_recoverable_range_len)
		return 1;

	/* Size required to hold all the entries. */
	size = mc_recoverable_range_len *
			sizeof(struct mcheck_recoverable_range);

	/*
	 * Allocate a buffer to hold the MC recoverable ranges. We would be
	 * accessing them in real mode, hence it needs to be within
	 * the RMO region.
	 */
	mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
							ppc64_rma_size));
	memset(mc_recoverable_range, 0, size);

	for (i = 0; i < mc_recoverable_range_len; i++) {
		mc_recoverable_range[i].start_addr =
					of_read_number(prop + (i * 5) + 0, 2);
		mc_recoverable_range[i].end_addr =
					mc_recoverable_range[i].start_addr +
					of_read_number(prop + (i * 5) + 2, 1);
		mc_recoverable_range[i].recover_addr =
					of_read_number(prop + (i * 5) + 3, 2);

		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
				mc_recoverable_range[i].start_addr,
				mc_recoverable_range[i].end_addr,
				mc_recoverable_range[i].recover_addr);
	}
	return 1;
}
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
	u64 glue;

	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
		return -ENODEV;

	/* Hookup some exception handlers except machine check. We use the
	 * fwnmi area at 0x7000 to provide the glue space to OPAL
	 */
	glue = 0x7000;

	/*
	 * Check if we are running on newer firmware that exports
	 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
	 * the HMI interrupt and we catch it directly in Linux.
	 *
	 * For older firmware (i.e. currently released POWER8 System Firmware
	 * as of today <= SV810_087), we fall back to the old behavior and let
	 * OPAL patch the HMI vector and handle it inside OPAL firmware.
	 *
	 * For newer firmware (in development/yet to be released) we will
	 * start catching/handling HMI directly in Linux.
	 */
	if (!opal_check_token(OPAL_HANDLE_HMI)) {
		pr_info("Old firmware detected, OPAL handles HMIs.\n");
		opal_register_exception_handler(
				OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
				0, glue);
		glue += 128;
	}

	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

	return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);
int opal_notifier_register(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_register(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_register);

int opal_notifier_unregister(struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
	return 0;
}
EXPORT_SYMBOL_GPL(opal_notifier_unregister);
static void opal_do_notifier(uint64_t events)
{
	unsigned long flags;
	uint64_t changed_mask;

	if (atomic_read(&opal_notifier_hold))
		return;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	changed_mask = last_notified_mask ^ events;
	last_notified_mask = events;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);

	/*
	 * We feed the callback with both the event bits and the changed
	 * bits so it has enough information to act on.
	 */
	atomic_notifier_call_chain(&opal_notifier_head,
				   events, (void *)changed_mask);
}

void opal_notifier_update_evt(uint64_t evt_mask,
			      uint64_t evt_val)
{
	unsigned long flags;

	spin_lock_irqsave(&opal_notifier_lock, flags);
	last_notified_mask &= ~evt_mask;
	last_notified_mask |= evt_val;
	spin_unlock_irqrestore(&opal_notifier_lock, flags);
}

void opal_notifier_enable(void)
{
	int64_t rc;
	__be64 evt = 0;

	atomic_set(&opal_notifier_hold, 0);

	/* Process pending events */
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_do_notifier(be64_to_cpu(evt));
}

void opal_notifier_disable(void)
{
	atomic_set(&opal_notifier_hold, 1);
}
/*
 * Opal message notifier based on message type. Allow subscribers to get
 * notified for a specific message type.
 */
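/*
 * Illustrative use (not taken from this file): a consumer fills in a
 * struct notifier_block whose notifier_call receives the message type and
 * a pointer to the struct opal_msg, then calls e.g.
 *	opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, &my_nb);
 * where my_nb stands in for the caller's own notifier block.
 */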
int opal_message_notifier_register(enum OpalMessageType msg_type,
					struct notifier_block *nb)
{
	if (!nb) {
		pr_warning("%s: Invalid argument (%p)\n",
			   __func__, nb);
		return -EINVAL;
	}

	if (msg_type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Invalid message type argument (%d)\n",
			   __func__, msg_type);
		return -EINVAL;
	}

	return atomic_notifier_chain_register(
				&opal_msg_notifier_head[msg_type], nb);
}
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
	/* notify subscribers */
	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
					msg_type, msg);
}
static void opal_handle_message(void)
{
	s64 ret;
	/*
	 * TODO: pre-allocate a message buffer depending on opal-msg-size
	 * value in /proc/device-tree.
	 */
	static struct opal_msg msg;
	u32 type;

	ret = opal_get_msg(__pa(&msg), sizeof(msg));
	/* No opal message pending. */
	if (ret == OPAL_RESOURCE)
		return;

	/* check for errors. */
	if (ret) {
		pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
				__func__, ret);
		return;
	}

	type = be32_to_cpu(msg.msg_type);

	/* Sanity check */
	if (type >= OPAL_MSG_TYPE_MAX) {
		pr_warning("%s: Unknown message type: %u\n", __func__, type);
		return;
	}
	opal_message_do_notify(type, (void *)&msg);
}
static int opal_message_notify(struct notifier_block *nb,
			       unsigned long events, void *change)
{
	if (events & OPAL_EVENT_MSG_PENDING)
		opal_handle_message();
	return 0;
}

static struct notifier_block opal_message_nb = {
	.notifier_call	= opal_message_notify,
	.next		= NULL,
	.priority	= 0,
};
static int __init opal_message_init(void)
{
	int ret, i;

	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

	ret = opal_notifier_register(&opal_message_nb);
	if (ret) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, ret);
		return ret;
	}

	return 0;
}
machine_early_initcall(powernv, opal_message_init);
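
/*
 * Message flow recap (descriptive comment): OPAL raises
 * OPAL_EVENT_MSG_PENDING, opal_message_notify() sees it through the event
 * notifier registered above, opal_handle_message() pulls the payload with
 * opal_get_msg() and dispatches it to the per-type chain via
 * opal_message_do_notify().
 */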
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
	s64 rc;
	__be64 evt, len;

	if (!opal.entry)
		return -ENODEV;
	opal_poll_events(&evt);
	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
		return 0;
	len = cpu_to_be64(count);
	rc = opal_console_read(vtermno, &len, buf);
	if (rc == OPAL_SUCCESS)
		return be64_to_cpu(len);
	return 0;
}
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
	int written = 0;
	__be64 olen;
	s64 len, rc;
	unsigned long flags;
	__be64 evt;

	if (!opal.entry)
		return -ENODEV;

	/* We want put_chars to be atomic to avoid mangling of hvsi
	 * packets. To do that, we first test for room and return
	 * -EAGAIN if there isn't enough.
	 *
	 * Unfortunately, opal_console_write_buffer_space() doesn't
	 * appear to work on opal v1, so we just assume there is
	 * enough room and be done with it.
	 */
	spin_lock_irqsave(&opal_write_lock, flags);
	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
		rc = opal_console_write_buffer_space(vtermno, &olen);
		len = be64_to_cpu(olen);
		if (rc || len < total_len) {
			spin_unlock_irqrestore(&opal_write_lock, flags);
			/* Closed -> drop characters */
			if (rc)
				return total_len;
			opal_poll_events(NULL);
			return -EAGAIN;
		}
	}

	/* We still try to handle partial completions, though they
	 * should no longer happen.
	 */
	rc = OPAL_BUSY;
	while (total_len > 0 && (rc == OPAL_BUSY ||
				 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
		olen = cpu_to_be64(total_len);
		rc = opal_console_write(vtermno, &olen, data);
		len = be64_to_cpu(olen);

		/* Closed or other error drop */
		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
		    rc != OPAL_BUSY_EVENT) {
			written = total_len;
			break;
		}
		if (rc == OPAL_SUCCESS) {
			total_len -= len;
			data += len;
			written += len;
		}
		/* This is a bit nasty but we need that for the console to
		 * flush when there aren't any interrupts. We will clean
		 * things up a bit later to limit that to synchronous paths
		 * such as the kernel console and xmon/udbg.
		 */
		do
			opal_poll_events(&evt);
		while (rc == OPAL_SUCCESS &&
			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
	}
	spin_unlock_irqrestore(&opal_write_lock, flags);
	return written;
}
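
/*
 * Summary of the recovery policy implemented below (descriptive comment
 * only): no recovery is attempted when MSR_RI is clear; an event the
 * platform already marked as recovered is trusted; a fault on a non-kernel
 * address, or a synchronous error taken in user mode, is handled by sending
 * the offending task SIGBUS rather than treating it as fatal.
 */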
static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * Faulting address is not in kernel text. We should be fine.
		 * We need to find which process uses this address.
		 * For now, kill the task if we have received the exception
		 * while in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		   evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we have received a synchronous error when in userspace,
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}
int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	/* Print things out */
	if (evt.version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt.version);
		return 0;
	}
	machine_check_print_event_info(&evt);

	if (opal_recover_mce(regs, &evt))
		return 1;
	return 0;
}
/* Early HMI handler called in real mode. */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	s64 rc;

	/*
	 * Call the OPAL HMI handler.
	 * A return value of OPAL_SUCCESS indicates that there is an HMI
	 * event waiting to be pulled by Linux.
	 */
	rc = opal_handle_hmi();
	if (rc == OPAL_SUCCESS) {
		local_paca->hmi_event_available = 1;
		return 1;
	}
	return 0;
}
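
/*
 * Note on the two-stage HMI flow (descriptive): the real-mode handler above
 * only records in the paca that an event is pending; the virtual-mode
 * handler below, run later from interrupt replay, polls OPAL for the event
 * details and feeds them to the notifier chain.
 */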
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	s64 rc;
	__be64 evt = 0;

	/*
	 * Check if an HMI event is available.
	 * If yes, call opal_poll_events() to pull OPAL messages and
	 * process them.
	 */
	if (!local_paca->hmi_event_available)
		return 0;

	local_paca->hmi_event_available = 0;
	rc = opal_poll_events(&evt);
	if (rc == OPAL_SUCCESS && evt)
		opal_do_notifier(be64_to_cpu(evt));

	return 1;
}
static uint64_t find_recovery_address(uint64_t nip)
{
	int i;

	for (i = 0; i < mc_recoverable_range_len; i++)
		if ((nip >= mc_recoverable_range[i].start_addr) &&
		    (nip < mc_recoverable_range[i].end_addr))
			return mc_recoverable_range[i].recover_addr;
	return 0;
}
bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
	uint64_t recover_addr = 0;

	if (!opal.base || !opal.size)
		goto out;

	if ((regs->nip >= opal.base) &&
	    (regs->nip <= (opal.base + opal.size)))
		recover_addr = find_recovery_address(regs->nip);

	/*
	 * Setup regs->nip to rfi into fixup address.
	 */
	if (recover_addr)
		regs->nip = recover_addr;

out:
	return !!recover_addr;
}
static irqreturn_t opal_interrupt(int irq, void *data)
{
	__be64 events;

	opal_handle_interrupt(virq_to_hw(irq), &events);

	opal_do_notifier(be64_to_cpu(events));

	return IRQ_HANDLED;
}
static int opal_sysfs_init(void)
{
	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
	if (!opal_kobj) {
		pr_warn("kobject_create_and_add opal failed\n");
		return -ENOMEM;
	}

	return 0;
}
static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
			       struct bin_attribute *bin_attr,
			       char *buf, loff_t off, size_t count)
{
	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
				       bin_attr->size);
}

static BIN_ATTR_RO(symbol_map, 0);
static void opal_export_symmap(void)
{
	const __be64 *syms;
	unsigned int size;
	struct device_node *fw;
	int rc;

	fw = of_find_node_by_path("/ibm,opal/firmware");
	if (!fw)
		return;
	syms = of_get_property(fw, "symbol-map", &size);
	if (!syms || size != 2 * sizeof(__be64))
		return;

	/* Setup attributes */
	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);

	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
	if (rc)
		pr_warn("Error %d creating OPAL symbols file\n", rc);
}
static void __init opal_dump_region_init(void)
{
	void *addr;
	uint64_t size;
	int rc;

	/* Register kernel log buffer */
	addr = log_buf_addr_get();
	if (addr == NULL)
		return;

	size = log_buf_len_get();
	if (size == 0)
		return;

	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
				       __pa(addr), size);
	/* Don't warn if this is just an older OPAL that doesn't
	 * know about that call
	 */
	if (rc && rc != OPAL_UNSUPPORTED)
		pr_warn("DUMP: Failed to register kernel log buffer. "
			"rc = %d\n", rc);
}
static void opal_ipmi_init(struct device_node *opal_node)
{
	struct device_node *np;

	for_each_child_of_node(opal_node, np)
		if (of_device_is_compatible(np, "ibm,opal-ipmi"))
			of_platform_device_create(np, NULL, NULL);
}
static void opal_i2c_create_devs(void)
{
	struct device_node *np;

	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
		of_platform_device_create(np, NULL, NULL);
}
static void __init opal_irq_init(struct device_node *dn)
{
	const __be32 *irqs;
	int i, irqlen, rc = 0;

	/* Get interrupt property */
	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
	opal_irq_count = irqs ? (irqlen / 4) : 0;
	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
	if (!opal_irq_count)
		return;

	/* Install interrupt handlers */
	opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
	for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
		unsigned int irq, virq;

		/* Get hardware and virtual IRQ */
		irq = be32_to_cpup(irqs);
		virq = irq_create_mapping(NULL, irq);
		if (virq == NO_IRQ) {
			pr_warn("Failed to map irq 0x%x\n", irq);
			continue;
		}

		/* Install interrupt handler */
		rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
		if (rc) {
			irq_dispose_mapping(virq);
			pr_warn("Error %d requesting irq %d (0x%x)\n",
				rc, virq, irq);
			continue;
		}

		/* Cache IRQ */
		opal_irqs[i] = virq;
	}
}
static int __init opal_init(void)
{
	struct device_node *np, *consoles;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("Device node not found\n");
		return -ENODEV;
	}

	/* Register OPAL consoles if any ports */
	if (firmware_has_feature(FW_FEATURE_OPALv2))
		consoles = of_find_node_by_path("/ibm,opal/consoles");
	else
		consoles = of_node_get(opal_node);
	if (consoles) {
		for_each_child_of_node(consoles, np) {
			if (strcmp(np->name, "serial"))
				continue;
			of_platform_device_create(np, NULL, NULL);
		}
		of_node_put(consoles);
	}

	/* Create i2c platform devices */
	opal_i2c_create_devs();

	/* Find all OPAL interrupts and request them */
	opal_irq_init(opal_node);

	/* Create "opal" kobject under /sys/firmware */
	rc = opal_sysfs_init();
	if (rc == 0) {
		/* Export symbol map to userspace */
		opal_export_symmap();
		/* Setup dump region interface */
		opal_dump_region_init();
		/* Setup error log interface */
		rc = opal_elog_init();
		/* Setup code update interface */
		opal_flash_init();
		/* Setup platform dump extract interface */
		opal_platform_dump_init();
		/* Setup system parameters interface */
		opal_sys_param_init();
		/* Setup message log interface. */
		opal_msglog_init();
	}

	opal_ipmi_init(opal_node);

	return 0;
}
machine_subsys_initcall(powernv, opal_init);
void opal_shutdown(void)
{
	unsigned int i;
	long rc = OPAL_BUSY;

	/* First free interrupts, which will also mask them */
	for (i = 0; i < opal_irq_count; i++) {
		if (opal_irqs[i])
			free_irq(opal_irqs[i], NULL);
		opal_irqs[i] = 0;
	}

	/*
	 * Then sync with OPAL which ensures anything that can
	 * potentially write to our memory has completed, such
	 * as an ongoing dump retrieval.
	 */
	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
		rc = opal_sync_host_reboot();
		if (rc == OPAL_BUSY)
			opal_poll_events(NULL);
		else
			mdelay(10);
	}

	/* Unregister memory dump region */
	opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
/* Convert a region of vmalloc memory to an opal sg list */
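/*
 * Layout note (descriptive, inferred from the code below): each sg node
 * occupies one page; its header holds a length and a physical pointer to
 * the next node, which is why a node's length is computed as
 * i * sizeof(struct opal_sg_entry) + 16, the 16 bytes covering that header.
 */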
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
					     unsigned long vmalloc_size)
{
	struct opal_sg_list *sg, *first = NULL;
	unsigned long i = 0;

	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sg)
		goto nomem;

	first = sg;

	while (vmalloc_size > 0) {
		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
		uint64_t length = min(vmalloc_size, PAGE_SIZE);

		sg->entry[i].data = cpu_to_be64(data);
		sg->entry[i].length = cpu_to_be64(length);
		i++;

		if (i >= SG_ENTRIES_PER_NODE) {
			struct opal_sg_list *next;

			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!next)
				goto nomem;

			sg->length = cpu_to_be64(
					i * sizeof(struct opal_sg_entry) + 16);
			i = 0;
			sg->next = cpu_to_be64(__pa(next));
			sg = next;
		}

		vmalloc_addr += length;
		vmalloc_size -= length;
	}

	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

	return first;

nomem:
	pr_err("%s : Failed to allocate memory\n", __func__);
	opal_free_sg_list(first);
	return NULL;
}
void opal_free_sg_list(struct opal_sg_list *sg)
{
	while (sg) {
		uint64_t next = be64_to_cpu(sg->next);

		kfree(sg);

		if (next)
			sg = __va(next);
		else
			sg = NULL;
	}
}
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);