/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/types.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>

#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
        u64 base;
        u64 entry;
        u64 size;
} opal;

struct mcheck_recoverable_range {
        u64 start_addr;
        u64 end_addr;
        u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
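/*
 * The two early_init_dt_scan_*() routines below are flattened-device-tree
 * scan callbacks. They are expected to be invoked from the early boot code
 * via of_scan_flat_dt(), roughly as follows (illustrative only, the call
 * sites live outside this file):
 *
 *	of_scan_flat_dt(early_init_dt_scan_opal, NULL);
 *	of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
 *
 * A callback returns 0 to keep scanning and non-zero once the "ibm,opal"
 * node has been handled.
 */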
int __init early_init_dt_scan_opal(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        const void *basep, *entryp, *sizep;
        unsigned long basesz, entrysz, runtimesz;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
        entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
        sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

        if (!basep || !entryp || !sizep)
                return 1;

        opal.base = of_read_number(basep, basesz/4);
        opal.entry = of_read_number(entryp, entrysz/4);
        opal.size = of_read_number(sizep, runtimesz/4);

        pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
                 opal.base, basep, basesz);
        pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%ld)\n",
                 opal.entry, entryp, entrysz);
        pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%ld)\n",
                 opal.size, sizep, runtimesz);

        powerpc_firmware_features |= FW_FEATURE_OPAL;
        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
                powerpc_firmware_features |= FW_FEATURE_OPALv2;
                powerpc_firmware_features |= FW_FEATURE_OPALv3;
                printk("OPAL V3 detected!\n");
        } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
                powerpc_firmware_features |= FW_FEATURE_OPALv2;
                printk("OPAL V2 detected!\n");
        } else {
                printk("OPAL V1 detected!\n");
        }

        return 1;
}
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        unsigned long i, size;
        const __be32 *prop;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &size);
        if (!prop)
                return 1;

        pr_debug("Found machine check recoverable ranges.\n");

        /*
         * Allocate a buffer to hold the MC recoverable ranges. We will be
         * accessing them in real mode, hence the buffer needs to sit within
         * the RMO region.
         */
        mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
                                                        ppc64_rma_size));
        memset(mc_recoverable_range, 0, size);

        /*
         * Each recoverable address range entry is (start address, len,
         * recovery address): 2 cells each for the start and recovery
         * addresses, 1 cell for the length, totalling 5 cells per entry.
         */
        for (i = 0; i < size / (sizeof(*prop) * 5); i++) {
                mc_recoverable_range[i].start_addr =
                                        of_read_number(prop + (i * 5) + 0, 2);
                mc_recoverable_range[i].end_addr =
                                        mc_recoverable_range[i].start_addr +
                                        of_read_number(prop + (i * 5) + 2, 1);
                mc_recoverable_range[i].recover_addr =
                                        of_read_number(prop + (i * 5) + 3, 2);

                pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
                                mc_recoverable_range[i].start_addr,
                                mc_recoverable_range[i].end_addr,
                                mc_recoverable_range[i].recover_addr);
        }
        mc_recoverable_range_len = i;
        return 1;
}
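/*
 * For reference, the "mcheck-recoverable-ranges" property decoded above is a
 * flat array of 32-bit cells, five cells per entry. This sketch only restates
 * the parsing logic above, it does not define any additional interface:
 *
 *	cells 0-1: start address    (2 cells)
 *	cell  2  : length           (1 cell)
 *	cells 3-4: recovery address (2 cells)
 */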
static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
        u64 glue;

        if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
                return -ENODEV;

        /* Hook up some exception handlers, except machine check. We use the
         * fwnmi area at 0x7000 to provide the glue space to OPAL.
         */
        glue = 0x7000;
        opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
                                        0, glue);
        glue += 128;
        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif
        return 0;
}
early_initcall(opal_register_exception_handlers);
int opal_notifier_register(struct notifier_block *nb)
{
        if (!nb) {
                pr_warning("%s: Invalid argument (%p)\n", __func__, nb);
                return -EINVAL;
        }

        atomic_notifier_chain_register(&opal_notifier_head, nb);
        return 0;
}
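/*
 * Illustrative only (not part of this file): a subscriber registers a
 * standard notifier callback. The callback receives the current OPAL event
 * bits in "events" and the bits that changed since the last notification in
 * "change" (see opal_do_notifier() below); the event checked here is just an
 * example, and handle_it() stands in for subscriber-specific work.
 *
 *	static int my_opal_event_cb(struct notifier_block *nb,
 *				    unsigned long events, void *change)
 *	{
 *		if (events & OPAL_EVENT_MSG_PENDING)
 *			handle_it();
 *		return 0;
 *	}
 *	static struct notifier_block my_opal_nb = {
 *		.notifier_call = my_opal_event_cb,
 *	};
 *	...
 *	opal_notifier_register(&my_opal_nb);
 */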
static void opal_do_notifier(uint64_t events)
{
        unsigned long flags;
        uint64_t changed_mask;

        if (atomic_read(&opal_notifier_hold))
                return;

        spin_lock_irqsave(&opal_notifier_lock, flags);
        changed_mask = last_notified_mask ^ events;
        last_notified_mask = events;
        spin_unlock_irqrestore(&opal_notifier_lock, flags);

        /*
         * Pass both the current event bits and the changed bits so the
         * callback has enough information to act on.
         */
        atomic_notifier_call_chain(&opal_notifier_head,
                                   events, (void *)changed_mask);
}
void opal_notifier_update_evt(uint64_t evt_mask,
                              uint64_t evt_val)
{
        unsigned long flags;

        spin_lock_irqsave(&opal_notifier_lock, flags);
        last_notified_mask &= ~evt_mask;
        last_notified_mask |= evt_val;
        spin_unlock_irqrestore(&opal_notifier_lock, flags);
}
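/*
 * The enable/disable pair below lets a caller temporarily hold off event
 * delivery: while opal_notifier_hold is set, opal_do_notifier() returns
 * early, and whatever accumulated in the meantime is picked up again via
 * opal_poll_events() when notifications are re-enabled.
 */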
void opal_notifier_enable(void)
{
        int64_t rc;
        __be64 evt = 0;

        atomic_set(&opal_notifier_hold, 0);

        /* Process any pending events */
        rc = opal_poll_events(&evt);
        if (rc == OPAL_SUCCESS && evt)
                opal_do_notifier(be64_to_cpu(evt));
}
void opal_notifier_disable(void)
{
        atomic_set(&opal_notifier_hold, 1);
}
/*
 * OPAL message notifier, keyed by message type. Allows subscribers to be
 * notified about a specific message type only.
 */
int opal_message_notifier_register(enum OpalMessageType msg_type,
                                        struct notifier_block *nb)
{
        if (!nb) {
                pr_warning("%s: Invalid argument (%p)\n", __func__, nb);
                return -EINVAL;
        }
        if (msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warning("%s: Invalid message type argument (%d)\n",
                           __func__, msg_type);
                return -EINVAL;
        }
        return atomic_notifier_chain_register(
                                &opal_msg_notifier_head[msg_type], nb);
}
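/*
 * Illustrative only: subscribing to a single OPAL message type. The callback
 * has the usual notifier signature and "data" points to the struct opal_msg
 * fetched by opal_handle_message() below; OPAL_MSG_ASYNC_COMP is used purely
 * as an example type and consume() stands in for subscriber-specific work.
 *
 *	static int my_msg_cb(struct notifier_block *nb,
 *			     unsigned long msg_type, void *data)
 *	{
 *		struct opal_msg *msg = data;
 *
 *		consume(msg);
 *		return 0;
 *	}
 *	static struct notifier_block my_msg_nb = {
 *		.notifier_call = my_msg_cb,
 *	};
 *	...
 *	opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, &my_msg_nb);
 */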
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
        /* notify subscribers */
        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
                                        msg_type, msg);
}
static void opal_handle_message(void)
{
        s64 ret;
        /*
         * TODO: pre-allocate a message buffer depending on the opal-msg-size
         * value in /proc/device-tree.
         */
        static struct opal_msg msg;

        ret = opal_get_msg(__pa(&msg), sizeof(msg));
        /* No opal message pending. */
        if (ret == OPAL_RESOURCE)
                return;

        /* Check for any other error. */
        if (ret) {
                pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
                                __func__, ret);
                return;
        }

        /* Sanity-check the type before indexing the notifier array. */
        if (msg.msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warning("%s: Unknown message type: %u\n",
                                __func__, msg.msg_type);
                return;
        }
        opal_message_do_notify(msg.msg_type, (void *)&msg);
}
static int opal_message_notify(struct notifier_block *nb,
                          unsigned long events, void *change)
{
        if (events & OPAL_EVENT_MSG_PENDING)
                opal_handle_message();
        return 0;
}

static struct notifier_block opal_message_nb = {
        .notifier_call = opal_message_notify,
};
static int __init opal_message_init(void)
{
        int ret, i;

        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

        ret = opal_notifier_register(&opal_message_nb);
        if (ret) {
                pr_err("%s: Can't register OPAL event notifier (%d)\n",
                       __func__, ret);
                return ret;
        }
        return 0;
}
early_initcall(opal_message_init);
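/*
 * Console I/O helpers. These are the low-level read/write primitives used by
 * the OPAL console drivers (e.g. the hvc backend). Note that lengths are
 * exchanged with firmware as big-endian 64-bit values, hence the
 * cpu_to_be64()/be64_to_cpu() conversions below.
 */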
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
        s64 rc;
        __be64 evt, len;

        if (!opal.entry)
                return -ENODEV;
        opal_poll_events(&evt);
        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
                return 0;
        len = cpu_to_be64(count);
        rc = opal_console_read(vtermno, &len, buf);
        if (rc == OPAL_SUCCESS)
                return be64_to_cpu(len);
        return 0;
}
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
        int written = 0;
        __be64 olen;
        s64 len, rc;
        unsigned long flags;
        __be64 evt;

        if (!opal.entry)
                return -ENODEV;

        /* We want put_chars to be atomic to avoid mangling of hvsi
         * packets. To do that, we first test for room and return
         * -EAGAIN if there isn't enough.
         *
         * Unfortunately, opal_console_write_buffer_space() doesn't
         * appear to work on OPAL v1, so we just assume there is
         * enough room and be done with it.
         */
        spin_lock_irqsave(&opal_write_lock, flags);
        if (firmware_has_feature(FW_FEATURE_OPALv2)) {
                rc = opal_console_write_buffer_space(vtermno, &olen);
                len = be64_to_cpu(olen);
                if (rc || len < total_len) {
                        spin_unlock_irqrestore(&opal_write_lock, flags);
                        /* Closed -> drop characters */
                        if (rc)
                                return total_len;
                        opal_poll_events(NULL);
                        return -EAGAIN;
                }
        }

        /* We still try to handle partial completions, though they
         * should no longer happen.
         */
        rc = OPAL_BUSY;
        while (total_len > 0 && (rc == OPAL_BUSY ||
                                 rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
                olen = cpu_to_be64(total_len);
                rc = opal_console_write(vtermno, &olen, data);
                len = be64_to_cpu(olen);

                /* Closed or other error: drop */
                if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
                    rc != OPAL_BUSY_EVENT) {
                        written = total_len;
                        break;
                }
                if (rc == OPAL_SUCCESS) {
                        total_len -= len;
                        data += len;
                        written += len;
                }
                /* This is a bit nasty but we need that for the console to
                 * flush when there aren't any interrupts. We will clean
                 * things up a bit later to limit that to synchronous paths
                 * such as the kernel console and xmon/udbg.
                 */
                do
                        opal_poll_events(&evt);
                while (rc == OPAL_SUCCESS &&
                        (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
        }
        spin_unlock_irqrestore(&opal_write_lock, flags);
        return written;
}
static int opal_recover_mce(struct pt_regs *regs,
                                        struct machine_check_event *evt)
{
        int recovered = 0;
        uint64_t ea = get_mce_fault_addr(evt);

        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
        } else if (ea && !is_kernel_addr(ea)) {
                /*
                 * Faulting address is not in kernel text. We should be fine.
                 * We need to find which process uses this address.
                 * For now, kill the task if we have received the exception
                 * while in userspace.
                 *
                 * TODO: Queue up this address for hwpoisoning later.
                 */
                if (user_mode(regs) && !is_global_init(current)) {
                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                        recovered = 1;
                } else
                        recovered = 0;
        } else if (user_mode(regs) && !is_global_init(current) &&
                evt->severity == MCE_SEV_ERROR_SYNC) {
                /*
                 * If we have received a synchronous error while in userspace,
                 * kill the task.
                 */
                _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                recovered = 1;
        }
        return recovered;
}
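/*
 * opal_machine_check() below is intended to be wired up as the platform's
 * machine check handler (e.g. via ppc_md.machine_check_exception on PowerNV).
 * It consumes the event queued by the early machine check code and uses
 * opal_recover_mce() to decide whether the interrupted context may continue.
 */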
int opal_machine_check(struct pt_regs *regs)
{
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return 0;

        /* Print things out */
        if (evt.version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d!\n",
                       evt.version);
                return 0;
        }
        machine_check_print_event_info(&evt);

        if (opal_recover_mce(regs, &evt))
                return 1;
        return 0;
}
static uint64_t find_recovery_address(uint64_t nip)
{
        int i;

        for (i = 0; i < mc_recoverable_range_len; i++)
                if ((nip >= mc_recoverable_range[i].start_addr) &&
                    (nip < mc_recoverable_range[i].end_addr))
                        return mc_recoverable_range[i].recover_addr;
        return 0;
}
bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
        uint64_t recover_addr = 0;

        if (!opal.base || !opal.size)
                goto out;

        if ((regs->nip >= opal.base) &&
            (regs->nip <= (opal.base + opal.size)))
                recover_addr = find_recovery_address(regs->nip);

        /*
         * Set up regs->nip to rfi into the fixup address.
         */
        if (recover_addr)
                regs->nip = recover_addr;

out:
        return !!recover_addr;
}
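/*
 * Handler for the interrupts listed in the "opal-interrupts" device-tree
 * property (requested in opal_init() below). Each interrupt is handed back
 * to firmware via opal_handle_interrupt(), and the event bits firmware
 * returns are fed into the notifier chain.
 */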
static irqreturn_t opal_interrupt(int irq, void *data)
{
        __be64 events;

        opal_handle_interrupt(virq_to_hw(irq), &events);
        opal_do_notifier(be64_to_cpu(events));

        return IRQ_HANDLED;
}
static int opal_sysfs_init(void)
{
        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
        if (!opal_kobj) {
                pr_warn("kobject_create_and_add opal failed\n");
                return -ENOMEM;
        }
        return 0;
}
static int __init opal_init(void)
{
        struct device_node *np, *consoles;
        const __be32 *irqs;
        int rc, i, irqlen;

        opal_node = of_find_node_by_path("/ibm,opal");
        if (!opal_node) {
                pr_warn("opal: Node not found\n");
                return -ENODEV;
        }

        /* Register OPAL consoles if any ports */
        if (firmware_has_feature(FW_FEATURE_OPALv2))
                consoles = of_find_node_by_path("/ibm,opal/consoles");
        else
                consoles = of_node_get(opal_node);
        if (consoles) {
                for_each_child_of_node(consoles, np) {
                        if (strcmp(np->name, "serial"))
                                continue;
                        of_platform_device_create(np, NULL, NULL);
                }
                of_node_put(consoles);
        }

        /* Find all OPAL interrupts and request them */
        irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
        pr_debug("opal: Found %d interrupts reserved for OPAL\n",
                 irqs ? (irqlen / 4) : 0);
        opal_irq_count = irqlen / 4;
        opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
        for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
                unsigned int hwirq = be32_to_cpup(irqs);
                unsigned int irq = irq_create_mapping(NULL, hwirq);

                if (irq == NO_IRQ) {
                        pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
                        continue;
                }
                rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
                if (rc)
                        pr_warning("opal: Error %d requesting irq %d (0x%x)\n",
                                   rc, irq, hwirq);
                opal_irqs[i] = irq;
        }

        /* Create "opal" kobject under /sys/firmware */
        rc = opal_sysfs_init();
        if (rc == 0) {
                /* Setup error log interface */
                rc = opal_elog_init();
                /* Setup code update interface */
                opal_flash_init();
                /* Setup platform dump extract interface */
                opal_platform_dump_init();
                /* Setup system parameters interface */
                opal_sys_param_init();
        }

        return 0;
}
subsys_initcall(opal_init);
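/*
 * opal_shutdown() is expected to be called from the platform shutdown/kexec
 * path. It quiesces OPAL interrupt delivery and then syncs with firmware so
 * that nothing (such as an in-flight dump retrieval) is still writing to
 * kernel memory when the kernel goes away.
 */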
void opal_shutdown(void)
{
        unsigned int i;
        long rc = OPAL_BUSY;

        /* First free interrupts, which will also mask them */
        for (i = 0; i < opal_irq_count; i++) {
                if (opal_irqs[i])
                        free_irq(opal_irqs[i], NULL);
                opal_irqs[i] = 0;
        }

        /*
         * Then sync with OPAL, which ensures anything that can
         * potentially write to our memory has completed, such
         * as an ongoing dump retrieval.
         */
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_sync_host_reboot();
                if (rc == OPAL_BUSY)
                        opal_poll_events(NULL);
                else
                        mdelay(10);
        }
}