2 * Intel 7300 class Memory Controllers kernel module (Clarksboro)
4 * This file may be distributed under the terms of the
5 * GNU General Public License version 2 only.
7 * Copyright (c) 2010 by:
8 * Mauro Carvalho Chehab <mchehab@redhat.com>
10 * Red Hat Inc. http://www.redhat.com
12 * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
13 * http://www.intel.com/Assets/PDF/datasheet/318082.pdf
 * TODO: The chipset also allows checking for PCI Express errors. Currently,
 *	 the driver covers only memory errors.
18 * This driver uses "csrows" EDAC attribute to represent DIMM slot#
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/pci.h>
24 #include <linux/pci_ids.h>
25 #include <linux/slab.h>
26 #include <linux/edac.h>
27 #include <linux/mmzone.h>
29 #include "edac_core.h"
32 * Alter this version for the I7300 module when modifications are made
34 #define I7300_REVISION " Ver: 1.0.0 " __DATE__
36 #define EDAC_MOD_STR "i7300_edac"
38 #define i7300_printk(level, fmt, arg...) \
39 edac_printk(level, "i7300", fmt, ##arg)
41 #define i7300_mc_printk(mci, level, fmt, arg...) \
42 edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg)
45 * Memory topology is organized as:
46 * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0)
47 * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0)
 *	Each channel can have up to 8 DIMM sets (called SLOTS)
49 * Slots should generally be filled in pairs
50 * Except on Single Channel mode of operation
51 * just slot 0/channel0 filled on this mode
52 * On normal operation mode, the two channels on a branch should be
53 filled together for the same SLOT#
54 * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four
55 * channels on both branches should be filled
58 /* Limits for i7300 */
60 #define MAX_BRANCHES 2
61 #define MAX_CH_PER_BRANCH 2
62 #define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES)
65 #define to_channel(ch, branch) ((((branch)) << 1) | (ch))
67 #define to_csrow(slot, ch, branch) \
68 (to_channel(ch, branch) | ((slot) << 2))
72 * Function 0: System Address (not documented)
73 * Function 1: Memory Branch Map, Control, Errors Register
74 * Function 2: FSB Error Registers
76 * All 3 functions of Device 16 (0,1,2) share the SAME DID and
77 * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2),
78 * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
79 * for device 21 (0,1).
82 /* OFFSETS for Function 0 */
83 #define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
84 #define MAXCH 0x56 /* Max Channel Number */
85 #define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
87 /* OFFSETS for Function 1 */
88 #define MC_SETTINGS 0x40
102 /*TODO: double check it */
103 #define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */
105 /* Fatal error registers */
106 #define FERR_FAT_FBD 0x98
108 /*TODO: double check it */
109 #define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */
111 #define NERR_FAT_FBD 0x9c
112 #define FERR_NF_FBD 0xa0
114 /* Non-fatal error register */
115 #define NERR_NF_FBD 0xa4
117 /* Enable error mask */
118 #define EMASK_FBD 0xa8
120 #define ERR0_FBD 0xac
121 #define ERR1_FBD 0xb0
122 #define ERR2_FBD 0xb4
123 #define MCERR_FBD 0xb8
127 /* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */
129 /* TODO: OFFSETS for Device 16 Function 2 */
133 * Function 0: Memory Map Branch 0
136 * Function 0: Memory Map Branch 1
139 /* OFFSETS for Function 0 */
142 * Note: Other Intel EDAC drivers use AMBPRESENT to identify if the available
143 * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it
144 * seems that we cannot use this information directly for the same usage.
145 * Each memory slot may have up to 2 AMB interfaces, one for income and another
146 * for outcome interface to the next slot.
147 * For now, the driver just stores the AMB present registers, but rely only at
148 * the MTR info to detect memory.
149 * Datasheet is also not clear about how to map each AMBPRESENT registers to
150 * one of the 4 available channels.
152 #define AMBPRESENT_0 0x64
153 #define AMBPRESENT_1 0x66
155 const static u16 mtr_regs [MAX_SLOTS] = {
156 0x80, 0x84, 0x88, 0x8c,
157 0x82, 0x86, 0x8a, 0x8e
/* Defines to extract the various fields from the
 * MTRx - Memory Technology Registers
163 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8))
164 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7))
165 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
166 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4)
167 #define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0)
168 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
169 #define MTR_DRAM_BANKS_ADDR_BITS 2
170 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
171 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
172 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
175 /* OFFSETS for Function 1 */
178 #define NRECFGLOG 0x74
179 #define RECFGLOG 0x78
180 #define NRECMEMA 0xbe
181 #define NRECMEMB 0xc0
182 #define NRECFB_DIMMA 0xc4
183 #define NRECFB_DIMMB 0xc8
184 #define NRECFB_DIMMC 0xcc
185 #define NRECFB_DIMMD 0xd0
186 #define NRECFB_DIMME 0xd4
187 #define NRECFB_DIMMF 0xd8
/*
 * Recoverable FB-DIMM error log registers A..F.
 *
 * NOTE(review): RECFB_DIMMA was 0xf8, which duplicated RECFB_DIMME and broke
 * the 4-byte stride followed by B..F. 0xe8 is inferred from that stride;
 * confirm against the datasheet register map before relying on it.
 */
#define RECFB_DIMMA		0xe8
#define RECFB_DIMMB		0xec
#define RECFB_DIMMC		0xf0
#define RECFB_DIMMD		0xf4
#define RECFB_DIMME		0xf8
#define RECFB_DIMMF		0xfc
198 /* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */
199 static inline int extract_fbdchan_indx(u32 x)
201 return (x>>28) & 0x3;
205 #ifdef CONFIG_EDAC_DEBUG
207 static const char *numrow_toString[] = {
215 static const char *numcol_toString[] = {
216 "1,024 - 10 columns",
217 "2,048 - 11 columns",
218 "4,096 - 12 columns",
226 * Error indicator bits and masks
227 * Error masks are according with Table 5-17 of i7300 datasheet
231 EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */
232 EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */
233 EMASK_M3 = 1<<2, /* Reserved */
234 EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */
235 EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
236 EMASK_M6 = 1<<5, /* Unsupported on i7300 */
237 EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
238 EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */
239 EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
240 EMASK_M10 = 1<<9, /* Unsupported on i7300 */
241 EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
242 EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
243 EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
244 EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
245 EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
246 EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
247 EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
248 EMASK_M18 = 1<<17, /* Unsupported on i7300 */
249 EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
250 EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
251 EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
252 EMASK_M22 = 1<<21, /* SPD protocol Error */
253 EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
254 EMASK_M24 = 1<<23, /* Refresh error */
255 EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
256 EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
257 EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
258 EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
259 EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
263 * Names to translate bit error into something useful
265 static const char *error_name[] = {
266 [0] = "Memory Write error on non-redundant retry",
267 [1] = "Memory or FB-DIMM configuration CRC read error",
269 [3] = "Uncorrectable Data ECC on Replay",
270 [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
271 /* M6 Unsupported on i7300 */
272 [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
273 [7] = "Aliased Uncorrectable Patrol Data ECC",
274 [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
275 /* M10 Unsupported on i7300 */
276 [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
277 [11] = "Non-Aliased Uncorrectable Patrol Data ECC",
278 [12] = "Memory Write error on first attempt",
279 [13] = "FB-DIMM Configuration Write error on first attempt",
280 [14] = "Memory or FB-DIMM configuration CRC read error",
281 [15] = "Channel Failed-Over Occurred",
282 [16] = "Correctable Non-Mirrored Demand Data ECC",
283 /* M18 Unsupported on i7300 */
284 [18] = "Correctable Resilver- or Spare-Copy Data ECC",
285 [19] = "Correctable Patrol Data ECC",
286 [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
287 [21] = "SPD protocol Error",
288 [22] = "Non-Redundant Fast Reset Timeout",
289 [23] = "Refresh error",
290 [24] = "Memory Write error on redundant retry",
291 [25] = "Redundant Fast Reset Timeout",
292 [26] = "Correctable Counter Threshold Exceeded",
293 [27] = "DIMM-Spare Copy Completed",
294 [28] = "DIMM-Isolation Completed",
298 #define ERROR_FAT_MASK (EMASK_M1 | \
302 /* Correctable errors */
303 #define ERROR_NF_CORRECTABLE (EMASK_M27 | \
309 #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \
311 #define ERROR_NF_SPD_PROTOCOL (EMASK_M22)
312 #define ERROR_NF_NORTH_CRC (EMASK_M21)
314 /* Recoverable errors */
315 #define ERROR_NF_RECOVERABLE (EMASK_M26 | \
328 /* uncorrectable errors */
329 #define ERROR_NF_UNCORRECTABLE (EMASK_M4)
331 /* mask to all non-fatal errors */
332 #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \
333 ERROR_NF_UNCORRECTABLE | \
334 ERROR_NF_RECOVERABLE | \
335 ERROR_NF_DIMM_SPARE | \
336 ERROR_NF_SPD_PROTOCOL | \
340 * Define error masks for the several registers
343 /* Enable all fatal and non fatal errors */
344 #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK)
346 /* mask for fatal error registers */
347 #define FERR_FAT_MASK ERROR_FAT_MASK
349 /* masks for non-fatal error register */
350 static inline int to_nf_mask(unsigned int mask)
352 return (mask & EMASK_M29) | (mask >> 3);
355 static inline int from_nf_ferr(unsigned int mask)
357 return (mask & EMASK_M29) | /* Bit 28 */
358 (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */
361 #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK)
362 #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE)
363 #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE)
364 #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL)
365 #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC)
366 #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE)
367 #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE)
371 /* Device name and register DID (Device ID) */
372 struct i7300_dev_info {
373 const char *ctl_name; /* name for this device */
374 u16 fsb_mapping_errors; /* DID for the branchmap,control */
377 /* Table of devices attributes supported by this driver */
378 static const struct i7300_dev_info i7300_devs[] = {
381 .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
385 struct i7300_dimm_info {
386 int megabytes; /* size, 0 means not present */
389 /* driver private data structure */
391 struct pci_dev *system_address; /* 16.0 */
392 struct pci_dev *branchmap_werrors; /* 16.1 */
393 struct pci_dev *fsb_error_regs; /* 16.2 */
394 struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */
396 u16 tolm; /* top of low memory */
397 u64 ambase; /* AMB BAR */
402 u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */
403 u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */
405 /* DIMM information matrix, allocating architecture maximums */
406 struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS];
410 /* I7300 MCH error information retrieved from Hardware */
411 struct i7300_error_info {
412 /* These registers are always read from the MC */
413 u32 ferr_fat_fbd; /* First Errors Fatal */
414 u32 nerr_fat_fbd; /* Next Errors Fatal */
415 u32 ferr_nf_fbd; /* First Errors Non-Fatal */
416 u32 nerr_nf_fbd; /* Next Errors Non-Fatal */
418 /* These registers are input ONLY if there was a Recoverable Error */
419 u32 redmemb; /* Recoverable Mem Data Error log B */
420 u16 recmema; /* Recoverable Mem Error log A */
421 u32 recmemb; /* Recoverable Mem Error log B */
423 /* These registers are input ONLY if there was a Non-Rec Error */
424 u16 nrecmema; /* Non-Recoverable Mem log A */
425 u16 nrecmemb; /* Non-Recoverable Mem log B */
430 /* FIXME: Why do we need to have this static? */
431 static struct edac_pci_ctl_info *i7300_pci;
435 /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and
436 5400 better to use an inline function than a macro in this case */
437 static inline int nrec_bank(struct i7300_error_info *info)
439 return ((info->nrecmema) >> 12) & 0x7;
441 static inline int nrec_rank(struct i7300_error_info *info)
443 return ((info->nrecmema) >> 8) & 0xf;
445 static inline int nrec_buf_id(struct i7300_error_info *info)
447 return ((info->nrecmema)) & 0xff;
449 static inline int nrec_rdwr(struct i7300_error_info *info)
451 return (info->nrecmemb) >> 31;
/*
 * Translate a read/write flag into a printable name: non-zero means
 * "Write", zero means "Read". The flag layout is shared by the NREC and
 * REC logs, so this works on the result of nrec_rdwr() and rec_rdwr() alike.
 */
static inline const char *rdwr_str(int rdwr)
{
	if (rdwr)
		return "Write";

	return "Read";
}
459 static inline int nrec_cas(struct i7300_error_info *info)
461 return ((info->nrecmemb) >> 16) & 0x1fff;
463 static inline int nrec_ras(struct i7300_error_info *info)
465 return (info->nrecmemb) & 0xffff;
467 static inline int rec_bank(struct i7300_error_info *info)
469 return ((info->recmema) >> 12) & 0x7;
471 static inline int rec_rank(struct i7300_error_info *info)
473 return ((info->recmema) >> 8) & 0xf;
475 static inline int rec_rdwr(struct i7300_error_info *info)
477 return (info->recmemb) >> 31;
479 static inline int rec_cas(struct i7300_error_info *info)
481 return ((info->recmemb) >> 16) & 0x1fff;
483 static inline int rec_ras(struct i7300_error_info *info)
485 return (info->recmemb) & 0xffff;
489 * i7300_get_error_info Retrieve the hardware error information from
490 * the hardware and cache it in the 'info'
493 static void i7300_get_error_info(struct mem_ctl_info *mci,
494 struct i7300_error_info *info)
496 struct i7300_pvt *pvt;
501 /* read in the 1st FATAL error register */
502 pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);
504 /* Mask only the bits that the doc says are valid
506 value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
508 /* If there is an error, then read in the
509 NEXT FATAL error register and the Memory Error Log Register A
511 if (value & FERR_FAT_MASK) {
512 info->ferr_fat_fbd = value;
514 /* harvest the various error data we need */
515 pci_read_config_dword(pvt->branchmap_werrors,
516 NERR_FAT_FBD, &info->nerr_fat_fbd);
517 pci_read_config_word(pvt->branchmap_werrors,
518 NRECMEMA, &info->nrecmema);
519 pci_read_config_word(pvt->branchmap_werrors,
520 NRECMEMB, &info->nrecmemb);
522 /* Clear the error bits, by writing them back */
523 pci_write_config_dword(pvt->branchmap_werrors,
524 FERR_FAT_FBD, value);
526 info->ferr_fat_fbd = 0;
527 info->nerr_fat_fbd = 0;
532 /* read in the 1st NON-FATAL error register */
533 pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);
535 /* If there is an error, then read in the 1st NON-FATAL error
536 * register as well */
537 if (value & FERR_NF_MASK) {
538 info->ferr_nf_fbd = value;
540 /* harvest the various error data we need */
541 pci_read_config_dword(pvt->branchmap_werrors,
542 NERR_NF_FBD, &info->nerr_nf_fbd);
543 pci_read_config_word(pvt->branchmap_werrors,
544 RECMEMA, &info->recmema);
545 pci_read_config_dword(pvt->branchmap_werrors,
546 RECMEMB, &info->recmemb);
547 pci_read_config_dword(pvt->branchmap_werrors,
548 REDMEMB, &info->redmemb);
550 /* Clear the error bits, by writing them back */
551 pci_write_config_dword(pvt->branchmap_werrors,
554 info->ferr_nf_fbd = 0;
555 info->nerr_nf_fbd = 0;
 * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci,
 *					struct i7300_error_info *info,
 *					unsigned long allErrors);
 *
 *	handle the Intel FATAL and unrecoverable errors, if any
569 static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci,
570 struct i7300_error_info *info,
571 unsigned long allErrors)
573 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
585 return; /* if no error, return now */
587 if (allErrors & ERROR_FAT_MASK)
589 else if (allErrors & FERR_NF_UNCORRECTABLE)
590 type = "NON-FATAL uncorrected";
592 type = "NON-FATAL recoverable";
594 /* ONLY ONE of the possible error bits will be set, as per the docs */
596 branch = extract_fbdchan_indx(info->ferr_fat_fbd);
599 /* Use the NON-Recoverable macros to extract data */
600 bank = nrec_bank(info);
601 rank = nrec_rank(info);
602 buf_id = nrec_buf_id(info);
603 rdwr = nrec_rdwr(info);
604 ras = nrec_ras(info);
605 cas = nrec_cas(info);
607 debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
608 "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
609 rank, channel, channel + 1, branch >> 1, bank,
610 buf_id, rdwr_str(rdwr), ras, cas);
612 /* Only 1 bit will be on */
613 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
615 /* Form out message */
616 snprintf(msg, sizeof(msg),
617 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
618 "RAS=%d CAS=%d %s Err=0x%lx (%s))",
619 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
620 type, allErrors, error_name[errnum]);
622 /* Call the helper to output message */
623 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
 * i7300_process_nonfatal_error_info(struct mem_ctl_info *mci,
 *					struct i7300_error_info *info);
 *
 *	handle the Intel NON-FATAL errors, if any
633 static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci,
634 struct i7300_error_info *info)
636 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
637 unsigned long allErrors;
646 /* mask off the Error bits that are possible */
647 allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
649 return; /* if no error, return now */
651 /* ONLY ONE of the possible error bits will be set, as per the docs */
653 if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
654 i7300_proccess_non_recoverable_info(mci, info, allErrors);
658 /* Correctable errors */
659 if (allErrors & ERROR_NF_CORRECTABLE) {
660 debugf0("\tCorrected bits= 0x%lx\n", allErrors);
662 branch = extract_fbdchan_indx(info->ferr_nf_fbd);
665 if (REC_ECC_LOCATOR_ODD(info->redmemb))
668 /* Convert channel to be based from zero, instead of
669 * from branch base of 0 */
672 bank = rec_bank(info);
673 rank = rec_rank(info);
674 rdwr = rec_rdwr(info);
678 /* Only 1 bit will be on */
679 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
681 debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
682 "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
683 rank, channel, branch >> 1, bank,
684 rdwr_str(rdwr), ras, cas);
686 /* Form out message */
687 snprintf(msg, sizeof(msg),
688 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
689 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
690 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
691 allErrors, error_name[errnum]);
693 /* Call the helper to output message */
694 edac_mc_handle_fbd_ce(mci, rank, channel, msg);
699 /* Miscelaneous errors */
700 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
702 branch = extract_fbdchan_indx(info->ferr_nf_fbd);
704 i7300_mc_printk(mci, KERN_EMERG,
705 "Non-Fatal misc error (Branch=%d Err=%#lx (%s))",
706 branch >> 1, allErrors, error_name[errnum]);
710 * i7300_process_error_info Process the error info that is
711 * in the 'info' structure, previously retrieved from hardware
713 static void i7300_process_error_info(struct mem_ctl_info *mci,
714 struct i7300_error_info *info)
717 /* First handle any fatal errors that occurred */
718 allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
719 i7300_proccess_non_recoverable_info(mci, info, allErrors);
721 /* now handle any non-fatal errors that occurred */
722 i7300_process_nonfatal_error_info(mci, info);
726 * i7300_clear_error Retrieve any error from the hardware
727 * but do NOT process that error.
728 * Used for 'clearing' out of previous errors
729 * Called by the Core module.
731 static void i7300_clear_error(struct mem_ctl_info *mci)
733 struct i7300_error_info info;
735 i7300_get_error_info(mci, &info);
739 * i7300_check_error Retrieve and process errors reported by the
740 * hardware. Called by the Core module.
742 static void i7300_check_error(struct mem_ctl_info *mci)
744 struct i7300_error_info info;
745 debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
746 i7300_get_error_info(mci, &info);
747 i7300_process_error_info(mci, &info);
751 * i7300_enable_error_reporting
752 * Turn on the memory reporting features of the hardware
754 static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
756 struct i7300_pvt *pvt;
761 /* Read the FBD Error Mask Register */
762 pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
765 /* Enable with a '0' */
766 fbd_error_mask &= ~(ENABLE_EMASK_ALL);
768 pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
 * decode_mtr(pvt, slot, ch, branch, dinfo, p_csrow)
 *
 * decode the MTR register cached for the given slot and branch, storing the
 * DIMM size in dinfo and the memory attributes in p_csrow
778 static int decode_mtr(struct i7300_pvt *pvt,
779 int slot, int ch, int branch,
780 struct i7300_dimm_info *dinfo,
781 struct csrow_info *p_csrow)
783 int mtr, ans, addrBits, channel;
785 channel = to_channel(ch, branch);
787 mtr = pvt->mtr[slot][branch];
788 ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0;
790 debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n",
792 ans ? "Present" : "NOT Present");
794 /* Determine if there is a DIMM present in this DIMM slot */
797 if (!amb_present || !ans)
804 /* Start with the number of bits for a Bank
806 addrBits = MTR_DRAM_BANKS_ADDR_BITS;
807 /* Add thenumber of ROW bits */
808 addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
809 /* add the number of COLUMN bits */
810 addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
811 /* add the number of RANK bits */
812 addrBits += MTR_DIMM_RANKS(mtr);
814 addrBits += 6; /* add 64 bits per DIMM */
815 addrBits -= 20; /* divide by 2^^20 */
816 addrBits -= 3; /* 8 bits per bytes */
818 dinfo->megabytes = 1 << addrBits;
820 debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
822 debugf2("\t\tELECTRICAL THROTTLING is %s\n",
823 MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
825 debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
826 debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single");
827 debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]);
828 debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
829 debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
832 p_csrow->nr_pages = dinfo->megabytes << 8;
833 p_csrow->mtype = MEM_FB_DDR2;
834 p_csrow->edac_mode = EDAC_S8ECD8ED;
836 /* ask what device type on this row */
837 if (MTR_DRAM_WIDTH(mtr))
838 p_csrow->dtype = DEV_X8;
840 p_csrow->dtype = DEV_X4;
848 * also will output a DIMM matrix map, if debug is enabled, for viewing
849 * how the DIMMs are populated
851 static void print_dimm_size(struct i7300_pvt *pvt)
853 struct i7300_dimm_info *dinfo;
854 char *p, *mem_buffer;
859 mem_buffer = p = kmalloc(space, GFP_KERNEL);
861 i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
866 n = snprintf(p, space, " ");
869 for (channel = 0; channel < MAX_CHANNELS; channel++) {
870 n = snprintf(p, space, "channel %d | ", channel);
874 debugf2("%s\n", mem_buffer);
877 n = snprintf(p, space, "-------------------------------"
878 "------------------------------");
881 debugf2("%s\n", mem_buffer);
885 for (slot = 0; slot < MAX_SLOTS; slot++) {
886 n = snprintf(p, space, "csrow/SLOT %d ", slot);
890 for (channel = 0; channel < MAX_CHANNELS; channel++) {
891 dinfo = &pvt->dimm_info[slot][channel];
892 n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
897 debugf2("%s\n", mem_buffer);
902 n = snprintf(p, space, "-------------------------------"
903 "------------------------------");
906 debugf2("%s\n", mem_buffer);
914 * i7300_init_csrows Initialize the 'csrows' table within
915 * the mci control structure with the
916 * addressing of memory.
920 * 1 no actual memory found on this MC
922 static int i7300_init_csrows(struct mem_ctl_info *mci)
924 struct i7300_pvt *pvt;
925 struct i7300_dimm_info *dinfo;
926 struct csrow_info *p_csrow;
929 int ch, branch, slot, channel;
933 empty = 1; /* Assume NO memory */
935 debugf2("Memory Technology Registers:\n");
937 /* Get the AMB present registers for the four channels */
938 for (branch = 0; branch < MAX_BRANCHES; branch++) {
939 /* Read and dump branch 0's MTRs */
940 channel = to_channel(0, branch);
941 pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0,
942 &pvt->ambpresent[channel]);
943 debugf2("\t\tAMB-present CH%d = 0x%x:\n",
944 channel, pvt->ambpresent[channel]);
946 channel = to_channel(1, branch);
947 pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1,
948 &pvt->ambpresent[channel]);
949 debugf2("\t\tAMB-present CH%d = 0x%x:\n",
950 channel, pvt->ambpresent[channel]);
953 /* Get the set of MTR[0-7] regs by each branch */
954 for (slot = 0; slot < MAX_SLOTS; slot++) {
955 int where = mtr_regs[slot];
956 for (branch = 0; branch < MAX_BRANCHES; branch++) {
957 pci_read_config_word(pvt->branch_pci[branch],
959 &pvt->mtr[slot][branch]);
960 for (ch = 0; ch < MAX_BRANCHES; ch++) {
961 int channel = to_channel(ch, branch);
963 dinfo = &pvt->dimm_info[slot][channel];
964 p_csrow = &mci->csrows[slot];
966 mtr = decode_mtr(pvt, slot, ch, branch,
968 /* if no DIMMS on this row, continue */
969 if (!MTR_DIMMS_PRESENT(mtr))
972 p_csrow->csrow_idx = slot;
974 /* FAKE OUT VALUES, FIXME */
975 p_csrow->first_page = 0 + slot * 20;
976 p_csrow->last_page = 9 + slot * 20;
977 p_csrow->page_mask = 0xfff;
987 static void decode_mir(int mir_no, u16 mir[MAX_MIR])
990 debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n",
992 (mir[mir_no] >> 4) & 0xfff,
993 (mir[mir_no] & 1) ? "B0" : "",
994 (mir[mir_no] & 2) ? "B1": "");
998 * i7300_get_mc_regs read in the necessary registers and
1001 * Fills in the private data members
1003 static int i7300_get_mc_regs(struct mem_ctl_info *mci)
1005 struct i7300_pvt *pvt;
1009 pvt = mci->pvt_info;
1011 pci_read_config_dword(pvt->system_address, AMBASE,
1012 (u32 *) &pvt->ambase);
1014 debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);
1016 /* Get the Branch Map regs */
1017 pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
1019 debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm,
1022 actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
1023 debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
1024 actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
1026 /* Get memory controller settings */
1027 pci_read_config_dword(pvt->branchmap_werrors, MC_SETTINGS,
1029 debugf0("Memory controller operating on %s mode\n",
1030 pvt->mc_settings & (1 << 16)? "mirrored" : "non-mirrored");
1031 debugf0("Error detection is %s\n",
1032 pvt->mc_settings & (1 << 5)? "enabled" : "disabled");
1034 /* Get Memory Interleave Range registers */
1035 pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]);
1036 pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]);
1037 pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]);
1039 /* Decode the MIR regs */
1040 for (i = 0; i < MAX_MIR; i++)
1041 decode_mir(i, pvt->mir);
1043 rc = i7300_init_csrows(mci);
1047 /* Go and determine the size of each DIMM and place in an
1049 print_dimm_size(pvt);
1055 * i7300_put_devices 'put' all the devices that we have
1056 * reserved via 'get'
1058 static void i7300_put_devices(struct mem_ctl_info *mci)
1060 struct i7300_pvt *pvt;
1063 pvt = mci->pvt_info;
1065 /* Decrement usage count for devices */
1066 for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++)
1067 pci_dev_put(pvt->branch_pci[branch]);
1068 pci_dev_put(pvt->fsb_error_regs);
1069 pci_dev_put(pvt->branchmap_werrors);
1073 * i7300_get_devices Find and perform 'get' operation on the MCH's
1074 * device/functions we want to reference for this driver
1076 * Need to 'get' device 16 func 1 and func 2
1078 static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx)
1080 struct i7300_pvt *pvt;
1081 struct pci_dev *pdev;
1083 pvt = mci->pvt_info;
1085 /* Attempt to 'get' the MCH register we want */
1087 while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) {
1088 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1089 PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
1091 /* End of list, leave */
1092 i7300_printk(KERN_ERR,
1093 "'system address,Process Bus' "
1095 "vendor 0x%x device 0x%x ERR funcs "
1097 PCI_VENDOR_ID_INTEL,
1098 PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
1102 /* Store device 16 funcs 1 and 2 */
1103 switch (PCI_FUNC(pdev->devfn)) {
1105 pvt->branchmap_werrors = pdev;
1108 pvt->fsb_error_regs = pdev;
1113 debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
1114 pci_name(pvt->system_address),
1115 pvt->system_address->vendor, pvt->system_address->device);
1116 debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
1117 pci_name(pvt->branchmap_werrors),
1118 pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device);
1119 debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n",
1120 pci_name(pvt->fsb_error_regs),
1121 pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
1123 pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL,
1124 PCI_DEVICE_ID_INTEL_I7300_MCH_FB0,
1126 if (!pvt->branch_pci[0]) {
1127 i7300_printk(KERN_ERR,
1128 "MC: 'BRANCH 0' device not found:"
1129 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
1130 PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0);
1134 pvt->branch_pci[1] = pci_get_device(PCI_VENDOR_ID_INTEL,
1135 PCI_DEVICE_ID_INTEL_I7300_MCH_FB1,
1137 if (!pvt->branch_pci[1]) {
1138 i7300_printk(KERN_ERR,
1139 "MC: 'BRANCH 1' device not found:"
1140 "vendor 0x%x device 0x%x Func 0 "
1142 PCI_VENDOR_ID_INTEL,
1143 PCI_DEVICE_ID_INTEL_I7300_MCH_FB1);
1150 i7300_put_devices(mci);
1155 * i7300_probe1 Probe for ONE instance of device to see if it is
1158 * 0 for FOUND a device
1159 * < 0 for error code
1161 static int i7300_probe1(struct pci_dev *pdev, int dev_idx)
1163 struct mem_ctl_info *mci;
1164 struct i7300_pvt *pvt;
1166 int num_dimms_per_channel;
1169 if (dev_idx >= ARRAY_SIZE(i7300_devs))
1172 debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
1175 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1177 /* We only are looking for func 0 of the set */
1178 if (PCI_FUNC(pdev->devfn) != 0)
1181 /* As we don't have a motherboard identification routine to determine
1182 * actual number of slots/dimms per channel, we thus utilize the
1183 * resource as specified by the chipset. Thus, we might have
1184 * have more DIMMs per channel than actually on the mobo, but this
1185 * allows the driver to support upto the chipset max, without
1186 * some fancy mobo determination.
1188 num_dimms_per_channel = MAX_SLOTS;
1189 num_channels = MAX_CHANNELS;
1190 num_csrows = MAX_SLOTS * MAX_CHANNELS;
1192 debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n",
1193 __func__, num_channels, num_dimms_per_channel, num_csrows);
1195 /* allocate a new MC control structure */
1196 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
1201 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1203 mci->dev = &pdev->dev; /* record ptr to the generic device */
1205 pvt = mci->pvt_info;
1206 pvt->system_address = pdev; /* Record this device in our private */
1208 /* 'get' the pci devices we want to reserve for our use */
1209 if (i7300_get_devices(mci, dev_idx))
1213 mci->mtype_cap = MEM_FLAG_FB_DDR2;
1214 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1215 mci->edac_cap = EDAC_FLAG_NONE;
1216 mci->mod_name = "i7300_edac.c";
1217 mci->mod_ver = I7300_REVISION;
1218 mci->ctl_name = i7300_devs[dev_idx].ctl_name;
1219 mci->dev_name = pci_name(pdev);
1220 mci->ctl_page_to_phys = NULL;
1223 /* Set the function pointer to an actual operation function */
1224 mci->edac_check = i7300_check_error;
1227 /* initialize the MC control structure 'csrows' table
1228 * with the mapping and control information */
1229 if (i7300_get_mc_regs(mci)) {
1230 debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n"
1231 " because i7300_init_csrows() returned nonzero "
1233 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
1236 debugf1("MC: Enable error reporting now\n");
1237 i7300_enable_error_reporting(mci);
1241 /* add this new MC control structure to EDAC's list of MCs */
1242 if (edac_mc_add_mc(mci)) {
1243 debugf0("MC: " __FILE__
1244 ": %s(): failed edac_mc_add_mc()\n", __func__);
1245 /* FIXME: perhaps some code should go here that disables error
1246 * reporting if we just enabled it
1252 i7300_clear_error(mci);
1255 /* allocating generic PCI control info */
1256 i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
1259 "%s(): Unable to create PCI control\n",
1262 "%s(): PCI error report via EDAC not setup\n",
1268 /* Error exit unwinding stack */
1271 i7300_put_devices(mci);
1279 * i7300_init_one constructor for one instance of device
1285 static int __devinit i7300_init_one(struct pci_dev *pdev,
1286 const struct pci_device_id *id)
1290 debugf0("MC: " __FILE__ ": %s()\n", __func__);
1292 /* wake up device */
1293 rc = pci_enable_device(pdev);
1297 /* now probe and enable the device */
1298 return i7300_probe1(pdev, id->driver_data);
1302 * i7300_remove_one destructor for one instance of device
1305 static void __devexit i7300_remove_one(struct pci_dev *pdev)
1307 struct mem_ctl_info *mci;
1309 debugf0(__FILE__ ": %s()\n", __func__);
1312 edac_pci_release_generic_ctl(i7300_pci);
1314 mci = edac_mc_del_mc(&pdev->dev);
1318 /* retrieve references to resources, and free those resources */
1319 i7300_put_devices(mci);
1325 * pci_device_id table for which devices we are looking for
 * Only the I7300 MCH error device (PCI_DEVICE_ID_INTEL_I7300_MCH_ERR) is listed.
1329 static const struct pci_device_id i7300_pci_tbl[] __devinitdata = {
1330 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)},
1331 {0,} /* 0 terminated list. */
1334 MODULE_DEVICE_TABLE(pci, i7300_pci_tbl);
1337 * i7300_driver pci_driver structure for this module
1340 static struct pci_driver i7300_driver = {
1341 .name = "i7300_edac",
1342 .probe = i7300_init_one,
1343 .remove = __devexit_p(i7300_remove_one),
1344 .id_table = i7300_pci_tbl,
1348 * i7300_init Module entry function
1349 * Try to initialize this module for its devices
1351 static int __init i7300_init(void)
1355 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1357 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1360 pci_rc = pci_register_driver(&i7300_driver);
1362 return (pci_rc < 0) ? pci_rc : 0;
1366 * i7300_exit() Module exit function
1367 * Unregister the driver
1369 static void __exit i7300_exit(void)
1371 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1372 pci_unregister_driver(&i7300_driver);
1375 module_init(i7300_init);
1376 module_exit(i7300_exit);
1378 MODULE_LICENSE("GPL");
1379 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1380 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1381 MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
1384 module_param(edac_op_state, int, 0444);
1385 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");