2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
14 * A code-rewriter that handles unaligned exceptions.
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/thread_info.h>
21 #include <linux/uaccess.h>
22 #include <linux/mman.h>
23 #include <linux/types.h>
24 #include <linux/err.h>
25 #include <linux/module.h>
26 #include <linux/compat.h>
27 #include <linux/prctl.h>
28 #include <asm/cacheflush.h>
29 #include <asm/traps.h>
30 #include <asm/uaccess.h>
31 #include <asm/unaligned.h>
33 #include <arch/spr_def.h>
34 #include <arch/opcode.h>
38 * This file handles unaligned exceptions for tile-Gx. The tilepro's unaligned
39 * exceptions are handled in single_step.c.
44 static int __init setup_unaligned_printk(char *str)
47 if (kstrtol(str, 0, &val) != 0)
49 unaligned_printk = val;
50 pr_info("Printk for each unaligned data accesses is %s\n",
51 unaligned_printk ? "enabled" : "disabled");
54 __setup("unaligned_printk=", setup_unaligned_printk);
56 unsigned int unaligned_fixup_count;
61 * Unalign data jit fixup code fragment. Reserved space is 128 bytes.
62 * The 1st 64-bit word saves fault PC address, 2nd word is the fault
63 * instruction bundle followed by 14 JIT bundles.
66 struct unaligned_jit_fragment {
68 tilegx_bundle_bits bundle;
69 tilegx_bundle_bits insn[14];
73 * Check if a nop or fnop at bundle's pipeline X0.
76 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
78 return (((get_UnaryOpcodeExtension_X0(bundle) ==
79 NOP_UNARY_OPCODE_X0) &&
80 (get_RRROpcodeExtension_X0(bundle) ==
81 UNARY_RRR_0_OPCODE_X0) &&
82 (get_Opcode_X0(bundle) ==
84 ((get_UnaryOpcodeExtension_X0(bundle) ==
85 FNOP_UNARY_OPCODE_X0) &&
86 (get_RRROpcodeExtension_X0(bundle) ==
87 UNARY_RRR_0_OPCODE_X0) &&
88 (get_Opcode_X0(bundle) ==
93 * Check if nop or fnop at bundle's pipeline X1.
96 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
98 return (((get_UnaryOpcodeExtension_X1(bundle) ==
99 NOP_UNARY_OPCODE_X1) &&
100 (get_RRROpcodeExtension_X1(bundle) ==
101 UNARY_RRR_0_OPCODE_X1) &&
102 (get_Opcode_X1(bundle) ==
104 ((get_UnaryOpcodeExtension_X1(bundle) ==
105 FNOP_UNARY_OPCODE_X1) &&
106 (get_RRROpcodeExtension_X1(bundle) ==
107 UNARY_RRR_0_OPCODE_X1) &&
108 (get_Opcode_X1(bundle) ==
113 * Check if nop or fnop at bundle's Y0 pipeline.
116 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
118 return (((get_UnaryOpcodeExtension_Y0(bundle) ==
119 NOP_UNARY_OPCODE_Y0) &&
120 (get_RRROpcodeExtension_Y0(bundle) ==
121 UNARY_RRR_1_OPCODE_Y0) &&
122 (get_Opcode_Y0(bundle) ==
124 ((get_UnaryOpcodeExtension_Y0(bundle) ==
125 FNOP_UNARY_OPCODE_Y0) &&
126 (get_RRROpcodeExtension_Y0(bundle) ==
127 UNARY_RRR_1_OPCODE_Y0) &&
128 (get_Opcode_Y0(bundle) ==
133 * Check if nop or fnop at bundle's pipeline Y1.
136 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
138 return (((get_UnaryOpcodeExtension_Y1(bundle) ==
139 NOP_UNARY_OPCODE_Y1) &&
140 (get_RRROpcodeExtension_Y1(bundle) ==
141 UNARY_RRR_1_OPCODE_Y1) &&
142 (get_Opcode_Y1(bundle) ==
144 ((get_UnaryOpcodeExtension_Y1(bundle) ==
145 FNOP_UNARY_OPCODE_Y1) &&
146 (get_RRROpcodeExtension_Y1(bundle) ==
147 UNARY_RRR_1_OPCODE_Y1) &&
148 (get_Opcode_Y1(bundle) ==
153 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
156 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
/*
 * True only when both the Y0 and Y1 slots of the bundle decode as
 * nop/fnop (the Y2 slot may still carry a real instruction).
 * NOTE(review): the function braces at original lines 157/159 are
 * missing from this extract - verify against the full source.
 */
158 	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
162 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
165 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
/*
 * True only when both X-pipe slots (X0 and X1) of the bundle decode as
 * nop/fnop.
 * NOTE(review): the function braces at original lines 166/168 are
 * missing from this extract - verify against the full source.
 */
167 	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
171 * Find the destination, source registers of fault unalign access instruction
172 * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and
173 * clob3, which are guaranteed different from any register used in the fault
174 * bundle. r_alias is used to return whether instructions other than the
175 * unalign load/store share a register with ra, rb or rd.
178 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
179 uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
180 uint64_t *clob3, bool *r_alias)
184 uint64_t reg_map = 0, alias_reg_map = 0, map;
199 * Parse fault bundle, find potential used registers and mark
200 * corresponding bits in reg_map and alias_map. These 2 bit maps
201 * are used to find the scratch registers and determine if there
204 if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */
206 reg = get_SrcA_Y2(bundle);
207 reg_map |= 1ULL << reg;
209 reg = get_SrcBDest_Y2(bundle);
210 reg_map |= 1ULL << reg;
215 alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
219 alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
222 if (!is_bundle_y1_nop(bundle)) {
223 reg = get_SrcA_Y1(bundle);
224 reg_map |= (1ULL << reg);
227 reg = get_SrcB_Y1(bundle);
228 reg_map |= (1ULL << reg);
229 map |= (1ULL << reg);
231 reg = get_Dest_Y1(bundle);
232 reg_map |= (1ULL << reg);
233 map |= (1ULL << reg);
235 if (map & alias_reg_map)
239 if (!is_bundle_y0_nop(bundle)) {
240 reg = get_SrcA_Y0(bundle);
241 reg_map |= (1ULL << reg);
244 reg = get_SrcB_Y0(bundle);
245 reg_map |= (1ULL << reg);
246 map |= (1ULL << reg);
248 reg = get_Dest_Y0(bundle);
249 reg_map |= (1ULL << reg);
250 map |= (1ULL << reg);
252 if (map & alias_reg_map)
255 } else { /* X Mode Bundle. */
257 reg = get_SrcA_X1(bundle);
258 reg_map |= (1ULL << reg);
262 reg = get_Dest_X1(bundle);
263 reg_map |= (1ULL << reg);
265 alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
268 reg = get_SrcB_X1(bundle);
269 reg_map |= (1ULL << reg);
271 alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
274 if (!is_bundle_x0_nop(bundle)) {
275 reg = get_SrcA_X0(bundle);
276 reg_map |= (1ULL << reg);
279 reg = get_SrcB_X0(bundle);
280 reg_map |= (1ULL << reg);
281 map |= (1ULL << reg);
283 reg = get_Dest_X0(bundle);
284 reg_map |= (1ULL << reg);
285 map |= (1ULL << reg);
287 if (map & alias_reg_map)
293 * "alias" indicates if the unalign access registers have collision
294 * with others in the same bundle. We just test the all-register-
295 * operands case (RRR), ignoring the cases with immediates. If a bundle
296 * has no register alias, we may do fixup in a simple or fast manner.
297 * So if an immediate field happens to collide with a register, we may
298 * end up falling back to the generic handling.
303 /* Flip bits on reg_map. */
306 /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */
307 for (i = 0; i < TREG_SP; i++) {
308 if (reg_map & (0x1ULL << i)) {
311 } else if (*clob2 == -1) {
313 } else if (*clob3 == -1) {
322 * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
326 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
327 uint64_t clob1, uint64_t clob2, uint64_t clob3)
/*
 * Sanity-check the register numbers extracted from the fault bundle.
 * Returns true ("unexpected") if any is out of the usable range:
 * ra/rb/rd must be < 56 unless they are TREG_ZERO, and the three
 * scratch (clobber) registers must always be < 56.
 * NOTE(review): this extract drops the "unexpected = true" statements
 * and the final return between original lines 330 and 345 - confirm
 * against the full source.
 */
329 bool unexpected = false;
330 if ((ra >= 56) && (ra != TREG_ZERO))
333 if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
337 if ((rd >= 56) && (rd != TREG_ZERO))
340 if ((rb >= 56) && (rb != TREG_ZERO))
347 #define GX_INSN_X0_MASK ((1ULL << 31) - 1)
348 #define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
349 #define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
350 #define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
351 #define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))
353 #ifdef __LITTLE_ENDIAN
354 #define GX_INSN_BSWAP(_bundle_) (_bundle_)
356 #define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
357 #endif /* __LITTLE_ENDIAN */
360 * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
361 * The corresponding static function jit_x#_###(.) generates a partial or
362 * whole bundle based on the template and given arguments.
365 #define __JIT_CODE(_X_) \
366 asm (".pushsection .rodata.unalign_data, \"a\"\n" \
370 __JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
371 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
373 extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
374 return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
375 create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
378 __JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
379 static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
381 extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
382 return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
383 create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
386 __JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
387 static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
389 extern tilegx_bundle_bits __unalign_jit_x0_addi;
390 return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
391 create_Dest_X0(rd) | create_SrcA_X0(ra) |
392 create_Imm8_X0(imm8);
395 __JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
396 static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
398 extern tilegx_bundle_bits __unalign_jit_x1_ldna;
399 return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
400 create_Dest_X1(rd) | create_SrcA_X1(ra);
403 __JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
404 static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
406 extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
407 return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
408 create_Dest_X0(rd) | create_SrcA_X0(ra) |
412 __JIT_CODE("__unalign_jit_x1_iret: {iret}");
413 static tilegx_bundle_bits jit_x1_iret(void)
/*
 * Return the X1 half of the "{iret}" template bundle (byte-swapped to
 * CPU order on big-endian, then masked to the X1 slot bits).
 * NOTE(review): braces at original lines 414/417 are missing from this
 * extract - verify against the full source.
 */
415 extern tilegx_bundle_bits __unalign_jit_x1_iret;
416 return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
419 __JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
420 static tilegx_bundle_bits jit_x0_fnop(void)
422 extern tilegx_bundle_bits __unalign_jit_x01_fnop;
423 return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
426 static tilegx_bundle_bits jit_x1_fnop(void)
428 extern tilegx_bundle_bits __unalign_jit_x01_fnop;
429 return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
432 __JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
433 static tilegx_bundle_bits jit_y2_dummy(void)
435 extern tilegx_bundle_bits __unalign_jit_y2_dummy;
436 return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
439 static tilegx_bundle_bits jit_y1_fnop(void)
441 extern tilegx_bundle_bits __unalign_jit_y2_dummy;
442 return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
445 __JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
446 static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
448 extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
449 return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
450 (~create_SrcA_X1(-1)) &
451 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
452 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
455 __JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
456 static tilegx_bundle_bits jit_x1_st(int ra, int rb)
458 extern tilegx_bundle_bits __unalign_jit_x1_st;
459 return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
460 create_SrcA_X1(ra) | create_SrcB_X1(rb);
463 __JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
464 static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
466 extern tilegx_bundle_bits __unalign_jit_x1_st_add;
467 return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
468 (~create_SrcA_X1(-1)) &
469 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
470 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
473 __JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
474 static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
476 extern tilegx_bundle_bits __unalign_jit_x1_ld;
477 return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
478 create_Dest_X1(rd) | create_SrcA_X1(ra);
481 __JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
482 static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
484 extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
485 return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
486 (~create_Dest_X1(-1)) &
487 GX_INSN_X1_MASK) | create_Dest_X1(rd) |
488 create_SrcA_X1(ra) | create_Imm8_X1(imm8);
491 __JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
492 static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
494 extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
495 return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
497 create_Dest_X0(rd) | create_SrcA_X0(ra) |
498 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
501 __JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
502 static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
504 extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
505 return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
507 create_Dest_X0(rd) | create_SrcA_X0(ra) |
508 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
511 __JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
512 static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
514 extern tilegx_bundle_bits __unalign_jit_x1_addi;
515 return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
516 create_Dest_X1(rd) | create_SrcA_X1(ra) |
517 create_Imm8_X1(imm8);
520 __JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
521 static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
523 extern tilegx_bundle_bits __unalign_jit_x0_shrui;
524 return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
526 create_Dest_X0(rd) | create_SrcA_X0(ra) |
527 create_ShAmt_X0(imm6);
530 __JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
531 static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
533 extern tilegx_bundle_bits __unalign_jit_x0_rotli;
534 return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
536 create_Dest_X0(rd) | create_SrcA_X0(ra) |
537 create_ShAmt_X0(imm6);
540 __JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
541 static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
543 extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
544 return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
546 create_SrcA_X1(ra) | create_BrOff_X1(broff);
552 * This function generates unalign fixup JIT.
554 * We first find the unalign load/store instruction's destination, source
555 * registers: ra, rb and rd, and 3 scratch registers by calling
556 * find_regs(...). 3 scratch clobbers should not alias with any register
557 * used in the fault bundle. Then analyze the fault bundle to determine
558 * if it's a load or store, operand width, branch or address increment etc.
559 * At last generated JIT is copied into JIT code area in user space.
563 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
566 struct thread_info *info = current_thread_info();
567 struct unaligned_jit_fragment frag;
568 struct unaligned_jit_fragment *jit_code_area;
569 tilegx_bundle_bits bundle_2 = 0;
570 /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
571 bool bundle_2_enable = true;
572 uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
574 * Indicate if the unalign access
575 * instruction's registers collide with
576 * others in the same bundle.
579 bool load_n_store = true;
580 bool load_store_signed = false;
581 unsigned int load_store_size = 8;
582 bool y1_br = false; /* True, for a branch in same bundle at Y1.*/
584 /* True for link operation. i.e. jalr or lnk at Y1 */
587 bool x1_add = false;/* True, for load/store ADD instruction at X1*/
589 bool unexpected = false;
593 (struct unaligned_jit_fragment *)(info->unalign_jit_base);
595 memset((void *)&frag, 0, sizeof(frag));
597 /* 0: X mode, Otherwise: Y mode. */
598 if (bundle & TILEGX_BUNDLE_MODE_MASK) {
599 unsigned int mod, opcode;
601 if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
602 get_RRROpcodeExtension_Y1(bundle) ==
603 UNARY_RRR_1_OPCODE_Y1) {
605 opcode = get_UnaryOpcodeExtension_Y1(bundle);
608 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
612 case JALR_UNARY_OPCODE_Y1:
613 case JALRP_UNARY_OPCODE_Y1:
615 y1_lr_reg = 55; /* Link register. */
617 case JR_UNARY_OPCODE_Y1:
618 case JRP_UNARY_OPCODE_Y1:
620 y1_br_reg = get_SrcA_Y1(bundle);
622 case LNK_UNARY_OPCODE_Y1:
623 /* "lnk" at Y1 pipeline. */
625 y1_lr_reg = get_Dest_Y1(bundle);
630 opcode = get_Opcode_Y2(bundle);
631 mod = get_Mode(bundle);
634 * bundle_2 is bundle after making Y2 as a dummy operation
637 bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
639 /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
640 if (y1_br || y1_lr) {
641 bundle_2 &= ~(GX_INSN_Y1_MASK);
642 bundle_2 |= jit_y1_fnop();
645 if (is_y0_y1_nop(bundle_2))
646 bundle_2_enable = false;
648 if (mod == MODE_OPCODE_YC2) {
650 load_n_store = false;
651 load_store_size = 1 << opcode;
652 load_store_signed = false;
653 find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
655 if (load_store_size > 8)
660 if (mod == MODE_OPCODE_YB2) {
663 load_store_signed = false;
667 load_store_signed = true;
671 load_store_signed = false;
677 } else if (mod == MODE_OPCODE_YA2) {
678 if (opcode == LD2S_OPCODE_Y2) {
679 load_store_signed = true;
681 } else if (opcode == LD2U_OPCODE_Y2) {
682 load_store_signed = false;
688 find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
694 /* bundle_2 is bundle after making X1 as "fnop". */
695 bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
697 if (is_x0_x1_nop(bundle_2))
698 bundle_2_enable = false;
700 if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
701 opcode = get_UnaryOpcodeExtension_X1(bundle);
703 if (get_RRROpcodeExtension_X1(bundle) ==
704 UNARY_RRR_0_OPCODE_X1) {
706 find_regs(bundle, &rd, &ra, &rb, &clob1,
707 &clob2, &clob3, &alias);
710 case LD_UNARY_OPCODE_X1:
711 load_store_signed = false;
714 case LD4S_UNARY_OPCODE_X1:
715 load_store_signed = true;
717 case LD4U_UNARY_OPCODE_X1:
721 case LD2S_UNARY_OPCODE_X1:
722 load_store_signed = true;
724 case LD2U_UNARY_OPCODE_X1:
731 load_n_store = false;
732 load_store_signed = false;
733 find_regs(bundle, 0, &ra, &rb,
734 &clob1, &clob2, &clob3,
737 opcode = get_RRROpcodeExtension_X1(bundle);
739 case ST_RRR_0_OPCODE_X1:
742 case ST4_RRR_0_OPCODE_X1:
745 case ST2_RRR_0_OPCODE_X1:
752 } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
754 opcode = get_Imm8OpcodeExtension_X1(bundle);
756 case LD_ADD_IMM8_OPCODE_X1:
760 case LD4S_ADD_IMM8_OPCODE_X1:
761 load_store_signed = true;
763 case LD4U_ADD_IMM8_OPCODE_X1:
767 case LD2S_ADD_IMM8_OPCODE_X1:
768 load_store_signed = true;
770 case LD2U_ADD_IMM8_OPCODE_X1:
774 case ST_ADD_IMM8_OPCODE_X1:
775 load_n_store = false;
778 case ST4_ADD_IMM8_OPCODE_X1:
779 load_n_store = false;
782 case ST2_ADD_IMM8_OPCODE_X1:
783 load_n_store = false;
793 x1_add_imm8 = get_Imm8_X1(bundle);
795 x1_add_imm8 = get_Dest_Imm8_X1(bundle);
798 find_regs(bundle, load_n_store ? (&rd) : NULL,
799 &ra, &rb, &clob1, &clob2, &clob3, &alias);
805 * Some sanity check for register numbers extracted from fault bundle.
807 if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
810 /* Give warning if register ra has an aligned address. */
812 WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
816 * Fault came from kernel space, here we only need take care of
817 * unaligned "get_user/put_user" macros defined in "uaccess.h".
818 * Basically, we will handle bundle like this:
819 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
820 * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
821 * For either load or store, byte-wise operation is performed by calling
822 * get_user() or put_user(). If the macro returns non-zero value,
823 * set the value to rx, otherwise set zero to rx. Finally make pc point
824 * to next bundle and return.
827 if (EX1_PL(regs->ex1) != USER_PL) {
829 unsigned long rx = 0;
830 unsigned long x = 0, ret = 0;
832 if (y1_br || y1_lr || x1_add ||
833 (load_store_signed !=
834 (load_n_store && load_store_size == 4))) {
835 /* No branch, link, wrong sign-ext or load/store add. */
837 } else if (!unexpected) {
838 if (bundle & TILEGX_BUNDLE_MODE_MASK) {
840 * Fault bundle is Y mode.
841 * Check if the Y1 and Y0 is the form of
842 * { movei rx, 0; nop/fnop }, if yes,
846 if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
847 && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
848 (get_Imm8_Y1(bundle) == 0) &&
849 is_bundle_y0_nop(bundle)) {
850 rx = get_Dest_Y1(bundle);
851 } else if ((get_Opcode_Y0(bundle) ==
853 (get_SrcA_Y0(bundle) == TREG_ZERO) &&
854 (get_Imm8_Y0(bundle) == 0) &&
855 is_bundle_y1_nop(bundle)) {
856 rx = get_Dest_Y0(bundle);
862 * Fault bundle is X mode.
863 * Check if the X0 is 'movei rx, 0',
864 * if yes, find the rx.
867 if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
868 && (get_Imm8OpcodeExtension_X0(bundle) ==
869 ADDI_IMM8_OPCODE_X0) &&
870 (get_SrcA_X0(bundle) == TREG_ZERO) &&
871 (get_Imm8_X0(bundle) == 0)) {
872 rx = get_Dest_X0(bundle);
878 /* rx should be less than 56. */
879 if (!unexpected && (rx >= 56))
883 if (!search_exception_tables(regs->pc)) {
884 /* No fixup in the exception tables for the pc. */
889 /* Unexpected unalign kernel fault. */
890 struct task_struct *tsk = validate_current();
896 if (unlikely(tsk->pid < 2)) {
897 panic("Kernel unalign fault running %s!",
898 tsk->pid ? "init" : "the idle task");
905 do_group_exit(SIGKILL);
908 unsigned long i, b = 0;
910 (unsigned char *)regs->regs[ra];
912 /* handle get_user(x, ptr) */
913 for (i = 0; i < load_store_size; i++) {
914 ret = get_user(b, ptr++);
916 /* Success! update x. */
917 #ifdef __LITTLE_ENDIAN
922 #endif /* __LITTLE_ENDIAN */
929 /* Sign-extend 4-byte loads. */
930 if (load_store_size == 4)
933 /* Set register rd. */
936 /* Set register rx. */
937 regs->regs[rx] = ret;
943 /* Handle put_user(x, ptr) */
945 #ifdef __LITTLE_ENDIAN
949 * Swap x in order to store x from low
950 * to high memory same as the
951 * little-endian case.
953 switch (load_store_size) {
964 #endif /* __LITTLE_ENDIAN */
965 for (i = 0; i < load_store_size; i++) {
966 ret = put_user(b, ptr++);
969 /* Success! shift 1 byte. */
972 /* Set register rx. */
973 regs->regs[rx] = ret;
980 unaligned_fixup_count++;
982 if (unaligned_printk) {
983 pr_info("%s/%d. Unalign fixup for kernel access "
985 current->comm, current->pid, regs->regs[ra]);
988 /* Done! Return to the exception handler. */
992 if ((align_ctl == 0) || unexpected) {
995 .si_code = BUS_ADRALN,
996 .si_addr = (unsigned char __user *)0
998 if (unaligned_printk)
999 pr_info("Unalign bundle: unexp @%llx, %llx",
1000 (unsigned long long)regs->pc,
1001 (unsigned long long)bundle);
1004 unsigned long uaa = (unsigned long)regs->regs[ra];
1005 /* Set bus Address. */
1006 info.si_addr = (unsigned char __user *)uaa;
1009 unaligned_fixup_count++;
1011 trace_unhandled_signal("unaligned fixup trap", regs,
1012 (unsigned long)info.si_addr, SIGBUS);
1013 force_sig_info(info.si_signo, &info, current);
1017 #ifdef __LITTLE_ENDIAN
1018 #define UA_FIXUP_ADDR_DELTA 1
1019 #define UA_FIXUP_BFEXT_START(_B_) 0
1020 #define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1)
1021 #else /* __BIG_ENDIAN */
1022 #define UA_FIXUP_ADDR_DELTA -1
1023 #define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_))
1024 #define UA_FIXUP_BFEXT_END(_B_) 63
1025 #endif /* __LITTLE_ENDIAN */
1029 if ((ra != rb) && (rd != TREG_SP) && !alias &&
1030 !y1_br && !y1_lr && !x1_add) {
1032 * Simple case: ra != rb and no register alias found,
1033 * and no branch or link. This will be the majority.
1034 * We can do a little better for the simple case than the
1035 * generic scheme below.
1037 if (!load_n_store) {
1039 * Simple store: ra != rb, no need for scratch register.
1040 * Just store and rotate to right bytewise.
1044 jit_x0_addi(ra, ra, load_store_size - 1) |
1046 #endif /* __BIG_ENDIAN */
1047 for (k = 0; k < load_store_size; k++) {
1050 jit_x0_rotli(rb, rb, 56) |
1051 jit_x1_st1_add(ra, rb,
1052 UA_FIXUP_ADDR_DELTA);
1055 frag.insn[n] = jit_x1_addi(ra, ra, 1);
1057 frag.insn[n] = jit_x1_addi(ra, ra,
1058 -1 * load_store_size);
1059 #endif /* __LITTLE_ENDIAN */
1061 if (load_store_size == 8) {
1062 frag.insn[n] |= jit_x0_fnop();
1063 } else if (load_store_size == 4) {
1064 frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1066 frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1069 if (bundle_2_enable)
1070 frag.insn[n++] = bundle_2;
1071 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1074 /* Use two clobber registers: clob1/2. */
1076 jit_x0_addi(TREG_SP, TREG_SP, -16) |
1079 jit_x0_addi(clob1, ra, 7) |
1080 jit_x1_st_add(TREG_SP, clob1, -8);
1082 jit_x0_addi(clob2, ra, 0) |
1083 jit_x1_st(TREG_SP, clob2);
1086 jit_x1_ldna(rd, ra);
1089 jit_x1_ldna(clob1, clob1);
1091 * Note: we must make sure that rd must not
1092 * be sp. Recover clob1/2 from stack.
1095 jit_x0_dblalign(rd, clob1, clob2) |
1096 jit_x1_ld_add(clob2, TREG_SP, 8);
1099 jit_x1_ld_add(clob1, TREG_SP, 16);
1101 /* Use one clobber register: clob1 only. */
1103 jit_x0_addi(TREG_SP, TREG_SP, -16) |
1106 jit_x0_addi(clob1, ra, 7) |
1107 jit_x1_st(TREG_SP, clob1);
1110 jit_x1_ldna(rd, ra);
1113 jit_x1_ldna(clob1, clob1);
1115 * Note: we must make sure that rd must not
1116 * be sp. Recover clob1 from stack.
1119 jit_x0_dblalign(rd, clob1, ra) |
1120 jit_x1_ld_add(clob1, TREG_SP, 16);
1123 if (bundle_2_enable)
1124 frag.insn[n++] = bundle_2;
1126 * For non 8-byte load, extract corresponding bytes and
1129 if (load_store_size == 4) {
1130 if (load_store_signed)
1134 UA_FIXUP_BFEXT_START(4),
1135 UA_FIXUP_BFEXT_END(4)) |
1141 UA_FIXUP_BFEXT_START(4),
1142 UA_FIXUP_BFEXT_END(4)) |
1144 } else if (load_store_size == 2) {
1145 if (load_store_signed)
1149 UA_FIXUP_BFEXT_START(2),
1150 UA_FIXUP_BFEXT_END(2)) |
1156 UA_FIXUP_BFEXT_START(2),
1157 UA_FIXUP_BFEXT_END(2)) |
1165 } else if (!load_n_store) {
1168 * Generic memory store cases: use 3 clobber registers.
1170 * Alloc space for saving clob2,1,3 on user's stack.
1171 * register clob3 points to where clob2 saved, followed by
1172 * clob1 and 3 from high to low memory.
1175 jit_x0_addi(TREG_SP, TREG_SP, -32) |
1178 jit_x0_addi(clob3, TREG_SP, 16) |
1179 jit_x1_st_add(TREG_SP, clob3, 8);
1180 #ifdef __LITTLE_ENDIAN
1182 jit_x0_addi(clob1, ra, 0) |
1183 jit_x1_st_add(TREG_SP, clob1, 8);
1186 jit_x0_addi(clob1, ra, load_store_size - 1) |
1187 jit_x1_st_add(TREG_SP, clob1, 8);
1189 if (load_store_size == 8) {
1191 * We save one byte at a time; not for speed, but for compact
1192 * code. After each store, the data source register shifts
1193 * right one byte, and is unchanged after 8 stores.
1196 jit_x0_addi(clob2, TREG_ZERO, 7) |
1197 jit_x1_st_add(TREG_SP, clob2, 16);
1199 jit_x0_rotli(rb, rb, 56) |
1200 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1202 jit_x0_addi(clob2, clob2, -1) |
1203 jit_x1_bnezt(clob2, -1);
1206 jit_x1_addi(clob2, y1_br_reg, 0);
1207 } else if (load_store_size == 4) {
1209 jit_x0_addi(clob2, TREG_ZERO, 3) |
1210 jit_x1_st_add(TREG_SP, clob2, 16);
1212 jit_x0_rotli(rb, rb, 56) |
1213 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1215 jit_x0_addi(clob2, clob2, -1) |
1216 jit_x1_bnezt(clob2, -1);
1218 * same as the 8-byte case, but we need to shift another 4
1219 * bytes to recover rb for the 4-byte store.
1221 frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
1222 jit_x1_addi(clob2, y1_br_reg, 0);
1225 jit_x0_addi(clob2, rb, 0) |
1226 jit_x1_st_add(TREG_SP, clob2, 16);
1227 for (k = 0; k < 2; k++) {
1229 jit_x0_shrui(rb, rb, 8) |
1230 jit_x1_st1_add(clob1, rb,
1231 UA_FIXUP_ADDR_DELTA);
1234 jit_x0_addi(rb, clob2, 0) |
1235 jit_x1_addi(clob2, y1_br_reg, 0);
1238 if (bundle_2_enable)
1239 frag.insn[n++] = bundle_2;
1244 jit_x1_mfspr(y1_lr_reg,
1245 SPR_EX_CONTEXT_0_0);
1250 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1255 jit_x0_addi(ra, ra, x1_add_imm8) |
1256 jit_x1_ld_add(clob2, clob3, -8);
1260 jit_x1_ld_add(clob2, clob3, -8);
1264 jit_x1_ld_add(clob1, clob3, -8);
1265 frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
1266 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1270 * Generic memory load cases.
1272 * Alloc space for saving clob1,2,3 on user's stack.
1273 * register clob3 points to where clob1 saved, followed
1274 * by clob2 and 3 from high to low memory.
1278 jit_x0_addi(TREG_SP, TREG_SP, -32) |
1281 jit_x0_addi(clob3, TREG_SP, 16) |
1282 jit_x1_st_add(TREG_SP, clob3, 8);
1284 jit_x0_addi(clob2, ra, 0) |
1285 jit_x1_st_add(TREG_SP, clob2, 8);
1289 jit_x0_addi(clob1, y1_br_reg, 0) |
1290 jit_x1_st_add(TREG_SP, clob1, 16);
1294 jit_x1_st_add(TREG_SP, clob1, 16);
1297 if (bundle_2_enable)
1298 frag.insn[n++] = bundle_2;
1303 jit_x1_mfspr(y1_lr_reg,
1304 SPR_EX_CONTEXT_0_0);
1310 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1315 jit_x0_addi(clob1, clob2, 7) |
1316 jit_x1_ldna(rd, clob2);
1319 jit_x1_ldna(clob1, clob1);
1321 jit_x0_dblalign(rd, clob1, clob2) |
1322 jit_x1_ld_add(clob1, clob3, -8);
1325 jit_x0_addi(ra, ra, x1_add_imm8) |
1326 jit_x1_ld_add(clob2, clob3, -8);
1330 jit_x1_ld_add(clob2, clob3, -8);
1335 jit_x1_ld(clob3, clob3);
1337 if (load_store_size == 4) {
1338 if (load_store_signed)
1342 UA_FIXUP_BFEXT_START(4),
1343 UA_FIXUP_BFEXT_END(4)) |
1349 UA_FIXUP_BFEXT_START(4),
1350 UA_FIXUP_BFEXT_END(4)) |
1352 } else if (load_store_size == 2) {
1353 if (load_store_signed)
1357 UA_FIXUP_BFEXT_START(2),
1358 UA_FIXUP_BFEXT_END(2)) |
1364 UA_FIXUP_BFEXT_START(2),
1365 UA_FIXUP_BFEXT_END(2)) |
1369 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1372 /* Max JIT bundle count is 14. */
1377 int idx = (regs->pc >> 3) &
1378 ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1381 frag.bundle = bundle;
1383 if (unaligned_printk) {
1384 pr_info("%s/%d, Unalign fixup: pc=%lx "
1385 "bundle=%lx %d %d %d %d %d %d %d %d.",
1386 current->comm, current->pid,
1387 (unsigned long)frag.pc,
1388 (unsigned long)frag.bundle,
1389 (int)alias, (int)rd, (int)ra,
1390 (int)rb, (int)bundle_2_enable,
1391 (int)y1_lr, (int)y1_br, (int)x1_add);
1393 for (k = 0; k < n; k += 2)
1394 pr_info("[%d] %016llx %016llx", k,
1395 (unsigned long long)frag.insn[k],
1396 (unsigned long long)frag.insn[k+1]);
1399 /* Swap bundle byte order for big endian sys. */
1401 frag.bundle = GX_INSN_BSWAP(frag.bundle);
1402 for (k = 0; k < n; k++)
1403 frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1404 #endif /* __BIG_ENDIAN */
1406 status = copy_to_user((void __user *)&jit_code_area[idx],
1407 &frag, sizeof(frag));
1409 /* Fail to copy JIT into user land. send SIGSEGV. */
1411 .si_signo = SIGSEGV,
1412 .si_code = SEGV_MAPERR,
1413 .si_addr = (void __user *)&jit_code_area[idx]
1416 pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
1417 current->pid, current->comm,
1418 (unsigned long long)&jit_code_area[idx]);
1420 trace_unhandled_signal("segfault in unalign fixup",
1422 (unsigned long)info.si_addr,
1424 force_sig_info(info.si_signo, &info, current);
1429 /* Do a cheaper increment, not accurate. */
1430 unaligned_fixup_count++;
1431 __flush_icache_range((unsigned long)&jit_code_area[idx],
1432 (unsigned long)&jit_code_area[idx] +
1435 /* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/
1436 __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1437 __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1439 /* Modify pc at the start of new JIT. */
1440 regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1441 /* Set ICS in SPR_EX_CONTEXT_K_1. */
1442 regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1448 * C function to generate unalign data JIT. Called from unalign data
1449 * interrupt handler.
1451 * First check if unalign fix is disabled, or the exception did not come
1452 * from user space, or the sp register points to an unaligned address; if
1453 * true, generate a SIGBUS. Then map a page into user space as the JIT area
1454 * if it is not mapped yet. Generate JIT code by calling jit_bundle_gen().
1455 * After that, return back to the exception handler.
1457 * The exception handler will "iret" to new generated JIT code after
1458 * restoring caller saved registers. In theory, the JIT code will perform
1459 * another "iret" to resume user's program.
1462 void do_unaligned(struct pt_regs *regs, int vecnum)
1464 tilegx_bundle_bits __user *pc;
1465 tilegx_bundle_bits bundle;
1466 struct thread_info *info = current_thread_info();
1469 /* Checks the per-process unaligned JIT flags */
1470 align_ctl = unaligned_fixup;
1471 switch (task_thread_info(current)->align_ctl) {
1472 case PR_UNALIGN_NOPRINT:
1475 case PR_UNALIGN_SIGBUS:
1480 /* Enable interrupts in order to access user land. */
1484 * The fault came from kernel space. Two choices:
1485 * (a) unaligned_fixup < 1, we will first call get/put_user fixup
1486 * to return -EFAULT. If no fixup, simply panic the kernel.
1487 * (b) unaligned_fixup >=1, we will try to fix the unaligned access
1488 * if it was triggered by get_user/put_user() macros. Panic the
1489 * kernel if it is not fixable.
1492 if (EX1_PL(regs->ex1) != USER_PL) {
1494 if (align_ctl < 1) {
1495 unaligned_fixup_count++;
1496 /* If exception came from kernel, try fix it up. */
1497 if (fixup_exception(regs)) {
1498 if (unaligned_printk)
1499 pr_info("Unalign fixup: %d %llx @%llx",
1500 (int)unaligned_fixup,
1501 (unsigned long long)regs->ex1,
1502 (unsigned long long)regs->pc);
1505 /* Not fixable. Go panic. */
1506 panic("Unalign exception in Kernel. pc=%lx",
1511 * Try to fix the exception. If we can't, panic the
1514 bundle = GX_INSN_BSWAP(
1515 *((tilegx_bundle_bits *)(regs->pc)));
1516 jit_bundle_gen(regs, bundle, align_ctl);
1522 * Fault came from user with ICS or stack is not aligned.
1523 * If so, we will trigger SIGBUS.
1525 if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1528 .si_code = BUS_ADRALN,
1529 .si_addr = (unsigned char __user *)0
1532 if (unaligned_printk)
1533 pr_info("Unalign fixup: %d %llx @%llx",
1534 (int)unaligned_fixup,
1535 (unsigned long long)regs->ex1,
1536 (unsigned long long)regs->pc);
1538 unaligned_fixup_count++;
1540 trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1541 force_sig_info(info.si_signo, &info, current);
1546 /* Read the bundle that caused the exception! */
1547 pc = (tilegx_bundle_bits __user *)(regs->pc);
1548 if (get_user(bundle, pc) != 0) {
1549 /* Probably never get here, since pc is a valid user address. */
1551 .si_signo = SIGSEGV,
1552 .si_code = SEGV_MAPERR,
1553 .si_addr = (void __user *)pc
1555 pr_err("Couldn't read instruction at %p trying to step\n", pc);
1556 trace_unhandled_signal("segfault in unalign fixup", regs,
1557 (unsigned long)info.si_addr, SIGSEGV);
1558 force_sig_info(info.si_signo, &info, current);
1562 if (!info->unalign_jit_base) {
1563 void __user *user_page;
1566 * Allocate a page in userland.
1567 * For 64-bit processes we try to place the mapping far
1568 * from anything else that might be going on (specifically
1569 * 64 GB below the top of the user address space). If it
1570 * happens not to be possible to put it there, it's OK;
1571 * the kernel will choose another location and we'll
1572 * remember it for later.
1574 if (is_compat_task())
1577 user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1578 (current->pid << PAGE_SHIFT);
1580 user_page = (void __user *) vm_mmap(NULL,
1581 (unsigned long)user_page,
1583 PROT_EXEC | PROT_READ |
1585 #ifdef CONFIG_HOMECACHE
1586 MAP_CACHE_HOME_TASK |
1592 if (IS_ERR((void __force *)user_page)) {
1593 pr_err("Out of kernel pages trying do_mmap.\n");
1597 /* Save the address in the thread_info struct */
1598 info->unalign_jit_base = user_page;
1599 if (unaligned_printk)
1600 pr_info("Unalign bundle: %d:%d, allocate page @%llx",
1601 raw_smp_processor_id(), current->pid,
1602 (unsigned long long)user_page);
1605 /* Generate unalign JIT */
1606 jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1609 #endif /* __tilegx__ */