q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
}
-/* Given the value i in 0..255 as the byte overflow when a field element
- in GHASH is multiplied by x^8, this function will return the values that
- are generated in the lo 16-bit word of the field value by applying the
- modular polynomial. The values lo_byte and hi_byte are returned via the
- macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
- memory as required by a suitable definition of this macro operating on
- the table above
-*/
-
-#define xx(p, q) 0x##p##q
+/*
+ * Given a value i in 0..255 as the byte overflow when a field element
+ * in GF(2^128) is multiplied by x^8, the following macro returns the
+ * 16-bit value that must be XOR-ed into the low-degree end of the
+ * product to reduce it modulo the irreducible polynomial x^128 + x^7 +
+ * x^2 + x + 1.
+ *
+ * There are two versions of the macro, and hence two tables: one for
+ * the "be" convention where the highest-order bit is the coefficient of
+ * the highest-degree polynomial term, and one for the "le" convention
+ * where the highest-order bit is the coefficient of the lowest-degree
+ * polynomial term. In both cases the values are stored in CPU byte
+ * endianness such that the coefficients are ordered consistently across
+ * bytes, i.e. in the "be" table bits 15..0 of the stored value
+ * correspond to the coefficients of x^15..x^0, and in the "le" table
+ * bits 15..0 correspond to the coefficients of x^0..x^15.
+ *
+ * Therefore, provided that the appropriate byte endianness conversions
+ * are done by the multiplication functions (and these must be in place
+ * anyway to support both little endian and big endian CPUs), the "be"
+ * table can be used for multiplications of both "bbe" and "ble"
+ * elements, and the "le" table can be used for multiplications of both
+ * "lle" and "lbe" elements.
+ */
-#define xda_bbe(i) ( \
- (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
- (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
- (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
- (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
+#define xda_be(i) ( \
+ (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
+ (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
+ (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
+ (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
)
-#define xda_lle(i) ( \
- (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
- (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
- (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
- (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
+#define xda_le(i) ( \
+ (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
+ (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
+ (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
+ (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
)
-static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
-static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
+static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
+static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
/* These functions multiply a field element by x, by x^4 and by x^8
* in the polynomial field representation. It uses 32-bit word operations
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
+ u64 _tt = gf128mul_table_le[(b << 7) & 0xff];
r->b = cpu_to_be64((b >> 1) | (a << 63));
r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[a >> 63];
+ u64 _tt = gf128mul_table_be[a >> 63];
r->a = cpu_to_be64((a << 1) | (b >> 63));
r->b = cpu_to_be64((b << 1) ^ _tt);
{
u64 a = le64_to_cpu(x->a);
u64 b = le64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[b >> 63];
+ u64 _tt = gf128mul_table_be[b >> 63];
r->a = cpu_to_le64((a << 1) ^ _tt);
r->b = cpu_to_le64((b << 1) | (a >> 63));
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_lle[b & 0xff];
+ u64 _tt = gf128mul_table_le[b & 0xff];
x->b = cpu_to_be64((b >> 8) | (a << 56));
x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[a >> 56];
+ u64 _tt = gf128mul_table_be[a >> 56];
x->a = cpu_to_be64((a << 8) | (b >> 56));
x->b = cpu_to_be64((b << 8) ^ _tt);