diff --git a/packages/orderable-bytes/src/chrono.rs b/packages/orderable-bytes/src/chrono.rs index a0f05d8..233ac34 100644 --- a/packages/orderable-bytes/src/chrono.rs +++ b/packages/orderable-bytes/src/chrono.rs @@ -1,20 +1,18 @@ //! Canonical, order-preserving fixed-length byte encodings for the //! `chrono` types `NaiveDate` and `DateTime`. //! -//! Each submodule exposes a `to_orderable_bytes` function and an -//! `ENCODED_LEN` constant. The bytes returned have the property that -//! byte-wise lex comparison agrees with chronological ordering (and byte -//! equality with value equality), so any comparison-as-bytes scheme -//! (`ore-rs` BlockORE, OPE, an ordered hash) inherits those properties on -//! the resulting digest. +//! Each submodule exposes an `ENCODED_LEN` constant and an +//! [`crate::ToOrderableBytes`] impl on its target type. The bytes +//! returned have the property that byte-wise lex comparison agrees with +//! chronological ordering (and byte equality with value equality), so +//! any comparison-as-bytes scheme (`ore-rs` BlockORE, OPE, an ordered +//! hash) inherits those properties on the resulting digest. /// Order-preserving byte encoding for [`::chrono::NaiveDate`]. pub mod naive_date { + use crate::ToOrderableBytes; use ::chrono::{Datelike, NaiveDate}; - /// Number of bytes in the canonical orderable-bytes form. - pub const ENCODED_LEN: usize = 4; - /// Build the canonical, order-preserving byte encoding of a `NaiveDate`. /// /// `NaiveDate::num_days_from_ce()` returns an `i32` whose ordering @@ -22,9 +20,14 @@ pub mod naive_date { /// `1u32 << 31`) preserves order while making the value unsigned, then /// big-endian byte serialisation gives a 4-byte sequence whose lex /// order matches the natural date order. 
- pub fn to_orderable_bytes(d: &NaiveDate) -> [u8; ENCODED_LEN] { - let biased = (d.num_days_from_ce() as u32) ^ (1u32 << 31); - biased.to_be_bytes() + impl ToOrderableBytes for NaiveDate { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + let biased = (self.num_days_from_ce() as u32) ^ (1u32 << 31); + biased.to_be_bytes() + } } #[cfg(test)] @@ -38,7 +41,7 @@ pub mod naive_date { #[test] fn year_one_biases_to_known_u32() { // Year 1 day 1 has num_days_from_ce = 1 ⇒ sign-flipped u32 = 0x8000_0001. - assert_eq!(to_orderable_bytes(&ymd(1, 1, 1)), [0x80, 0x00, 0x00, 0x01]); + assert_eq!(ymd(1, 1, 1).to_orderable_bytes(), [0x80, 0x00, 0x00, 0x01]); } #[test] @@ -55,8 +58,8 @@ pub mod naive_date { NaiveDate::MAX, ]; for window in ascending.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", @@ -70,9 +73,12 @@ /// Order-preserving byte encoding for [`::chrono::DateTime<::chrono::Utc>`]. pub mod datetime_utc { + use crate::ToOrderableBytes; use ::chrono::{DateTime, Utc}; - /// Number of bytes in the canonical orderable-bytes form. + /// Number of bytes in the canonical orderable-bytes form. Mirrors + /// `<DateTime<Utc> as ToOrderableBytes>::ENCODED_LEN` for use in + /// const contexts that can't easily name the trait impl. pub const ENCODED_LEN: usize = 12; /// Build the canonical, order-preserving byte encoding of a @@ -86,14 +92,19 @@ /// whole second. `timestamp_subsec_nanos` returns values in /// `0..2_000_000_000` (the upper half encodes leap-second moments), /// which fits in `u32` and preserves chronological order. 
- pub fn to_orderable_bytes(dt: &DateTime<Utc>) -> [u8; ENCODED_LEN] { - let secs = dt.timestamp(); - let nanos = dt.timestamp_subsec_nanos(); - let secs_biased = (secs as u64) ^ (1u64 << 63); - let mut out = [0u8; ENCODED_LEN]; - out[..8].copy_from_slice(&secs_biased.to_be_bytes()); - out[8..].copy_from_slice(&nanos.to_be_bytes()); - out + impl ToOrderableBytes for DateTime<Utc> { + const ENCODED_LEN: usize = ENCODED_LEN; + type Bytes = [u8; ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; ENCODED_LEN] { + let secs = self.timestamp(); + let nanos = self.timestamp_subsec_nanos(); + let secs_biased = (secs as u64) ^ (1u64 << 63); + let mut out = [0u8; ENCODED_LEN]; + out[..8].copy_from_slice(&secs_biased.to_be_bytes()); + out[8..].copy_from_slice(&nanos.to_be_bytes()); + out + } } #[cfg(test)] @@ -110,7 +121,7 @@ pub mod datetime_utc { // 1970-01-01T00:00:00Z: timestamp = 0, subsec = 0. Sign-flip on // `0_i64` gives `0x8000_0000_0000_0000`. assert_eq!( - to_orderable_bytes(&dt(0, 0)), + dt(0, 0).to_orderable_bytes(), [0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ); } @@ -132,8 +143,8 @@ DateTime::<Utc>::MAX_UTC, ]; for window in ascending.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", diff --git a/packages/orderable-bytes/src/decimal.rs b/packages/orderable-bytes/src/decimal.rs index 7cab404..9b54dd9 100644 --- a/packages/orderable-bytes/src/decimal.rs +++ b/packages/orderable-bytes/src/decimal.rs @@ -60,17 +60,16 @@ //! //! ## Constant-time //! -//! `to_orderable_bytes` is straight-line code with fixed-iteration loops -//! and branchless mask arithmetic. It does not call `Decimal::normalize` -//! (which loops while `scale > 0`) and does not branch on sign or -//! zero-ness. Timing does not distinguish the input's sign, zero-ness, -//! 
digit count, trailing-zero count, or scale. - +//! [`::to_orderable_bytes`](crate::ToOrderableBytes::to_orderable_bytes) +//! is straight-line code with fixed-iteration loops and branchless mask +//! arithmetic. It does not call `Decimal::normalize` (which loops while +//! `scale > 0`) and does not branch on sign or zero-ness. Timing does +//! not distinguish the input's sign, zero-ness, digit count, +//! trailing-zero count, or scale. + +use crate::ToOrderableBytes; use rust_decimal::Decimal; -/// Number of bytes in the canonical orderable-bytes form. -pub const ENCODED_LEN: usize = 14; - /// Width of the padded-significand field in bytes (13 bytes = 104 bits). const MANTISSA_BYTES: usize = 13; @@ -94,138 +93,144 @@ const EXP_MASK: u8 = 0x7F; /// Build the canonical, order-preserving fixed-length byte encoding of a /// `Decimal`. Two `Decimal`s that compare equal under `Decimal::cmp` /// produce identical byte arrays. -pub fn to_orderable_bytes(d: &Decimal) -> [u8; ENCODED_LEN] { - let mut out = [0u8; ENCODED_LEN]; - - // The pipeline runs unconditionally — no early return for zero inputs. - // A `d.is_zero()` short-circuit at the top would distinguish zero from - // non-zero plaintexts via timing. Instead we feed zero through the same - // sequence of operations as every other value (the helpers tolerate - // `m == 0` and produce `(significand=0, digits=0, trailing=0)`) and - // canonicalise the resulting byte 0 to the zero plaintext at the end - // via a branchless mask. - // - // We deliberately don't call `Decimal::normalize()` here. `normalize` - // strips trailing zeros from the mantissa via a `while scale > 0` loop - // whose iteration count depends on the secret value's trailing-zero - // count — a timing side channel. 
Our own `strip_trailing_zeros` already - // strips *all* trailing zeros (a strict superset of what `normalize` - // would remove, since it doesn't stop at scale=0), so the leading-digit - // exponent we compute below is identical whether the input has been - // normalised first or not. Skipping the call removes the leak. - let raw_mantissa = d.mantissa(); - let scale = d.scale() as i32; - // Branchless absolute value via the standard two's-complement identity - // `abs(x) = (x ^ s) - s` where `s` is the arithmetic right-shift of the - // sign bit (`-1` if `x` is negative, `0` otherwise). For positives this - // collapses to `x - 0 = x`; for negatives to `~x + 1 = -x`. Equivalent - // in value to `i128::unsigned_abs`, which compiles to a CMOV on tier-1 - // ISAs but is not language-guaranteed constant-time. The explicit form - // here removes the dependency on optimiser behaviour. - let sign_extension = raw_mantissa >> 127; - let abs_mantissa = ((raw_mantissa ^ sign_extension).wrapping_sub(sign_extension)) as u128; - let (significand, trailing) = strip_trailing_zeros(abs_mantissa); - let digits = digit_count(significand); - - // value = ±significand × 10^trailing × 10^(-scale) - // leading_exp = decimal exponent of the leading significant digit. - // - // For non-zero `Decimal`s `leading_exp` lies in `[-28, 28]`. The - // pipeline also runs for zero inputs (significand = 0, digits = 0, - // trailing = 0), where the formula collapses to `-1 - scale` and - // `leading_exp` lands in `[-29, -1]`; this produces a perfectly valid - // — though arbitrary — non-zero positive plaintext that we'll - // overwrite at the end with the canonical zero. The widened range - // `[-29, 28]` covers both branches without leaking the zero/non-zero - // distinction in debug builds either. 
- let leading_exp = digits as i32 - 1 + trailing - scale; - debug_assert!( - (-29..=28).contains(&leading_exp), - "leading_exp {} out of bounds — mantissa or scale corrupted", - leading_exp, - ); - let biased_exp = (leading_exp + EXP_BIAS) as u8; - debug_assert!(biased_exp <= EXP_MASK, "biased_exp overflowed 7 bits"); - - // Pad the significand out to 29 decimal digits so same-exponent compares - // across different significand lengths are byte-wise correct. - // - // We can't write this as `significand * 10u128.pow(PADDED_DIGITS - digits)` - // — `u128::pow` is square-and-multiply on the bits of its exponent, with - // both the iteration count and the conditional `acc * base` step driven - // by the exponent value. Since the exponent here is `PADDED_DIGITS − - // digits` and `digits` is derived from the secret mantissa, that would - // leak the digit count via timing. - // - // Instead, run a fixed `PADDED_DIGITS`-iteration loop that multiplies - // `padded_mantissa` by 10 under a branchless mask. The mask is `1` while - // we still have padding to apply (`digits + i < PADDED_DIGITS`) and `0` - // afterwards; the multiplication itself is computed unconditionally each - // iteration so the instruction sequence doesn't depend on `digits`. - // - // No overflow concern: in any iteration where the mask is `1` we have - // `padded_mantissa < 10^(PADDED_DIGITS-1) ≤ 10^28`, so `× 10` stays - // under `10^29`. In iterations where the mask is `0`, `padded_mantissa` - // sits at its final value (≤ `10^29`) and the unstored `× 10` product - // is at most `10^30 ≈ 2^99.7`, well inside `u128`. 
- let mut padded_mantissa = significand; - for i in 0..PADDED_DIGITS { - let do_step = ((digits + i) < PADDED_DIGITS) as u128; - let mask = 0u128.wrapping_sub(do_step); - let stepped = padded_mantissa.wrapping_mul(10); - padded_mantissa = (padded_mantissa & !mask) | (stepped & mask); - } - let mant_be = padded_mantissa.to_be_bytes(); - debug_assert!( - mant_be[..16 - MANTISSA_BYTES].iter().all(|&b| b == 0), - "padded mantissa overflowed 104 bits", - ); - let mant_field = &mant_be[16 - MANTISSA_BYTES..]; - - // Sign-class handling is folded into a single branchless mask so the - // function executes the same instructions regardless of the input's - // sign. `neg_mask` is `0xFF` for negatives and `0x00` for positives - // (and zero, which lives in the positive sign-class), formed from the - // arithmetic shift of the sign bit and a u8 truncation. - // - // - byte 0: positives want `SIGN_BIT | biased_exp`; negatives want - // `(!biased_exp) & EXP_MASK`. Expressed as one expression: - // (SIGN_BIT & !neg_mask) — keep sign bit only when positive - // | (biased_exp ^ (neg_mask & EXP_MASK)) - // — XOR the 7 exp bits with `neg_mask`, - // which is a no-op for positives and - // a 7-bit complement for negatives. - // - // - mantissa bytes: positives want the bytes unchanged; negatives want - // the bitwise complement. `b ^ neg_mask` does both: XOR with `0x00` - // is a no-op, XOR with `0xFF` is bitwise NOT. - let neg_mask = (raw_mantissa >> 127) as u8; - out[0] = (SIGN_BIT & !neg_mask) | (biased_exp ^ (neg_mask & EXP_MASK)); - for (i, &b) in mant_field.iter().enumerate() { - out[1 + i] = b ^ neg_mask; - } +impl ToOrderableBytes for Decimal { + const ENCODED_LEN: usize = 14; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + let d = self; + let mut out = [0u8; Self::ENCODED_LEN]; + + // The pipeline runs unconditionally — no early return for zero inputs. 
+ // A `d.is_zero()` short-circuit at the top would distinguish zero from + // non-zero plaintexts via timing. Instead we feed zero through the same + // sequence of operations as every other value (the helpers tolerate + // `m == 0` and produce `(significand=0, digits=0, trailing=0)`) and + // canonicalise the resulting byte 0 to the zero plaintext at the end + // via a branchless mask. + // + // We deliberately don't call `Decimal::normalize()` here. `normalize` + // strips trailing zeros from the mantissa via a `while scale > 0` loop + // whose iteration count depends on the secret value's trailing-zero + // count — a timing side channel. Our own `strip_trailing_zeros` already + // strips *all* trailing zeros (a strict superset of what `normalize` + // would remove, since it doesn't stop at scale=0), so the leading-digit + // exponent we compute below is identical whether the input has been + // normalised first or not. Skipping the call removes the leak. + let raw_mantissa = d.mantissa(); + let scale = d.scale() as i32; + // Branchless absolute value via the standard two's-complement identity + // `abs(x) = (x ^ s) - s` where `s` is the arithmetic right-shift of the + // sign bit (`-1` if `x` is negative, `0` otherwise). For positives this + // collapses to `x - 0 = x`; for negatives to `~x + 1 = -x`. Equivalent + // in value to `i128::unsigned_abs`, which compiles to a CMOV on tier-1 + // ISAs but is not language-guaranteed constant-time. The explicit form + // here removes the dependency on optimiser behaviour. + let sign_extension = raw_mantissa >> 127; + let abs_mantissa = ((raw_mantissa ^ sign_extension).wrapping_sub(sign_extension)) as u128; + let (significand, trailing) = strip_trailing_zeros(abs_mantissa); + let digits = digit_count(significand); + + // value = ±significand × 10^trailing × 10^(-scale) + // leading_exp = decimal exponent of the leading significant digit. + // + // For non-zero `Decimal`s `leading_exp` lies in `[-28, 28]`. 
The + // pipeline also runs for zero inputs (significand = 0, digits = 0, + // trailing = 0), where the formula collapses to `-1 - scale` and + // `leading_exp` lands in `[-29, -1]`; this produces a perfectly valid + // — though arbitrary — non-zero positive plaintext that we'll + // overwrite at the end with the canonical zero. The widened range + // `[-29, 28]` covers both branches without leaking the zero/non-zero + // distinction in debug builds either. + let leading_exp = digits as i32 - 1 + trailing - scale; + debug_assert!( + (-29..=28).contains(&leading_exp), + "leading_exp {} out of bounds — mantissa or scale corrupted", + leading_exp, + ); + let biased_exp = (leading_exp + EXP_BIAS) as u8; + debug_assert!(biased_exp <= EXP_MASK, "biased_exp overflowed 7 bits"); + + // Pad the significand out to 29 decimal digits so same-exponent compares + // across different significand lengths are byte-wise correct. + // + // We can't write this as `significand * 10u128.pow(PADDED_DIGITS - digits)` + // — `u128::pow` is square-and-multiply on the bits of its exponent, with + // both the iteration count and the conditional `acc * base` step driven + // by the exponent value. Since the exponent here is `PADDED_DIGITS − + // digits` and `digits` is derived from the secret mantissa, that would + // leak the digit count via timing. + // + // Instead, run a fixed `PADDED_DIGITS`-iteration loop that multiplies + // `padded_mantissa` by 10 under a branchless mask. The mask is `1` while + // we still have padding to apply (`digits + i < PADDED_DIGITS`) and `0` + // afterwards; the multiplication itself is computed unconditionally each + // iteration so the instruction sequence doesn't depend on `digits`. + // + // No overflow concern: in any iteration where the mask is `1` we have + // `padded_mantissa < 10^(PADDED_DIGITS-1) ≤ 10^28`, so `× 10` stays + // under `10^29`. 
In iterations where the mask is `0`, `padded_mantissa` + // sits at its final value (≤ `10^29`) and the unstored `× 10` product + // is at most `10^30 ≈ 2^99.7`, well inside `u128`. + let mut padded_mantissa = significand; + for i in 0..PADDED_DIGITS { + let do_step = ((digits + i) < PADDED_DIGITS) as u128; + let mask = 0u128.wrapping_sub(do_step); + let stepped = padded_mantissa.wrapping_mul(10); + padded_mantissa = (padded_mantissa & !mask) | (stepped & mask); + } + let mant_be = padded_mantissa.to_be_bytes(); + debug_assert!( + mant_be[..16 - MANTISSA_BYTES].iter().all(|&b| b == 0), + "padded mantissa overflowed 104 bits", + ); + let mant_field = &mant_be[16 - MANTISSA_BYTES..]; + + // Sign-class handling is folded into a single branchless mask so the + // function executes the same instructions regardless of the input's + // sign. `neg_mask` is `0xFF` for negatives and `0x00` for positives + // (and zero, which lives in the positive sign-class), formed from the + // arithmetic shift of the sign bit and a u8 truncation. + // + // - byte 0: positives want `SIGN_BIT | biased_exp`; negatives want + // `(!biased_exp) & EXP_MASK`. Expressed as one expression: + // (SIGN_BIT & !neg_mask) — keep sign bit only when positive + // | (biased_exp ^ (neg_mask & EXP_MASK)) + // — XOR the 7 exp bits with `neg_mask`, + // which is a no-op for positives and + // a 7-bit complement for negatives. + // + // - mantissa bytes: positives want the bytes unchanged; negatives want + // the bitwise complement. `b ^ neg_mask` does both: XOR with `0x00` + // is a no-op, XOR with `0xFF` is bitwise NOT. + let neg_mask = (raw_mantissa >> 127) as u8; + out[0] = (SIGN_BIT & !neg_mask) | (biased_exp ^ (neg_mask & EXP_MASK)); + for (i, &b) in mant_field.iter().enumerate() { + out[1 + i] = b ^ neg_mask; + } - // Final canonicalisation for the zero plaintext, applied branchlessly - // so the function's timing doesn't reveal whether the input was zero. 
- // - // The non-zero pipeline ran end-to-end on the zero input too. With - // `significand = 0` the padded mantissa is also `0`, so `out[1..]` is - // already the all-zero canonical zero tail; we only need to fix up - // `out[0]`, which currently holds some valid-looking positive - // `SIGN_BIT | biased_exp` byte. - // - // Build a full-byte mask `zero_mask` that is `0xFF` when `abs_mantissa - // == 0` and `0x00` otherwise: - // - `(x | -x) >> 127` is `1` if `x != 0`, `0` if `x == 0` (standard - // u128 nonzero-detection idiom). - // - XOR with `1` flips it to "is zero". - // - Subtract from `0u8` to broadcast the bit across all 8 bits. - // Then merge: keep `out[0]` for non-zero, replace with `SIGN_BIT` for - // zero. - let mant_nonzero_bit = ((abs_mantissa | abs_mantissa.wrapping_neg()) >> 127) as u8; - let zero_mask = 0u8.wrapping_sub(mant_nonzero_bit ^ 1); - out[0] = (out[0] & !zero_mask) | (SIGN_BIT & zero_mask); - out + // Final canonicalisation for the zero plaintext, applied branchlessly + // so the function's timing doesn't reveal whether the input was zero. + // + // The non-zero pipeline ran end-to-end on the zero input too. With + // `significand = 0` the padded mantissa is also `0`, so `out[1..]` is + // already the all-zero canonical zero tail; we only need to fix up + // `out[0]`, which currently holds some valid-looking positive + // `SIGN_BIT | biased_exp` byte. + // + // Build a full-byte mask `zero_mask` that is `0xFF` when `abs_mantissa + // == 0` and `0x00` otherwise: + // - `(x | -x) >> 127` is `1` if `x != 0`, `0` if `x == 0` (standard + // u128 nonzero-detection idiom). + // - XOR with `1` flips it to "is zero". + // - Subtract from `0u8` to broadcast the bit across all 8 bits. + // Then merge: keep `out[0]` for non-zero, replace with `SIGN_BIT` for + // zero. 
+ let mant_nonzero_bit = ((abs_mantissa | abs_mantissa.wrapping_neg()) >> 127) as u8; + let zero_mask = 0u8.wrapping_sub(mant_nonzero_bit ^ 1); + out[0] = (out[0] & !zero_mask) | (SIGN_BIT & zero_mask); + out + } } /// `5⁻¹ mod 2¹²⁸`. Verified: `5 * INV5 ≡ 1 (mod 2¹²⁸)`. Used to substitute @@ -333,33 +338,33 @@ mod tests { #[test] fn zero_canonicalises_to_sign_bit_only() { - let mut expected = [0u8; ENCODED_LEN]; + let mut expected = [0u8; 14]; expected[0] = SIGN_BIT; - assert_eq!(to_orderable_bytes(&dec!(0)), expected); - assert_eq!(to_orderable_bytes(&dec!(0.0)), expected); - assert_eq!(to_orderable_bytes(&dec!(0.000)), expected); + assert_eq!(dec!(0).to_orderable_bytes(), expected); + assert_eq!(dec!(0.0).to_orderable_bytes(), expected); + assert_eq!(dec!(0.000).to_orderable_bytes(), expected); } #[test] fn negative_zero_canonicalises_with_zero() { let neg_zero = -dec!(0); - assert_eq!(to_orderable_bytes(&neg_zero), to_orderable_bytes(&dec!(0))); + assert_eq!(neg_zero.to_orderable_bytes(), dec!(0).to_orderable_bytes()); } #[test] fn equivalent_forms_canonicalise_identically() { - let one = to_orderable_bytes(&dec!(1)); - assert_eq!(to_orderable_bytes(&dec!(1.0)), one); - assert_eq!(to_orderable_bytes(&dec!(1.00)), one); - assert_eq!(to_orderable_bytes(&dec!(1.000)), one); + let one = dec!(1).to_orderable_bytes(); + assert_eq!(dec!(1.0).to_orderable_bytes(), one); + assert_eq!(dec!(1.00).to_orderable_bytes(), one); + assert_eq!(dec!(1.000).to_orderable_bytes(), one); } #[test] fn integer_trailing_zeros_share_significand_bytes() { // 100 strips to (sig=1, leading_exp=2). Same significand as 1, so the // padded-mantissa region must match. - let one = to_orderable_bytes(&dec!(1)); - let hundred = to_orderable_bytes(&dec!(100)); + let one = dec!(1).to_orderable_bytes(); + let hundred = dec!(100).to_orderable_bytes(); assert_eq!(&one[1..], &hundred[1..]); // Top bit (sign) matches; low 7 bits differ by leading_exp. 
assert_eq!(one[0] & SIGN_BIT, SIGN_BIT); @@ -370,27 +375,27 @@ #[test] fn worked_positive_examples() { - let one = to_orderable_bytes(&dec!(1)); + let one = dec!(1).to_orderable_bytes(); assert_eq!(one[0], SIGN_BIT | (EXP_BIAS as u8)); - let half = to_orderable_bytes(&dec!(0.5)); + let half = dec!(0.5).to_orderable_bytes(); assert_eq!(half[0], SIGN_BIT | ((-1i32 + EXP_BIAS) as u8)); - let ten = to_orderable_bytes(&dec!(10)); + let ten = dec!(10).to_orderable_bytes(); assert_eq!(ten[0], SIGN_BIT | ((1i32 + EXP_BIAS) as u8)); } #[test] fn worked_negative_examples() { - let neg_one = to_orderable_bytes(&dec!(-1)); - let pos_one = to_orderable_bytes(&dec!(1)); + let neg_one = dec!(-1).to_orderable_bytes(); + let pos_one = dec!(1).to_orderable_bytes(); // Negative byte 0: sign bit clear, low 7 bits are inverted exp. assert_eq!(neg_one[0] & SIGN_BIT, 0); assert_eq!(neg_one[0] & EXP_MASK, !(EXP_BIAS as u8) & EXP_MASK); // Negative mantissa bytes are bitwise complements of the positive. - for i in 1..ENCODED_LEN { + for i in 1..<Decimal as ToOrderableBytes>::ENCODED_LEN { assert_eq!(neg_one[i], !pos_one[i]); } } @@ -415,8 +420,8 @@ Decimal::MAX, ]; for window in values.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", diff --git a/packages/orderable-bytes/src/lib.rs b/packages/orderable-bytes/src/lib.rs index 7248b95..ab27ef3 100644 --- a/packages/orderable-bytes/src/lib.rs +++ b/packages/orderable-bytes/src/lib.rs @@ -2,14 +2,13 @@ //! Canonical, order-preserving fixed-length byte encodings for plaintext //! types. //! -//! Each module exposes a `to_orderable_bytes` function that maps a value of -//! its target type to a fixed-length byte array whose byte-wise -//! lexicographic order agrees with the type's natural total order, and -//! 
whose byte equality agrees with the type's value equality. The -//! resulting bytes are scheme-agnostic — they're intended for any -//! comparison-as-bytes scheme that wants to preserve plaintext order on -//! ciphertexts (e.g. `ore-rs` BlockORE, an OPE construction, an ordered -//! hash). +//! Each supported type implements [`ToOrderableBytes`], which maps a +//! value to a fixed-length byte array whose byte-wise lexicographic +//! order agrees with the type's natural total order, and whose byte +//! equality agrees with the type's value equality. The resulting bytes +//! are scheme-agnostic — they're intended for any comparison-as-bytes +//! scheme that wants to preserve plaintext order on ciphertexts (e.g. +//! `ore-rs` BlockORE, an OPE construction, an ordered hash). //! //! Encoders are gated behind per-type feature flags so callers only pay //! for the dependencies they actually use. @@ -18,7 +17,43 @@ pub mod chrono; #[cfg(feature = "decimal")] pub mod decimal; +pub mod primitive; #[cfg(test)] #[macro_use] extern crate quickcheck; + +/// Maps a value to its canonical, order-preserving fixed-length byte +/// encoding. +/// +/// Implementors guarantee, for any `a` and `b` of the implementing type: +/// +/// - **Equality:** byte equality of the outputs agrees with the type's +/// value equality (`a.to_orderable_bytes() == b.to_orderable_bytes()` +/// iff `a == b`). +/// - **Order:** byte-wise lexicographic comparison of the outputs agrees +/// with the type's natural total order +/// (`a.to_orderable_bytes() <= b.to_orderable_bytes()` iff `a <= b`). +/// +/// The encoded length is fixed per type and exposed via +/// [`ENCODED_LEN`](Self::ENCODED_LEN). Per-type modules also re-export +/// the same value as a free `pub const` for use in const contexts where +/// naming the impl would be unwieldy. +pub trait ToOrderableBytes { + /// Length, in bytes, of the canonical encoding produced by + /// [`to_orderable_bytes`](Self::to_orderable_bytes). 
+ const ENCODED_LEN: usize; + + /// The fixed-length byte array type returned by + /// [`to_orderable_bytes`](Self::to_orderable_bytes). By convention + /// every impl sets this to `[u8; Self::ENCODED_LEN]`; the + /// indirection through an associated type is only needed because + /// stable Rust does not yet allow naming `[u8; Self::ENCODED_LEN]` + /// directly in a method signature (that requires + /// `feature(generic_const_exprs)`). + type Bytes: AsRef<[u8]>; + + /// Build the canonical, order-preserving fixed-length byte encoding + /// of `self`. + fn to_orderable_bytes(&self) -> Self::Bytes; +} diff --git a/packages/orderable-bytes/src/primitive.rs b/packages/orderable-bytes/src/primitive.rs new file mode 100644 index 0000000..fb085ae --- /dev/null +++ b/packages/orderable-bytes/src/primitive.rs @@ -0,0 +1,634 @@ +//! Canonical, order-preserving fixed-length byte encodings for the +//! primitives `bool`, `char`, `u8`, `i8`, `u16`, `i16`, `u32`, `i32`, +//! `u64`, `i64`, `u128`, `i128`, and the IEEE 754 floats `f32` and `f64`. +//! +//! Each impl emits the type's native byte width — no padding: +//! +//! - `bool`, `u8`, `i8` → `[u8; 1]` +//! - `i16`, `u16` → `[u8; 2]` +//! - `i32`, `u32`, `char`, `f32` → `[u8; 4]` +//! - `i64`, `u64`, `f64` → `[u8; 8]` +//! - `u128`, `i128` → `[u8; 16]` +//! +//! Consumers that need a fixed wider encoding (e.g. an ORE construction +//! whose plaintext block size is `[u8; 8]`) should zero-extend the +//! orderable bytes upstream of the encrypter; widening is monotonic on +//! lex order so it preserves the encoding's guarantees. +//! +//! ## `bool` +//! +//! Encoded as `false → 0x00`, `true → 0x01`. Already in lex order. +//! +//! ## Unsigned integers (`u8`, `u16`, `u32`, `u64`, `u128`) +//! +//! Already in lex order — no sign-flip needed. Native big-endian. +//! +//! ## Signed integers (`i8`, `i16`, `i32`, `i64`, `i128`) +//! +//! Each two's-complement input is mapped to its unsigned equivalent by 
flipping the sign bit at its native width (`x ^ (1 << (N-1))`), +//! then serialised big-endian. Sign-flipping moves negatives below +//! positives (the sign bit `1` for negatives clears to `0`, vice versa +//! for positives) and preserves order within each sign class. +//! +//! ## `char` +//! +//! Encoded as the big-endian bytes of the underlying `u32` Unicode +//! scalar value (`*self as u32`). Rust's `Ord` impl for `char` compares +//! by code point, and surrogate code points (`U+D800`..=`U+DFFF`) are +//! not representable as `char`, so the native `u32` lex order is +//! exactly the order we need. +//! +//! ## IEEE 754 floats (`f32`, `f64`) +//! +//! Each float is mapped to a lex-orderable unsigned integer of the +//! same width (`u32` for `f32`, `u64` for `f64`) using the standard +//! monotonic encoding: +//! +//! - Negatives flip every bit (their bit pattern's lex order is the +//! reverse of magnitude order, so flipping inverts it). +//! - Positives (and `+0.0`) flip only the sign bit (bringing them above +//! negatives in lex order). +//! +//! `-0.0` is canonicalised to `+0.0` before encoding so the two compare +//! byte-equal — matching `-0.0 == 0.0` for IEEE 754. +//! +//! NaN handling is unspecified. Floats implement `PartialOrd` rather +//! than `Ord` (NaN compares unordered against every value, including +//! itself), so the trait's order/equality guarantees only apply to +//! non-NaN inputs. Different NaN bit patterns will produce different +//! bytes; consumers that need a canonical NaN must canonicalise +//! upstream. + +use crate::ToOrderableBytes; + +impl ToOrderableBytes for bool { + const ENCODED_LEN: usize = 1; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // `false as u8 == 0`, `true as u8 == 1`. `false` sorts strictly + // below `true`. 
+ [*self as u8] + } +} + +impl ToOrderableBytes for u8 { + const ENCODED_LEN: usize = 1; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + [*self] + } +} + +impl ToOrderableBytes for i8 { + const ENCODED_LEN: usize = 1; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + [(*self as u8) ^ (1u8 << 7)] + } +} + +impl ToOrderableBytes for u16 { + const ENCODED_LEN: usize = 2; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + +impl ToOrderableBytes for i16 { + const ENCODED_LEN: usize = 2; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u16) ^ (1u16 << 15)).to_be_bytes() + } +} + +impl ToOrderableBytes for u32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + +impl ToOrderableBytes for i32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u32) ^ (1u32 << 31)).to_be_bytes() + } +} + +impl ToOrderableBytes for u64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + +impl ToOrderableBytes for i64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u64) ^ (1u64 << 63)).to_be_bytes() + } +} + +impl ToOrderableBytes for u128 { + const ENCODED_LEN: usize = 16; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + +impl ToOrderableBytes for i128 { + const ENCODED_LEN: usize = 16; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn 
to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u128) ^ (1u128 << 127)).to_be_bytes() + } +} + +impl ToOrderableBytes for char { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + (*self as u32).to_be_bytes() + } +} + +impl ToOrderableBytes for f32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // Canonicalise -0.0 → 0.0 so the two share one byte encoding + // (their f32 equality demands byte equality under our contract). + let value = if *self == -0.0 { 0.0 } else { *self }; + let bits = value.to_bits(); + // Branchless monotonic mapping (see `f64` impl for derivation). + let sign_extension = (bits as i32 >> 31) as u32; + let mask = sign_extension | (1u32 << 31); + (bits ^ mask).to_be_bytes() + } +} + +impl ToOrderableBytes for f64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // Canonicalise -0.0 → 0.0 so the two share one byte encoding + // (their f64 equality demands byte equality under our contract). + let value = if *self == -0.0 { 0.0 } else { *self }; + let bits = value.to_bits(); + // Branchless monotonic mapping. `sign_extension` is `u64::MAX` + // when the input is negative (sign bit `1`) and `0` when + // positive. ORing in `1 << 63` makes the mask `u64::MAX` for + // negatives (XOR-flip every bit) and `1 << 63` for positives + // (XOR-flip just the sign bit). 
+ let sign_extension = (bits as i64 >> 63) as u64; + let mask = sign_extension | (1u64 << 63); + (bits ^ mask).to_be_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // --- bool --- + + #[test] + fn bool_known_anchors() { + assert_eq!(false.to_orderable_bytes(), [0x00]); + assert_eq!(true.to_orderable_bytes(), [0x01]); + } + + #[test] + fn bool_byte_order_matches_natural_order() { + assert!(false.to_orderable_bytes() < true.to_orderable_bytes()); + } + + // --- u8 --- + + #[test] + fn u8_known_anchors() { + // Native: u8 is already in lex order, no transform. + assert_eq!(u8::MIN.to_orderable_bytes(), [0x00]); + assert_eq!(0x42u8.to_orderable_bytes(), [0x42]); + assert_eq!(u8::MAX.to_orderable_bytes(), [0xFF]); + } + + #[test] + fn u8_byte_order_matches_natural_order() { + let ascending = [u8::MIN, 1, 100, 200, u8::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i8 --- + + #[test] + fn i8_known_anchors() { + // Sign-flip at u8 (XOR 0x80) so MIN→0x00, 0→0x80, MAX→0xFF. 
+ assert_eq!(i8::MIN.to_orderable_bytes(), [0x00]); + assert_eq!(0i8.to_orderable_bytes(), [0x80]); + assert_eq!(i8::MAX.to_orderable_bytes(), [0xFF]); + } + + #[test] + fn i8_byte_order_matches_natural_order() { + let ascending = [i8::MIN, -100, -1, 0, 1, 100, i8::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- u16 --- + + #[test] + fn u16_known_anchors() { + assert_eq!(u16::MIN.to_orderable_bytes(), [0x00, 0x00]); + assert_eq!(u16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); + assert_eq!(0x1234u16.to_orderable_bytes(), [0x12, 0x34]); + } + + #[test] + fn u16_byte_order_matches_natural_order() { + let ascending = [u16::MIN, 1, 256, 10000, u16::MAX - 1, u16::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i16 --- + + #[test] + fn i16_known_anchors() { + // Sign-flip at u16 (XOR 0x8000), then BE. 
+ assert_eq!(i16::MIN.to_orderable_bytes(), [0x00, 0x00]); + assert_eq!(0i16.to_orderable_bytes(), [0x80, 0x00]); + assert_eq!(i16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); + } + + #[test] + fn i16_byte_order_matches_natural_order() { + let ascending = [i16::MIN, -10000, -1, 0, 1, 10000, i16::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- u32 --- + + #[test] + fn u32_known_anchors() { + assert_eq!(u32::MIN.to_orderable_bytes(), [0x00; 4]); + assert_eq!(u32::MAX.to_orderable_bytes(), [0xFF; 4]); + assert_eq!( + 0x1234_5678u32.to_orderable_bytes(), + [0x12, 0x34, 0x56, 0x78] + ); + } + + #[test] + fn u32_byte_order_matches_natural_order() { + let ascending = [ + u32::MIN, + 1, + 1 << 8, + 1 << 16, + 1 << 24, + u32::MAX - 1, + u32::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i32 --- + + #[test] + fn i32_known_anchors() { + // Sign-flip at u32 (XOR 0x8000_0000), then BE. 
+ assert_eq!(i32::MIN.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); + assert_eq!(0i32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]); + assert_eq!(i32::MAX.to_orderable_bytes(), [0xFF, 0xFF, 0xFF, 0xFF]); + } + + #[test] + fn i32_byte_order_matches_natural_order() { + let ascending = [i32::MIN, -1_000_000_000, -1, 0, 1, 1_000_000_000, i32::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- u64 --- + + #[test] + fn u64_known_anchors() { + assert_eq!(u64::MIN.to_orderable_bytes(), [0x00; 8]); + assert_eq!(u64::MAX.to_orderable_bytes(), [0xFF; 8]); + let one = 1u64.to_orderable_bytes(); + let mut expected_one = [0u8; 8]; + expected_one[7] = 1; + assert_eq!(one, expected_one); + } + + #[test] + fn u64_byte_order_matches_natural_order() { + let ascending = [ + u64::MIN, + 1, + 1 << 16, + 1 << 32, + 1 << 48, + u64::MAX - 1, + u64::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i64 --- + + #[test] + fn i64_known_anchors() { + assert_eq!(i64::MIN.to_orderable_bytes(), [0x00; 8]); + assert_eq!( + 0i64.to_orderable_bytes(), + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + ); + assert_eq!(i64::MAX.to_orderable_bytes(), [0xFF; 8]); + } + + #[test] + fn i64_byte_order_matches_natural_order() { + let ascending = [ + i64::MIN, + -1_000_000_000_000, + -1, + 0, + 1, + 1_000_000_000_000, + i64::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- u128 --- + + #[test] + fn u128_known_anchors() { + assert_eq!(u128::MIN.to_orderable_bytes(), [0; 16]); + assert_eq!(u128::MAX.to_orderable_bytes(), [0xFF; 16]); + let one = 1u128.to_orderable_bytes(); + let 
mut expected_one = [0u8; 16]; + expected_one[15] = 1; + assert_eq!(one, expected_one); + } + + #[test] + fn u128_byte_order_matches_natural_order() { + let ascending = [ + u128::MIN, + 1, + (1u128 << 32), + (1u128 << 64), + (1u128 << 96), + u128::MAX - 1, + u128::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i128 --- + + #[test] + fn i128_known_anchors() { + assert_eq!(i128::MIN.to_orderable_bytes(), [0; 16]); + assert_eq!(i128::MAX.to_orderable_bytes(), [0xFF; 16]); + let mut expected_zero = [0u8; 16]; + expected_zero[0] = 0x80; + assert_eq!(0i128.to_orderable_bytes(), expected_zero); + } + + #[test] + fn i128_byte_order_matches_natural_order() { + let ascending = [ + i128::MIN, + -(1i128 << 96), + -(1i128 << 64), + -1, + 0, + 1, + (1i128 << 64), + (1i128 << 96), + i128::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- char --- + + #[test] + fn char_known_anchors() { + // 'A' = U+0041 = 0x0000_0041 BE. + assert_eq!('A'.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x41]); + // '\0' = U+0000 (lowest code point). + assert_eq!('\0'.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); + // char::MAX = U+10FFFF (highest valid scalar value). + assert_eq!(char::MAX.to_orderable_bytes(), [0x00, 0x10, 0xFF, 0xFF]); + } + + #[test] + fn char_byte_order_matches_natural_order() { + // Spans ASCII, BMP, and supplementary planes (above the surrogate gap). 
+ let ascending = [ + '\0', + '0', + 'A', + 'a', + '\u{7F}', + '\u{D7FF}', + '\u{E000}', + '\u{1F600}', + char::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{:?} < {:?} failed", + window[0], + window[1] + ); + } + } + + // --- f32 --- + + #[test] + fn f32_zero_canonical_bytes() { + // +0.0 → 0x8000_0000 (sign-bit-only flip on all-zero bits). + assert_eq!(0.0f32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]); + } + + #[test] + fn f32_negative_zero_canonicalises_with_zero() { + assert_eq!((-0.0f32).to_orderable_bytes(), 0.0f32.to_orderable_bytes()); + } + + #[test] + fn f32_byte_order_matches_natural_order() { + let ascending = [ + f32::NEG_INFINITY, + f32::MIN, + -1e30, + -1.0, + -f32::MIN_POSITIVE, + 0.0, + f32::MIN_POSITIVE, + 1.0, + 1e30, + f32::MAX, + f32::INFINITY, + ]; + for window in ascending.windows(2) { + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); + assert!(a < b, "{} < {} failed", window[0], window[1]); + } + } + + #[test] + fn f32_subnormals_sort_above_zero_below_normals() { + // Smallest positive subnormal (`f32::from_bits(1)`) must land + // strictly between 0.0 and the smallest positive normal. + let subnormal = f32::from_bits(1); + assert!(0.0f32.to_orderable_bytes() < subnormal.to_orderable_bytes()); + assert!(subnormal.to_orderable_bytes() < f32::MIN_POSITIVE.to_orderable_bytes()); + } + + // --- f64 --- + + #[test] + fn f64_zero_canonical_bytes() { + // +0.0 → 0x8000_0000_0000_0000 (sign-bit-only flip on all-zero bits). 
+ assert_eq!( + 0.0f64.to_orderable_bytes(), + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + ); + } + + #[test] + fn f64_negative_zero_canonicalises_with_zero() { + assert_eq!((-0.0f64).to_orderable_bytes(), 0.0f64.to_orderable_bytes()); + } + + #[test] + fn f64_byte_order_matches_natural_order() { + let ascending = [ + f64::NEG_INFINITY, + f64::MIN, + -1e100, + -1.0, + -f64::MIN_POSITIVE, + 0.0, + f64::MIN_POSITIVE, + 1.0, + 1e100, + f64::MAX, + f64::INFINITY, + ]; + for window in ascending.windows(2) { + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); + assert!(a < b, "{} < {} failed", window[0], window[1]); + } + } + + #[test] + fn f64_subnormals_sort_above_zero_below_normals() { + // Smallest positive subnormal (`f64::from_bits(1)`) must land + // strictly between 0.0 and the smallest positive normal. + let subnormal = f64::from_bits(1); + assert!(0.0f64.to_orderable_bytes() < subnormal.to_orderable_bytes()); + assert!(subnormal.to_orderable_bytes() < f64::MIN_POSITIVE.to_orderable_bytes()); + } +} diff --git a/packages/ore-rs/src/chrono.rs b/packages/ore-rs/src/chrono.rs index 6ecaf13..06d770e 100644 --- a/packages/ore-rs/src/chrono.rs +++ b/packages/ore-rs/src/chrono.rs @@ -11,24 +11,21 @@ use crate::ciphertext::*; use crate::{OreCipher, OreEncrypt, OreError}; use ::chrono::{DateTime, NaiveDate, Utc}; +use orderable_bytes::ToOrderableBytes; -const NAIVE_DATE_LEN: usize = orderable_bytes::chrono::naive_date::ENCODED_LEN; -const DATETIME_UTC_LEN: usize = orderable_bytes::chrono::datetime_utc::ENCODED_LEN; +const NAIVE_DATE_LEN: usize = <NaiveDate as ToOrderableBytes>::ENCODED_LEN; +const DATETIME_UTC_LEN: usize = <DateTime<Utc> as ToOrderableBytes>::ENCODED_LEN; impl<T: OreCipher> OreEncrypt<T> for NaiveDate { type LeftOutput = Left<T, NAIVE_DATE_LEN>; type FullOutput = CipherText<T, NAIVE_DATE_LEN>; fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { - cipher.encrypt_left(&orderable_bytes::chrono::naive_date::to_orderable_bytes( - self, - )) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) ->
Result<Self::FullOutput, OreError> { - cipher.encrypt(&orderable_bytes::chrono::naive_date::to_orderable_bytes( - self, - )) + cipher.encrypt(&self.to_orderable_bytes()) } } @@ -37,15 +34,11 @@ impl<T: OreCipher> OreEncrypt<T> for DateTime<Utc> { type FullOutput = CipherText<T, DATETIME_UTC_LEN>; fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { - cipher.encrypt_left(&orderable_bytes::chrono::datetime_utc::to_orderable_bytes( - self, - )) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> { - cipher.encrypt(&orderable_bytes::chrono::datetime_utc::to_orderable_bytes( - self, - )) + cipher.encrypt(&self.to_orderable_bytes()) } } diff --git a/packages/ore-rs/src/convert.rs b/packages/ore-rs/src/convert.rs deleted file mode 100644 index 1741340..0000000 --- a/packages/ore-rs/src/convert.rs +++ /dev/null @@ -1,72 +0,0 @@ -/* - This following function implement an order-preserving translation of 64 bit - floats to 64 bit doubles (and the reverse operation - although that is just - used for verifying correctness). - - The 64 bit integer that is produced is a plaintext that will be ORE encrypted - later on. - - The mapping is such that the ordering of the floats will be preserved when - mapped to an unsigned integer, for example, an array of unsigned integers - dervived from a sorted array of doubles will result in no change to its - ordering when it itself is sorted. - - The mapping does not preserve any notion of the previous value after the - conversion - only ordering is preserved. - - Caveat: NaN and -ve & +ve infinity will also be mapped and ordering is not - well-defined with those values. Those values should be discarded before - converting arrays of those values.
- - This post was used as a reference for building this implementation: - https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order -*/ - -pub(crate) trait ToOrderedInteger<T> { - fn map_to(&self) -> T; -} - -#[allow(dead_code)] -trait FromOrderedInteger<T> { - fn map_from(input: T) -> Self; -} - -impl ToOrderedInteger<u64> for f64 { - fn map_to(&self) -> u64 { - // Canonicalise -0.0 to +0.0 so equal-comparing floats produce equal - // ciphertexts (IEEE-754: -0.0 == +0.0). - let num: u64 = if *self == 0.0 { 0 } else { self.to_bits() }; - // `num >> 63` is the sign bit (0 or 1). Cast to i64 (no bit change, - // both 0 and 1 fit), negate (→ 0 or -1), reinterpret as u64 (→ 0 or - // 0xFFFF_FFFF_FFFF_FFFF, the broadcast sign mask). Equivalent to - // the previous `mem::transmute` pair but `as`-cast safe. - let signed: i64 = -((num >> 63) as i64); - let mut mask: u64 = signed as u64; - mask |= 0x8000000000000000; - num ^ mask - } -} - -impl FromOrderedInteger<u64> for f64 { - fn map_from(input: u64) -> f64 { - let i = (((input >> 63) as i64) - 1) as u64; - let mask: u64 = i | 0x8000000000000000; - f64::from_bits(input ^ mask) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use quickcheck::TestResult; - - quickcheck!
{ - fn roundtrip(x: f64) -> TestResult { - if !x.is_nan() && x.is_finite() { - TestResult::from_bool(x == f64::map_from(x.map_to())) - } else { - TestResult::discard() - } - } - } -} diff --git a/packages/ore-rs/src/decimal.rs b/packages/ore-rs/src/decimal.rs index 8a07f06..2253bde 100644 --- a/packages/ore-rs/src/decimal.rs +++ b/packages/ore-rs/src/decimal.rs @@ -10,19 +10,21 @@ use crate::ciphertext::{CipherText, Left}; use crate::encrypt::OreEncrypt; use crate::{OreCipher, OreError}; -use orderable_bytes::decimal::{to_orderable_bytes, ENCODED_LEN}; +use orderable_bytes::ToOrderableBytes; use rust_decimal::Decimal; +const ENCODED_LEN: usize = <Decimal as ToOrderableBytes>::ENCODED_LEN; + impl<T: OreCipher> OreEncrypt<T> for Decimal { type LeftOutput = Left<T, ENCODED_LEN>; type FullOutput = CipherText<T, ENCODED_LEN>; fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { - cipher.encrypt_left(&to_orderable_bytes(self)) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> { - cipher.encrypt(&to_orderable_bytes(self)) + cipher.encrypt(&self.to_orderable_bytes()) } } diff --git a/packages/ore-rs/src/encrypt.rs b/packages/ore-rs/src/encrypt.rs index e9517e5..ed37ddb 100644 --- a/packages/ore-rs/src/encrypt.rs +++ b/packages/ore-rs/src/encrypt.rs @@ -1,17 +1,19 @@ use crate::ciphertext::*; -use crate::convert::ToOrderedInteger; use crate::PlainText; use crate::{OreCipher, OreError}; +use orderable_bytes::ToOrderableBytes; /// Type-directed entry point for encrypting plaintext values with a given /// [`OreCipher`]. /// /// Each implementation knows how to canonicalise its target type into the -/// fixed-size byte plaintext expected by the cipher (e.g. big-endian bytes -/// for `u64`, an order-preserving 8-byte mapping for `f64`, the 14-byte -/// scientific-form encoding for `Decimal`). The associated output types -/// pin the resulting ciphertext shape, with `LeftOutput` the query-only -/// half and `FullOutput` the full ciphertext suitable for storage. +/// fixed-size byte plaintext expected by the cipher.
For primitives and +/// the `chrono` / `decimal` value types the canonicalisation is delegated +/// to [`orderable_bytes::ToOrderableBytes`], which guarantees the encoded +/// bytes preserve the type's natural total order under lexicographic +/// comparison. The associated output types pin the resulting ciphertext +/// shape, with `LeftOutput` the query-only half and `FullOutput` the full +/// ciphertext suitable for storage. pub trait OreEncrypt<T: OreCipher> { /// Output type produced by [`encrypt_left`](Self::encrypt_left). type LeftOutput: OreOutput; @@ -26,59 +28,58 @@ pub trait OreEncrypt<T: OreCipher> { fn encrypt(&self, input: &T) -> Result<Self::FullOutput, OreError>; } -impl<T: OreCipher> OreEncrypt<T> for u64 { - /* Note that Rust currently doesn't allow - * generic associated types so this ia a bit verbose! */ - type LeftOutput = Left<T, 8>; - type FullOutput = CipherText<T, 8>; +// `Left<T, N>` and `CipherText<T, N>` need a const-generic `N` known at +// the type level. Stable Rust can't accept `<T as ToOrderableBytes>::ENCODED_LEN` +// directly in that position (it would require `generic_const_exprs`), so +// we lift each primitive's encoded length into a free `const` and name +// that const in the associated type. Same idiom as `chrono.rs` / +// `decimal.rs`.
+const BOOL_LEN: usize = <bool as ToOrderableBytes>::ENCODED_LEN; +const U8_LEN: usize = <u8 as ToOrderableBytes>::ENCODED_LEN; +const I8_LEN: usize = <i8 as ToOrderableBytes>::ENCODED_LEN; +const U16_LEN: usize = <u16 as ToOrderableBytes>::ENCODED_LEN; +const I16_LEN: usize = <i16 as ToOrderableBytes>::ENCODED_LEN; +const U32_LEN: usize = <u32 as ToOrderableBytes>::ENCODED_LEN; +const I32_LEN: usize = <i32 as ToOrderableBytes>::ENCODED_LEN; +const U64_LEN: usize = <u64 as ToOrderableBytes>::ENCODED_LEN; +const I64_LEN: usize = <i64 as ToOrderableBytes>::ENCODED_LEN; +const U128_LEN: usize = <u128 as ToOrderableBytes>::ENCODED_LEN; +const I128_LEN: usize = <i128 as ToOrderableBytes>::ENCODED_LEN; +const CHAR_LEN: usize = <char as ToOrderableBytes>::ENCODED_LEN; +const F32_LEN: usize = <f32 as ToOrderableBytes>::ENCODED_LEN; +const F64_LEN: usize = <f64 as ToOrderableBytes>::ENCODED_LEN; - fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> - where - T::LeftBlockType: CipherTextBlock, - { - let bytes = self.to_be_bytes(); - cipher.encrypt_left(&bytes) - } - - fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> - where - T::LeftBlockType: CipherTextBlock, - T::RightBlockType: CipherTextBlock, - { - let bytes = self.to_be_bytes(); - cipher.encrypt(&bytes) - } -} +macro_rules! impl_ore_encrypt_via_orderable_bytes { + ($type:ty, $len_const:ident) => { + impl<T: OreCipher> OreEncrypt<T> for $type { + type LeftOutput = Left<T, $len_const>; + type FullOutput = CipherText<T, $len_const>; -impl<T: OreCipher> OreEncrypt<T> for u32 { - type LeftOutput = Left<T, 4>; - type FullOutput = CipherText<T, 4>; + fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { + cipher.encrypt_left(&self.to_orderable_bytes()) + } - fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { - let bytes = self.to_be_bytes(); - cipher.encrypt_left(&bytes) - } - - fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> { - let bytes = self.to_be_bytes(); - cipher.encrypt(&bytes) - } + fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> { + cipher.encrypt(&self.to_orderable_bytes()) + } + } + }; } -impl<T: OreCipher> OreEncrypt<T> for f64 { - type LeftOutput = Left<T, 8>; - type FullOutput = CipherText<T, 8>; - - fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> { - let plaintext: u64 = self.map_to(); - plaintext.encrypt_left(cipher) - } - - fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> { - let plaintext: u64 = self.map_to(); - plaintext.encrypt(cipher) - } -} +impl_ore_encrypt_via_orderable_bytes!(bool, BOOL_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u8, U8_LEN); +impl_ore_encrypt_via_orderable_bytes!(i8, I8_LEN); +impl_ore_encrypt_via_orderable_bytes!(u16, U16_LEN); +impl_ore_encrypt_via_orderable_bytes!(i16, I16_LEN); +impl_ore_encrypt_via_orderable_bytes!(u32, U32_LEN); +impl_ore_encrypt_via_orderable_bytes!(i32, I32_LEN); +impl_ore_encrypt_via_orderable_bytes!(u64, U64_LEN); +impl_ore_encrypt_via_orderable_bytes!(i64, I64_LEN); +impl_ore_encrypt_via_orderable_bytes!(u128, U128_LEN); +impl_ore_encrypt_via_orderable_bytes!(i128, I128_LEN); +impl_ore_encrypt_via_orderable_bytes!(char, CHAR_LEN); +impl_ore_encrypt_via_orderable_bytes!(f32, F32_LEN); +impl_ore_encrypt_via_orderable_bytes!(f64, F64_LEN); impl<T: OreCipher, const N: usize> OreEncrypt<T> for PlainText<N> { type LeftOutput = Left<T, N>; diff --git a/packages/ore-rs/src/lib.rs b/packages/ore-rs/src/lib.rs index 6640949..09e2af5 100644 --- a/packages/ore-rs/src/lib.rs +++ b/packages/ore-rs/src/lib.rs @@ -139,7 +139,6 @@ #[cfg(feature = "chrono")] mod chrono; mod ciphertext; -mod convert; #[cfg(feature = "decimal")] mod decimal; mod encrypt;