From 64a8b1c4a7e2ffa763959ceedc9143005fb68570 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 15:34:56 +1000 Subject: [PATCH 01/11] refactor(orderable-bytes): introduce ToOrderableBytes trait Replace the per-type free `to_orderable_bytes` functions with impls of a new top-level `ToOrderableBytes` trait that exposes the encoded length as an associated `const ENCODED_LEN` and the byte array as an associated `type Bytes: AsRef<[u8]>`. Migrate the in-tree `ore-rs` consumer to the trait API. --- packages/orderable-bytes/src/chrono.rs | 65 +++-- packages/orderable-bytes/src/decimal.rs | 323 ++++++++++++------------ packages/orderable-bytes/src/lib.rs | 50 +++- packages/ore-rs/src/chrono.rs | 21 +- packages/ore-rs/src/decimal.rs | 8 +- 5 files changed, 256 insertions(+), 211 deletions(-) diff --git a/packages/orderable-bytes/src/chrono.rs b/packages/orderable-bytes/src/chrono.rs index a0f05d8..233ac34 100644 --- a/packages/orderable-bytes/src/chrono.rs +++ b/packages/orderable-bytes/src/chrono.rs @@ -1,20 +1,18 @@ //! Canonical, order-preserving fixed-length byte encodings for the //! `chrono` types `NaiveDate` and `DateTime`. //! -//! Each submodule exposes a `to_orderable_bytes` function and an -//! `ENCODED_LEN` constant. The bytes returned have the property that -//! byte-wise lex comparison agrees with chronological ordering (and byte -//! equality with value equality), so any comparison-as-bytes scheme -//! (`ore-rs` BlockORE, OPE, an ordered hash) inherits those properties on -//! the resulting digest. +//! Each submodule exposes an `ENCODED_LEN` constant and an +//! [`crate::ToOrderableBytes`] impl on its target type. The bytes +//! returned have the property that byte-wise lex comparison agrees with +//! chronological ordering (and byte equality with value equality), so +//! any comparison-as-bytes scheme (`ore-rs` BlockORE, OPE, an ordered +//! hash) inherits those properties on the resulting digest. 
/// Order-preserving byte encoding for [`::chrono::NaiveDate`]. pub mod naive_date { + use crate::ToOrderableBytes; use ::chrono::{Datelike, NaiveDate}; - /// Number of bytes in the canonical orderable-bytes form. - pub const ENCODED_LEN: usize = 4; - /// Build the canonical, order-preserving byte encoding of a `NaiveDate`. /// /// `NaiveDate::num_days_from_ce()` returns an `i32` whose ordering @@ -22,9 +20,14 @@ pub mod naive_date { /// `1u32 << 31`) preserves order while making the value unsigned, then /// big-endian byte serialisation gives a 4-byte sequence whose lex /// order matches the natural date order. - pub fn to_orderable_bytes(d: &NaiveDate) -> [u8; ENCODED_LEN] { - let biased = (d.num_days_from_ce() as u32) ^ (1u32 << 31); - biased.to_be_bytes() + impl ToOrderableBytes for NaiveDate { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + let biased = (self.num_days_from_ce() as u32) ^ (1u32 << 31); + biased.to_be_bytes() + } } #[cfg(test)] @@ -38,7 +41,7 @@ pub mod naive_date { #[test] fn year_one_biases_to_known_u32() { // Year 1 day 1 has num_days_from_ce = 1 ⇒ sign-flipped u32 = 0x8000_0001. - assert_eq!(to_orderable_bytes(&ymd(1, 1, 1)), [0x80, 0x00, 0x00, 0x01]); + assert_eq!(ymd(1, 1, 1).to_orderable_bytes(), [0x80, 0x00, 0x00, 0x01]); } #[test] @@ -55,8 +58,8 @@ pub mod naive_date { NaiveDate::MAX, ]; for window in ascending.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", @@ -70,9 +73,12 @@ pub mod naive_date { /// Order-preserving byte encoding for [`::chrono::DateTime<::chrono::Utc>`]. pub mod datetime_utc { + use crate::ToOrderableBytes; use ::chrono::{DateTime, Utc}; - /// Number of bytes in the canonical orderable-bytes form. 
+ /// Number of bytes in the canonical orderable-bytes form. Mirrors + /// ` as ToOrderableBytes>::ENCODED_LEN` for use in + /// const contexts that can't easily name the trait impl. pub const ENCODED_LEN: usize = 12; /// Build the canonical, order-preserving byte encoding of a @@ -86,14 +92,19 @@ pub mod datetime_utc { /// whole second. `timestamp_subsec_nanos` returns values in /// `0..2_000_000_000` (the upper half encodes leap-second moments), /// which fits in `u32` and preserves chronological order. - pub fn to_orderable_bytes(dt: &DateTime) -> [u8; ENCODED_LEN] { - let secs = dt.timestamp(); - let nanos = dt.timestamp_subsec_nanos(); - let secs_biased = (secs as u64) ^ (1u64 << 63); - let mut out = [0u8; ENCODED_LEN]; - out[..8].copy_from_slice(&secs_biased.to_be_bytes()); - out[8..].copy_from_slice(&nanos.to_be_bytes()); - out + impl ToOrderableBytes for DateTime { + const ENCODED_LEN: usize = ENCODED_LEN; + type Bytes = [u8; ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; ENCODED_LEN] { + let secs = self.timestamp(); + let nanos = self.timestamp_subsec_nanos(); + let secs_biased = (secs as u64) ^ (1u64 << 63); + let mut out = [0u8; ENCODED_LEN]; + out[..8].copy_from_slice(&secs_biased.to_be_bytes()); + out[8..].copy_from_slice(&nanos.to_be_bytes()); + out + } } #[cfg(test)] @@ -110,7 +121,7 @@ pub mod datetime_utc { // 1970-01-01T00:00:00Z: timestamp = 0, subsec = 0. Sign-flip on // `0_i64` gives `0x8000_0000_0000_0000`. 
assert_eq!( - to_orderable_bytes(&dt(0, 0)), + dt(0, 0).to_orderable_bytes(), [0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ); } @@ -132,8 +143,8 @@ pub mod datetime_utc { DateTime::::MAX_UTC, ]; for window in ascending.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", diff --git a/packages/orderable-bytes/src/decimal.rs b/packages/orderable-bytes/src/decimal.rs index 7cab404..9b54dd9 100644 --- a/packages/orderable-bytes/src/decimal.rs +++ b/packages/orderable-bytes/src/decimal.rs @@ -60,17 +60,16 @@ //! //! ## Constant-time //! -//! `to_orderable_bytes` is straight-line code with fixed-iteration loops -//! and branchless mask arithmetic. It does not call `Decimal::normalize` -//! (which loops while `scale > 0`) and does not branch on sign or -//! zero-ness. Timing does not distinguish the input's sign, zero-ness, -//! digit count, trailing-zero count, or scale. - +//! [`::to_orderable_bytes`](crate::ToOrderableBytes::to_orderable_bytes) +//! is straight-line code with fixed-iteration loops and branchless mask +//! arithmetic. It does not call `Decimal::normalize` (which loops while +//! `scale > 0`) and does not branch on sign or zero-ness. Timing does +//! not distinguish the input's sign, zero-ness, digit count, +//! trailing-zero count, or scale. + +use crate::ToOrderableBytes; use rust_decimal::Decimal; -/// Number of bytes in the canonical orderable-bytes form. -pub const ENCODED_LEN: usize = 14; - /// Width of the padded-significand field in bytes (13 bytes = 104 bits). const MANTISSA_BYTES: usize = 13; @@ -94,138 +93,144 @@ const EXP_MASK: u8 = 0x7F; /// Build the canonical, order-preserving fixed-length byte encoding of a /// `Decimal`. Two `Decimal`s that compare equal under `Decimal::cmp` /// produce identical byte arrays. 
-pub fn to_orderable_bytes(d: &Decimal) -> [u8; ENCODED_LEN] { - let mut out = [0u8; ENCODED_LEN]; - - // The pipeline runs unconditionally — no early return for zero inputs. - // A `d.is_zero()` short-circuit at the top would distinguish zero from - // non-zero plaintexts via timing. Instead we feed zero through the same - // sequence of operations as every other value (the helpers tolerate - // `m == 0` and produce `(significand=0, digits=0, trailing=0)`) and - // canonicalise the resulting byte 0 to the zero plaintext at the end - // via a branchless mask. - // - // We deliberately don't call `Decimal::normalize()` here. `normalize` - // strips trailing zeros from the mantissa via a `while scale > 0` loop - // whose iteration count depends on the secret value's trailing-zero - // count — a timing side channel. Our own `strip_trailing_zeros` already - // strips *all* trailing zeros (a strict superset of what `normalize` - // would remove, since it doesn't stop at scale=0), so the leading-digit - // exponent we compute below is identical whether the input has been - // normalised first or not. Skipping the call removes the leak. - let raw_mantissa = d.mantissa(); - let scale = d.scale() as i32; - // Branchless absolute value via the standard two's-complement identity - // `abs(x) = (x ^ s) - s` where `s` is the arithmetic right-shift of the - // sign bit (`-1` if `x` is negative, `0` otherwise). For positives this - // collapses to `x - 0 = x`; for negatives to `~x + 1 = -x`. Equivalent - // in value to `i128::unsigned_abs`, which compiles to a CMOV on tier-1 - // ISAs but is not language-guaranteed constant-time. The explicit form - // here removes the dependency on optimiser behaviour. 
- let sign_extension = raw_mantissa >> 127; - let abs_mantissa = ((raw_mantissa ^ sign_extension).wrapping_sub(sign_extension)) as u128; - let (significand, trailing) = strip_trailing_zeros(abs_mantissa); - let digits = digit_count(significand); - - // value = ±significand × 10^trailing × 10^(-scale) - // leading_exp = decimal exponent of the leading significant digit. - // - // For non-zero `Decimal`s `leading_exp` lies in `[-28, 28]`. The - // pipeline also runs for zero inputs (significand = 0, digits = 0, - // trailing = 0), where the formula collapses to `-1 - scale` and - // `leading_exp` lands in `[-29, -1]`; this produces a perfectly valid - // — though arbitrary — non-zero positive plaintext that we'll - // overwrite at the end with the canonical zero. The widened range - // `[-29, 28]` covers both branches without leaking the zero/non-zero - // distinction in debug builds either. - let leading_exp = digits as i32 - 1 + trailing - scale; - debug_assert!( - (-29..=28).contains(&leading_exp), - "leading_exp {} out of bounds — mantissa or scale corrupted", - leading_exp, - ); - let biased_exp = (leading_exp + EXP_BIAS) as u8; - debug_assert!(biased_exp <= EXP_MASK, "biased_exp overflowed 7 bits"); - - // Pad the significand out to 29 decimal digits so same-exponent compares - // across different significand lengths are byte-wise correct. - // - // We can't write this as `significand * 10u128.pow(PADDED_DIGITS - digits)` - // — `u128::pow` is square-and-multiply on the bits of its exponent, with - // both the iteration count and the conditional `acc * base` step driven - // by the exponent value. Since the exponent here is `PADDED_DIGITS − - // digits` and `digits` is derived from the secret mantissa, that would - // leak the digit count via timing. - // - // Instead, run a fixed `PADDED_DIGITS`-iteration loop that multiplies - // `padded_mantissa` by 10 under a branchless mask. 
The mask is `1` while - // we still have padding to apply (`digits + i < PADDED_DIGITS`) and `0` - // afterwards; the multiplication itself is computed unconditionally each - // iteration so the instruction sequence doesn't depend on `digits`. - // - // No overflow concern: in any iteration where the mask is `1` we have - // `padded_mantissa < 10^(PADDED_DIGITS-1) ≤ 10^28`, so `× 10` stays - // under `10^29`. In iterations where the mask is `0`, `padded_mantissa` - // sits at its final value (≤ `10^29`) and the unstored `× 10` product - // is at most `10^30 ≈ 2^99.7`, well inside `u128`. - let mut padded_mantissa = significand; - for i in 0..PADDED_DIGITS { - let do_step = ((digits + i) < PADDED_DIGITS) as u128; - let mask = 0u128.wrapping_sub(do_step); - let stepped = padded_mantissa.wrapping_mul(10); - padded_mantissa = (padded_mantissa & !mask) | (stepped & mask); - } - let mant_be = padded_mantissa.to_be_bytes(); - debug_assert!( - mant_be[..16 - MANTISSA_BYTES].iter().all(|&b| b == 0), - "padded mantissa overflowed 104 bits", - ); - let mant_field = &mant_be[16 - MANTISSA_BYTES..]; - - // Sign-class handling is folded into a single branchless mask so the - // function executes the same instructions regardless of the input's - // sign. `neg_mask` is `0xFF` for negatives and `0x00` for positives - // (and zero, which lives in the positive sign-class), formed from the - // arithmetic shift of the sign bit and a u8 truncation. - // - // - byte 0: positives want `SIGN_BIT | biased_exp`; negatives want - // `(!biased_exp) & EXP_MASK`. Expressed as one expression: - // (SIGN_BIT & !neg_mask) — keep sign bit only when positive - // | (biased_exp ^ (neg_mask & EXP_MASK)) - // — XOR the 7 exp bits with `neg_mask`, - // which is a no-op for positives and - // a 7-bit complement for negatives. - // - // - mantissa bytes: positives want the bytes unchanged; negatives want - // the bitwise complement. 
`b ^ neg_mask` does both: XOR with `0x00` - // is a no-op, XOR with `0xFF` is bitwise NOT. - let neg_mask = (raw_mantissa >> 127) as u8; - out[0] = (SIGN_BIT & !neg_mask) | (biased_exp ^ (neg_mask & EXP_MASK)); - for (i, &b) in mant_field.iter().enumerate() { - out[1 + i] = b ^ neg_mask; - } +impl ToOrderableBytes for Decimal { + const ENCODED_LEN: usize = 14; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + let d = self; + let mut out = [0u8; Self::ENCODED_LEN]; + + // The pipeline runs unconditionally — no early return for zero inputs. + // A `d.is_zero()` short-circuit at the top would distinguish zero from + // non-zero plaintexts via timing. Instead we feed zero through the same + // sequence of operations as every other value (the helpers tolerate + // `m == 0` and produce `(significand=0, digits=0, trailing=0)`) and + // canonicalise the resulting byte 0 to the zero plaintext at the end + // via a branchless mask. + // + // We deliberately don't call `Decimal::normalize()` here. `normalize` + // strips trailing zeros from the mantissa via a `while scale > 0` loop + // whose iteration count depends on the secret value's trailing-zero + // count — a timing side channel. Our own `strip_trailing_zeros` already + // strips *all* trailing zeros (a strict superset of what `normalize` + // would remove, since it doesn't stop at scale=0), so the leading-digit + // exponent we compute below is identical whether the input has been + // normalised first or not. Skipping the call removes the leak. + let raw_mantissa = d.mantissa(); + let scale = d.scale() as i32; + // Branchless absolute value via the standard two's-complement identity + // `abs(x) = (x ^ s) - s` where `s` is the arithmetic right-shift of the + // sign bit (`-1` if `x` is negative, `0` otherwise). For positives this + // collapses to `x - 0 = x`; for negatives to `~x + 1 = -x`. 
Equivalent + // in value to `i128::unsigned_abs`, which compiles to a CMOV on tier-1 + // ISAs but is not language-guaranteed constant-time. The explicit form + // here removes the dependency on optimiser behaviour. + let sign_extension = raw_mantissa >> 127; + let abs_mantissa = ((raw_mantissa ^ sign_extension).wrapping_sub(sign_extension)) as u128; + let (significand, trailing) = strip_trailing_zeros(abs_mantissa); + let digits = digit_count(significand); + + // value = ±significand × 10^trailing × 10^(-scale) + // leading_exp = decimal exponent of the leading significant digit. + // + // For non-zero `Decimal`s `leading_exp` lies in `[-28, 28]`. The + // pipeline also runs for zero inputs (significand = 0, digits = 0, + // trailing = 0), where the formula collapses to `-1 - scale` and + // `leading_exp` lands in `[-29, -1]`; this produces a perfectly valid + // — though arbitrary — non-zero positive plaintext that we'll + // overwrite at the end with the canonical zero. The widened range + // `[-29, 28]` covers both branches without leaking the zero/non-zero + // distinction in debug builds either. + let leading_exp = digits as i32 - 1 + trailing - scale; + debug_assert!( + (-29..=28).contains(&leading_exp), + "leading_exp {} out of bounds — mantissa or scale corrupted", + leading_exp, + ); + let biased_exp = (leading_exp + EXP_BIAS) as u8; + debug_assert!(biased_exp <= EXP_MASK, "biased_exp overflowed 7 bits"); + + // Pad the significand out to 29 decimal digits so same-exponent compares + // across different significand lengths are byte-wise correct. + // + // We can't write this as `significand * 10u128.pow(PADDED_DIGITS - digits)` + // — `u128::pow` is square-and-multiply on the bits of its exponent, with + // both the iteration count and the conditional `acc * base` step driven + // by the exponent value. 
Since the exponent here is `PADDED_DIGITS − + // digits` and `digits` is derived from the secret mantissa, that would + // leak the digit count via timing. + // + // Instead, run a fixed `PADDED_DIGITS`-iteration loop that multiplies + // `padded_mantissa` by 10 under a branchless mask. The mask is `1` while + // we still have padding to apply (`digits + i < PADDED_DIGITS`) and `0` + // afterwards; the multiplication itself is computed unconditionally each + // iteration so the instruction sequence doesn't depend on `digits`. + // + // No overflow concern: in any iteration where the mask is `1` we have + // `padded_mantissa < 10^(PADDED_DIGITS-1) ≤ 10^28`, so `× 10` stays + // under `10^29`. In iterations where the mask is `0`, `padded_mantissa` + // sits at its final value (≤ `10^29`) and the unstored `× 10` product + // is at most `10^30 ≈ 2^99.7`, well inside `u128`. + let mut padded_mantissa = significand; + for i in 0..PADDED_DIGITS { + let do_step = ((digits + i) < PADDED_DIGITS) as u128; + let mask = 0u128.wrapping_sub(do_step); + let stepped = padded_mantissa.wrapping_mul(10); + padded_mantissa = (padded_mantissa & !mask) | (stepped & mask); + } + let mant_be = padded_mantissa.to_be_bytes(); + debug_assert!( + mant_be[..16 - MANTISSA_BYTES].iter().all(|&b| b == 0), + "padded mantissa overflowed 104 bits", + ); + let mant_field = &mant_be[16 - MANTISSA_BYTES..]; + + // Sign-class handling is folded into a single branchless mask so the + // function executes the same instructions regardless of the input's + // sign. `neg_mask` is `0xFF` for negatives and `0x00` for positives + // (and zero, which lives in the positive sign-class), formed from the + // arithmetic shift of the sign bit and a u8 truncation. + // + // - byte 0: positives want `SIGN_BIT | biased_exp`; negatives want + // `(!biased_exp) & EXP_MASK`. 
Expressed as one expression: + // (SIGN_BIT & !neg_mask) — keep sign bit only when positive + // | (biased_exp ^ (neg_mask & EXP_MASK)) + // — XOR the 7 exp bits with `neg_mask`, + // which is a no-op for positives and + // a 7-bit complement for negatives. + // + // - mantissa bytes: positives want the bytes unchanged; negatives want + // the bitwise complement. `b ^ neg_mask` does both: XOR with `0x00` + // is a no-op, XOR with `0xFF` is bitwise NOT. + let neg_mask = (raw_mantissa >> 127) as u8; + out[0] = (SIGN_BIT & !neg_mask) | (biased_exp ^ (neg_mask & EXP_MASK)); + for (i, &b) in mant_field.iter().enumerate() { + out[1 + i] = b ^ neg_mask; + } - // Final canonicalisation for the zero plaintext, applied branchlessly - // so the function's timing doesn't reveal whether the input was zero. - // - // The non-zero pipeline ran end-to-end on the zero input too. With - // `significand = 0` the padded mantissa is also `0`, so `out[1..]` is - // already the all-zero canonical zero tail; we only need to fix up - // `out[0]`, which currently holds some valid-looking positive - // `SIGN_BIT | biased_exp` byte. - // - // Build a full-byte mask `zero_mask` that is `0xFF` when `abs_mantissa - // == 0` and `0x00` otherwise: - // - `(x | -x) >> 127` is `1` if `x != 0`, `0` if `x == 0` (standard - // u128 nonzero-detection idiom). - // - XOR with `1` flips it to "is zero". - // - Subtract from `0u8` to broadcast the bit across all 8 bits. - // Then merge: keep `out[0]` for non-zero, replace with `SIGN_BIT` for - // zero. - let mant_nonzero_bit = ((abs_mantissa | abs_mantissa.wrapping_neg()) >> 127) as u8; - let zero_mask = 0u8.wrapping_sub(mant_nonzero_bit ^ 1); - out[0] = (out[0] & !zero_mask) | (SIGN_BIT & zero_mask); - out + // Final canonicalisation for the zero plaintext, applied branchlessly + // so the function's timing doesn't reveal whether the input was zero. + // + // The non-zero pipeline ran end-to-end on the zero input too. 
With + // `significand = 0` the padded mantissa is also `0`, so `out[1..]` is + // already the all-zero canonical zero tail; we only need to fix up + // `out[0]`, which currently holds some valid-looking positive + // `SIGN_BIT | biased_exp` byte. + // + // Build a full-byte mask `zero_mask` that is `0xFF` when `abs_mantissa + // == 0` and `0x00` otherwise: + // - `(x | -x) >> 127` is `1` if `x != 0`, `0` if `x == 0` (standard + // u128 nonzero-detection idiom). + // - XOR with `1` flips it to "is zero". + // - Subtract from `0u8` to broadcast the bit across all 8 bits. + // Then merge: keep `out[0]` for non-zero, replace with `SIGN_BIT` for + // zero. + let mant_nonzero_bit = ((abs_mantissa | abs_mantissa.wrapping_neg()) >> 127) as u8; + let zero_mask = 0u8.wrapping_sub(mant_nonzero_bit ^ 1); + out[0] = (out[0] & !zero_mask) | (SIGN_BIT & zero_mask); + out + } } /// `5⁻¹ mod 2¹²⁸`. Verified: `5 * INV5 ≡ 1 (mod 2¹²⁸)`. Used to substitute @@ -333,33 +338,33 @@ mod tests { #[test] fn zero_canonicalises_to_sign_bit_only() { - let mut expected = [0u8; ENCODED_LEN]; + let mut expected = [0u8; 14]; expected[0] = SIGN_BIT; - assert_eq!(to_orderable_bytes(&dec!(0)), expected); - assert_eq!(to_orderable_bytes(&dec!(0.0)), expected); - assert_eq!(to_orderable_bytes(&dec!(0.000)), expected); + assert_eq!(dec!(0).to_orderable_bytes(), expected); + assert_eq!(dec!(0.0).to_orderable_bytes(), expected); + assert_eq!(dec!(0.000).to_orderable_bytes(), expected); } #[test] fn negative_zero_canonicalises_with_zero() { let neg_zero = -dec!(0); - assert_eq!(to_orderable_bytes(&neg_zero), to_orderable_bytes(&dec!(0))); + assert_eq!(neg_zero.to_orderable_bytes(), dec!(0).to_orderable_bytes()); } #[test] fn equivalent_forms_canonicalise_identically() { - let one = to_orderable_bytes(&dec!(1)); - assert_eq!(to_orderable_bytes(&dec!(1.0)), one); - assert_eq!(to_orderable_bytes(&dec!(1.00)), one); - assert_eq!(to_orderable_bytes(&dec!(1.000)), one); + let one = dec!(1).to_orderable_bytes(); 
+ assert_eq!(dec!(1.0).to_orderable_bytes(), one); + assert_eq!(dec!(1.00).to_orderable_bytes(), one); + assert_eq!(dec!(1.000).to_orderable_bytes(), one); } #[test] fn integer_trailing_zeros_share_significand_bytes() { // 100 strips to (sig=1, leading_exp=2). Same significand as 1, so the // padded-mantissa region must match. - let one = to_orderable_bytes(&dec!(1)); - let hundred = to_orderable_bytes(&dec!(100)); + let one = dec!(1).to_orderable_bytes(); + let hundred = dec!(100).to_orderable_bytes(); assert_eq!(&one[1..], &hundred[1..]); // Top bit (sign) matches; low 7 bits differ by leading_exp. assert_eq!(one[0] & SIGN_BIT, SIGN_BIT); @@ -370,27 +375,27 @@ mod tests { #[test] fn worked_positive_examples() { - let one = to_orderable_bytes(&dec!(1)); + let one = dec!(1).to_orderable_bytes(); assert_eq!(one[0], SIGN_BIT | (EXP_BIAS as u8)); - let half = to_orderable_bytes(&dec!(0.5)); + let half = dec!(0.5).to_orderable_bytes(); assert_eq!(half[0], SIGN_BIT | ((-1i32 + EXP_BIAS) as u8)); - let ten = to_orderable_bytes(&dec!(10)); + let ten = dec!(10).to_orderable_bytes(); assert_eq!(ten[0], SIGN_BIT | ((1i32 + EXP_BIAS) as u8)); } #[test] fn worked_negative_examples() { - let neg_one = to_orderable_bytes(&dec!(-1)); - let pos_one = to_orderable_bytes(&dec!(1)); + let neg_one = dec!(-1).to_orderable_bytes(); + let pos_one = dec!(1).to_orderable_bytes(); // Negative byte 0: sign bit clear, low 7 bits are inverted exp. assert_eq!(neg_one[0] & SIGN_BIT, 0); assert_eq!(neg_one[0] & EXP_MASK, !(EXP_BIAS as u8) & EXP_MASK); // Negative mantissa bytes are bitwise complements of the positive. 
- for i in 1..ENCODED_LEN { + for i in 1..::ENCODED_LEN { assert_eq!(neg_one[i], !pos_one[i]); } } @@ -415,8 +420,8 @@ mod tests { Decimal::MAX, ]; for window in values.windows(2) { - let a = to_orderable_bytes(&window[0]); - let b = to_orderable_bytes(&window[1]); + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); assert!( a < b, "to_orderable_bytes({}) < to_orderable_bytes({}) failed", diff --git a/packages/orderable-bytes/src/lib.rs b/packages/orderable-bytes/src/lib.rs index 7248b95..4a4e8e4 100644 --- a/packages/orderable-bytes/src/lib.rs +++ b/packages/orderable-bytes/src/lib.rs @@ -2,14 +2,13 @@ //! Canonical, order-preserving fixed-length byte encodings for plaintext //! types. //! -//! Each module exposes a `to_orderable_bytes` function that maps a value of -//! its target type to a fixed-length byte array whose byte-wise -//! lexicographic order agrees with the type's natural total order, and -//! whose byte equality agrees with the type's value equality. The -//! resulting bytes are scheme-agnostic — they're intended for any -//! comparison-as-bytes scheme that wants to preserve plaintext order on -//! ciphertexts (e.g. `ore-rs` BlockORE, an OPE construction, an ordered -//! hash). +//! Each supported type implements [`ToOrderableBytes`], which maps a +//! value to a fixed-length byte array whose byte-wise lexicographic +//! order agrees with the type's natural total order, and whose byte +//! equality agrees with the type's value equality. The resulting bytes +//! are scheme-agnostic — they're intended for any comparison-as-bytes +//! scheme that wants to preserve plaintext order on ciphertexts (e.g. +//! `ore-rs` BlockORE, an OPE construction, an ordered hash). //! //! Encoders are gated behind per-type feature flags so callers only pay //! for the dependencies they actually use. 
@@ -22,3 +21,38 @@ pub mod decimal; #[cfg(test)] #[macro_use] extern crate quickcheck; + +/// Maps a value to its canonical, order-preserving fixed-length byte +/// encoding. +/// +/// Implementors guarantee, for any `a` and `b` of the implementing type: +/// +/// - **Equality:** byte equality of the outputs agrees with the type's +/// value equality (`a.to_orderable_bytes() == b.to_orderable_bytes()` +/// iff `a == b`). +/// - **Order:** byte-wise lexicographic comparison of the outputs agrees +/// with the type's natural total order +/// (`a.to_orderable_bytes() <= b.to_orderable_bytes()` iff `a <= b`). +/// +/// The encoded length is fixed per type and exposed via +/// [`ENCODED_LEN`](Self::ENCODED_LEN). Per-type modules also re-export +/// the same value as a free `pub const` for use in const contexts where +/// naming the impl would be unwieldy. +pub trait ToOrderableBytes { + /// Length, in bytes, of the canonical encoding produced by + /// [`to_orderable_bytes`](Self::to_orderable_bytes). + const ENCODED_LEN: usize; + + /// The fixed-length byte array type returned by + /// [`to_orderable_bytes`](Self::to_orderable_bytes). By convention + /// every impl sets this to `[u8; Self::ENCODED_LEN]`; the + /// indirection through an associated type is only needed because + /// stable Rust does not yet allow naming `[u8; Self::ENCODED_LEN]` + /// directly in a method signature (that requires + /// `feature(generic_const_exprs)`). + type Bytes: AsRef<[u8]>; + + /// Build the canonical, order-preserving fixed-length byte encoding + /// of `self`. 
+ fn to_orderable_bytes(&self) -> Self::Bytes; +} diff --git a/packages/ore-rs/src/chrono.rs b/packages/ore-rs/src/chrono.rs index 6ecaf13..06d770e 100644 --- a/packages/ore-rs/src/chrono.rs +++ b/packages/ore-rs/src/chrono.rs @@ -11,24 +11,21 @@ use crate::ciphertext::*; use crate::{OreCipher, OreEncrypt, OreError}; use ::chrono::{DateTime, NaiveDate, Utc}; +use orderable_bytes::ToOrderableBytes; -const NAIVE_DATE_LEN: usize = orderable_bytes::chrono::naive_date::ENCODED_LEN; -const DATETIME_UTC_LEN: usize = orderable_bytes::chrono::datetime_utc::ENCODED_LEN; +const NAIVE_DATE_LEN: usize = ::ENCODED_LEN; +const DATETIME_UTC_LEN: usize = as ToOrderableBytes>::ENCODED_LEN; impl OreEncrypt for NaiveDate { type LeftOutput = Left; type FullOutput = CipherText; fn encrypt_left(&self, cipher: &T) -> Result { - cipher.encrypt_left(&orderable_bytes::chrono::naive_date::to_orderable_bytes( - self, - )) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) -> Result { - cipher.encrypt(&orderable_bytes::chrono::naive_date::to_orderable_bytes( - self, - )) + cipher.encrypt(&self.to_orderable_bytes()) } } @@ -37,15 +34,11 @@ impl OreEncrypt for DateTime { type FullOutput = CipherText; fn encrypt_left(&self, cipher: &T) -> Result { - cipher.encrypt_left(&orderable_bytes::chrono::datetime_utc::to_orderable_bytes( - self, - )) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) -> Result { - cipher.encrypt(&orderable_bytes::chrono::datetime_utc::to_orderable_bytes( - self, - )) + cipher.encrypt(&self.to_orderable_bytes()) } } diff --git a/packages/ore-rs/src/decimal.rs b/packages/ore-rs/src/decimal.rs index 8a07f06..2253bde 100644 --- a/packages/ore-rs/src/decimal.rs +++ b/packages/ore-rs/src/decimal.rs @@ -10,19 +10,21 @@ use crate::ciphertext::{CipherText, Left}; use crate::encrypt::OreEncrypt; use crate::{OreCipher, OreError}; -use orderable_bytes::decimal::{to_orderable_bytes, ENCODED_LEN}; +use 
orderable_bytes::ToOrderableBytes; use rust_decimal::Decimal; +const ENCODED_LEN: usize = ::ENCODED_LEN; + impl OreEncrypt for Decimal { type LeftOutput = Left; type FullOutput = CipherText; fn encrypt_left(&self, cipher: &T) -> Result { - cipher.encrypt_left(&to_orderable_bytes(self)) + cipher.encrypt_left(&self.to_orderable_bytes()) } fn encrypt(&self, cipher: &T) -> Result { - cipher.encrypt(&to_orderable_bytes(self)) + cipher.encrypt(&self.to_orderable_bytes()) } } From 9ab4eeacf08a4ae95d0d119614ee4b4c69592745 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 15:58:46 +1000 Subject: [PATCH 02/11] feat(orderable-bytes): impl ToOrderableBytes for i16/i32/i64/f64 Adds a `numeric` module with `ToOrderableBytes` impls for the signed integer primitives `i16`, `i32`, `i64` and the IEEE 754 double `f64`, each emitting the type's native byte width: - Integers: sign-flip the top bit, then big-endian. Moves negatives below positives in lex order while preserving order within each sign class. - f64: standard IEEE 754 monotonic mapping (flip all bits for negatives, sign bit only for positives), with `-0.0` canonicalised to `+0.0` so the two share an encoding. NaN handling is unspecified (NaN is unordered under `PartialOrd`). Mirrors the `IntoOrePlaintext` impls in cipherstash-suite::ope_indexer::conversion, but at native widths rather than always widening to u64. 
--- packages/orderable-bytes/src/lib.rs | 1 + packages/orderable-bytes/src/numeric.rs | 213 ++++++++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 packages/orderable-bytes/src/numeric.rs diff --git a/packages/orderable-bytes/src/lib.rs b/packages/orderable-bytes/src/lib.rs index 4a4e8e4..70ae78f 100644 --- a/packages/orderable-bytes/src/lib.rs +++ b/packages/orderable-bytes/src/lib.rs @@ -17,6 +17,7 @@ pub mod chrono; #[cfg(feature = "decimal")] pub mod decimal; +pub mod numeric; #[cfg(test)] #[macro_use] diff --git a/packages/orderable-bytes/src/numeric.rs b/packages/orderable-bytes/src/numeric.rs new file mode 100644 index 0000000..f8a1305 --- /dev/null +++ b/packages/orderable-bytes/src/numeric.rs @@ -0,0 +1,213 @@ +//! Canonical, order-preserving fixed-length byte encodings for the +//! signed-integer primitives `i16`, `i32`, `i64` and the IEEE 754 +//! double `f64`. +//! +//! Each impl emits the type's native byte width (no widening). Byte-wise +//! lex compare on the output agrees with the type's natural total order +//! (or partial order, in the f64 case — see below). +//! +//! ## Signed integers (`i16`, `i32`, `i64`) +//! +//! Two's-complement signed integers are mapped to their unsigned +//! equivalent by flipping the sign bit (`x ^ (1 << (N-1))`), then +//! serialised big-endian. Sign-flipping moves negatives below positives +//! (sign bit `1` for negatives clears to `0`, vice versa for positives) +//! and preserves order within each sign class. +//! +//! ## `f64` +//! +//! IEEE 754 doubles are mapped to a lex-orderable `u64` using the +//! standard monotonic encoding: +//! +//! - Negatives flip every bit (their bit pattern's lex order is the +//! reverse of magnitude order, so flipping inverts it). +//! - Positives (and `+0.0`) flip only the sign bit (bringing them above +//! negatives in lex order). +//! +//! `-0.0` is canonicalised to `+0.0` before encoding so the two compare +//! byte-equal — matching `-0.0 == 0.0` on `f64`. 
+//! +//! NaN handling is unspecified. `f64` is `PartialOrd` rather than `Ord` +//! (NaN compares unordered against every value, including itself), so +//! the trait's order/equality guarantees only apply to non-NaN inputs. +//! Different NaN bit patterns will produce different bytes; consumers +//! that need a canonical NaN must canonicalise upstream. + +use crate::ToOrderableBytes; + +impl ToOrderableBytes for i16 { + const ENCODED_LEN: usize = 2; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u16) ^ (1u16 << 15)).to_be_bytes() + } +} + +impl ToOrderableBytes for i32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u32) ^ (1u32 << 31)).to_be_bytes() + } +} + +impl ToOrderableBytes for i64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u64) ^ (1u64 << 63)).to_be_bytes() + } +} + +impl ToOrderableBytes for f64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // Canonicalise -0.0 → 0.0 so the two share one byte encoding + // (their f64 equality demands byte equality under our contract). + let value = if *self == -0.0 { 0.0 } else { *self }; + let bits = value.to_bits(); + // Branchless monotonic mapping. `sign_extension` is `u64::MAX` + // when the input is negative (sign bit `1`) and `0` when + // positive. ORing in `1 << 63` makes the mask `u64::MAX` for + // negatives (XOR-flip every bit) and `1 << 63` for positives + // (XOR-flip just the sign bit). 
+ let sign_extension = (bits as i64 >> 63) as u64; + let mask = sign_extension | (1u64 << 63); + (bits ^ mask).to_be_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // --- i16 --- + + #[test] + fn i16_known_anchors() { + // i16::MIN sign-flips to 0x0000, 0 to 0x8000, i16::MAX to 0xFFFF. + assert_eq!(i16::MIN.to_orderable_bytes(), [0x00, 0x00]); + assert_eq!(0i16.to_orderable_bytes(), [0x80, 0x00]); + assert_eq!(i16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); + } + + #[test] + fn i16_byte_order_matches_natural_order() { + let ascending = [i16::MIN, -10000, -1, 0, 1, 10000, i16::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i32 --- + + #[test] + fn i32_known_anchors() { + assert_eq!(i32::MIN.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); + assert_eq!(0i32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]); + assert_eq!(i32::MAX.to_orderable_bytes(), [0xFF, 0xFF, 0xFF, 0xFF]); + } + + #[test] + fn i32_byte_order_matches_natural_order() { + let ascending = [i32::MIN, -1_000_000_000, -1, 0, 1, 1_000_000_000, i32::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i64 --- + + #[test] + fn i64_known_anchors() { + assert_eq!(i64::MIN.to_orderable_bytes(), [0x00; 8]); + assert_eq!( + 0i64.to_orderable_bytes(), + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + ); + assert_eq!(i64::MAX.to_orderable_bytes(), [0xFF; 8]); + } + + #[test] + fn i64_byte_order_matches_natural_order() { + let ascending = [ + i64::MIN, + -1_000_000_000_000, + -1, + 0, + 1, + 1_000_000_000_000, + i64::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- f64 --- + 
+ #[test] + fn f64_zero_canonical_bytes() { + // +0.0 → 0x8000_0000_0000_0000 (sign-bit-only flip on all-zero bits). + assert_eq!( + 0.0f64.to_orderable_bytes(), + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + ); + } + + #[test] + fn f64_negative_zero_canonicalises_with_zero() { + assert_eq!((-0.0f64).to_orderable_bytes(), 0.0f64.to_orderable_bytes()); + } + + #[test] + fn f64_byte_order_matches_natural_order() { + let ascending = [ + f64::NEG_INFINITY, + f64::MIN, + -1e100, + -1.0, + -f64::MIN_POSITIVE, + 0.0, + f64::MIN_POSITIVE, + 1.0, + 1e100, + f64::MAX, + f64::INFINITY, + ]; + for window in ascending.windows(2) { + let a = window[0].to_orderable_bytes(); + let b = window[1].to_orderable_bytes(); + assert!(a < b, "{} < {} failed", window[0], window[1]); + } + } + + #[test] + fn f64_subnormals_sort_above_zero_below_normals() { + // Smallest positive subnormal (`f64::from_bits(1)`) must land + // strictly between 0.0 and the smallest positive normal. + let subnormal = f64::from_bits(1); + assert!(0.0f64.to_orderable_bytes() < subnormal.to_orderable_bytes()); + assert!(subnormal.to_orderable_bytes() < f64::MIN_POSITIVE.to_orderable_bytes()); + } +} From e93c33ad77493cf13219b2eb2cfc94526d4bf380 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 16:02:45 +1000 Subject: [PATCH 03/11] refactor(orderable-bytes): widen i16/i32 numeric impls to [u8; 8] Match the `IntoOrePlaintext` widening used by the cipherstash-suite ORE indexer: sign-flip at native width, then zero-extend to `u64` before BE serialisation. All four primitive impls now return `[u8; 8]` so they share the same downstream ORE ciphertext shape. 
--- packages/orderable-bytes/src/numeric.rs | 69 ++++++++++++++++++------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/packages/orderable-bytes/src/numeric.rs b/packages/orderable-bytes/src/numeric.rs index f8a1305..4d1ed11 100644 --- a/packages/orderable-bytes/src/numeric.rs +++ b/packages/orderable-bytes/src/numeric.rs @@ -2,17 +2,28 @@ //! signed-integer primitives `i16`, `i32`, `i64` and the IEEE 754 //! double `f64`. //! -//! Each impl emits the type's native byte width (no widening). Byte-wise -//! lex compare on the output agrees with the type's natural total order -//! (or partial order, in the f64 case — see below). +//! All four impls emit a fixed `[u8; 8]`. Narrower integer types are +//! sign-flipped within their native width and then zero-extended to +//! `u64` before big-endian serialisation, matching the +//! `IntoOrePlaintext` widening used by the cipherstash-suite ORE +//! indexer (so an `i16` value lands in the low two bytes of the +//! output, with the upper six bytes zero). +//! +//! Byte-wise lex compare on the output agrees with the type's natural +//! total order *within that type*. Cross-type comparison is not +//! meaningful — `i16(0)` and `i64(0)` both encode to non-equal byte +//! patterns, and the encodings of an `i16` value and the same value +//! held as `i64` differ. //! //! ## Signed integers (`i16`, `i32`, `i64`) //! -//! Two's-complement signed integers are mapped to their unsigned -//! equivalent by flipping the sign bit (`x ^ (1 << (N-1))`), then -//! serialised big-endian. Sign-flipping moves negatives below positives -//! (sign bit `1` for negatives clears to `0`, vice versa for positives) -//! and preserves order within each sign class. +//! Each two's-complement input is mapped to its unsigned equivalent by +//! flipping the sign bit at its native width (`x ^ (1 << (N-1))`), +//! widened to `u64` by zero-extension, and serialised big-endian. +//! 
Sign-flipping moves negatives below positives (the sign bit `1` for +//! negatives clears to `0`, vice versa for positives) and preserves +//! order within each sign class; the zero-extension is a no-op on lex +//! order because the high padding bytes are constant. //! //! ## `f64` //! @@ -36,20 +47,22 @@ use crate::ToOrderableBytes; impl ToOrderableBytes for i16 { - const ENCODED_LEN: usize = 2; + const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - ((*self as u16) ^ (1u16 << 15)).to_be_bytes() + let sign_flipped = (*self as u16) ^ (1u16 << 15); + u64::from(sign_flipped).to_be_bytes() } } impl ToOrderableBytes for i32 { - const ENCODED_LEN: usize = 4; + const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - ((*self as u32) ^ (1u32 << 31)).to_be_bytes() + let sign_flipped = (*self as u32) ^ (1u32 << 31); + u64::from(sign_flipped).to_be_bytes() } } @@ -90,10 +103,17 @@ mod tests { #[test] fn i16_known_anchors() { - // i16::MIN sign-flips to 0x0000, 0 to 0x8000, i16::MAX to 0xFFFF. - assert_eq!(i16::MIN.to_orderable_bytes(), [0x00, 0x00]); - assert_eq!(0i16.to_orderable_bytes(), [0x80, 0x00]); - assert_eq!(i16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); + // Sign-flip at u16, then zero-extend to u64 BE: the i16 value + // lands in the low two bytes, with the upper six bytes zero. 
+ assert_eq!( + i16::MIN.to_orderable_bytes(), + [0, 0, 0, 0, 0, 0, 0x00, 0x00] + ); + assert_eq!(0i16.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0x80, 0x00]); + assert_eq!( + i16::MAX.to_orderable_bytes(), + [0, 0, 0, 0, 0, 0, 0xFF, 0xFF] + ); } #[test] @@ -113,9 +133,20 @@ mod tests { #[test] fn i32_known_anchors() { - assert_eq!(i32::MIN.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); - assert_eq!(0i32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]); - assert_eq!(i32::MAX.to_orderable_bytes(), [0xFF, 0xFF, 0xFF, 0xFF]); + // Sign-flip at u32, then zero-extend to u64 BE: the i32 value + // lands in the low four bytes, with the upper four bytes zero. + assert_eq!( + i32::MIN.to_orderable_bytes(), + [0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00] + ); + assert_eq!( + 0i32.to_orderable_bytes(), + [0, 0, 0, 0, 0x80, 0x00, 0x00, 0x00] + ); + assert_eq!( + i32::MAX.to_orderable_bytes(), + [0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF] + ); } #[test] From 86a1d16b5354d630d1091c98d08ffad025f6a113 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 16:52:53 +1000 Subject: [PATCH 04/11] feat(orderable-bytes): impl ToOrderableBytes for bool Adds a `bool` impl in the `numeric` module, padded to `[u8; 8]` to match the other primitive impls. `false` encodes as `[0; 8]` and `true` as `[0, 0, 0, 0, 0, 0, 0, 1]`, mirroring the `IntoOrePlaintext` impl in cipherstash-suite::ope_indexer (`OrePlaintext(*x as u64)`). --- packages/orderable-bytes/src/numeric.rs | 40 ++++++++++++++++++++----- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/packages/orderable-bytes/src/numeric.rs b/packages/orderable-bytes/src/numeric.rs index 4d1ed11..190795d 100644 --- a/packages/orderable-bytes/src/numeric.rs +++ b/packages/orderable-bytes/src/numeric.rs @@ -1,13 +1,13 @@ //! Canonical, order-preserving fixed-length byte encodings for the -//! signed-integer primitives `i16`, `i32`, `i64` and the IEEE 754 -//! double `f64`. +//! primitives `bool`, `i16`, `i32`, `i64` and the IEEE 754 double +//! 
`f64`. //! -//! All four impls emit a fixed `[u8; 8]`. Narrower integer types are -//! sign-flipped within their native width and then zero-extended to -//! `u64` before big-endian serialisation, matching the -//! `IntoOrePlaintext` widening used by the cipherstash-suite ORE -//! indexer (so an `i16` value lands in the low two bytes of the -//! output, with the upper six bytes zero). +//! All five impls emit a fixed `[u8; 8]`. Narrower types are mapped to +//! `u64` (sign-flipping for signed integers, identity-cast for `bool`) +//! and serialised big-endian, matching the `IntoOrePlaintext` +//! widening used by the cipherstash-suite ORE indexer (so e.g. an +//! `i16` value lands in the low two bytes of the output, with the +//! upper six bytes zero). //! //! Byte-wise lex compare on the output agrees with the type's natural //! total order *within that type*. Cross-type comparison is not @@ -46,6 +46,17 @@ use crate::ToOrderableBytes; +impl ToOrderableBytes for bool { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // `false as u64 == 0`, `true as u64 == 1`. Lex order on the + // BE-encoded `u64` then puts `false` strictly below `true`. 
+ (*self as u64).to_be_bytes() + } +} + impl ToOrderableBytes for i16 { const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; @@ -99,6 +110,19 @@ impl ToOrderableBytes for f64 { mod tests { use super::*; + // --- bool --- + + #[test] + fn bool_known_anchors() { + assert_eq!(false.to_orderable_bytes(), [0; 8]); + assert_eq!(true.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x01]); + } + + #[test] + fn bool_byte_order_matches_natural_order() { + assert!(false.to_orderable_bytes() < true.to_orderable_bytes()); + } + // --- i16 --- #[test] From c5fea8ce1acd02fe1f865252b60bb39622dcbaaa Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 16:55:08 +1000 Subject: [PATCH 05/11] refactor(orderable-bytes): rename `numeric` module to `primitive` The module now hosts a `bool` impl alongside the integer and float impls; `primitive` describes the contents more accurately than `numeric`. --- packages/orderable-bytes/src/lib.rs | 2 +- packages/orderable-bytes/src/{numeric.rs => primitive.rs} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename packages/orderable-bytes/src/{numeric.rs => primitive.rs} (100%) diff --git a/packages/orderable-bytes/src/lib.rs b/packages/orderable-bytes/src/lib.rs index 70ae78f..ab27ef3 100644 --- a/packages/orderable-bytes/src/lib.rs +++ b/packages/orderable-bytes/src/lib.rs @@ -17,7 +17,7 @@ pub mod chrono; #[cfg(feature = "decimal")] pub mod decimal; -pub mod numeric; +pub mod primitive; #[cfg(test)] #[macro_use] diff --git a/packages/orderable-bytes/src/numeric.rs b/packages/orderable-bytes/src/primitive.rs similarity index 100% rename from packages/orderable-bytes/src/numeric.rs rename to packages/orderable-bytes/src/primitive.rs From a9b58f18fc04d72ec26d4263f2b926a25f8a4ef4 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 4 May 2026 16:58:36 +1000 Subject: [PATCH 06/11] feat(orderable-bytes): impl ToOrderableBytes for u8/i8 and u128/i128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Extends the `primitive` module with four more impls: - `u8` → `[u8; 8]`, zero-extended to `u64` BE. - `i8` → `[u8; 8]`, sign-flipped at `u8` width then zero-extended. - `u128` → `[u8; 16]`, native BE (already lex-ordered, no sign-flip). - `i128` → `[u8; 16]`, sign-flipped at `u128` width then native BE. The 8-bit pair shares the `[u8; 8]` width with `bool`/`i16`/`i32`/ `i64`/`f64` so they all route through the same downstream ORE ciphertext shape. The 128-bit pair uses native width since there's no wider standard integer type to pad to. --- packages/orderable-bytes/src/primitive.rs | 190 ++++++++++++++++++++-- 1 file changed, 176 insertions(+), 14 deletions(-) diff --git a/packages/orderable-bytes/src/primitive.rs b/packages/orderable-bytes/src/primitive.rs index 190795d..7d3a001 100644 --- a/packages/orderable-bytes/src/primitive.rs +++ b/packages/orderable-bytes/src/primitive.rs @@ -1,13 +1,18 @@ //! Canonical, order-preserving fixed-length byte encodings for the -//! primitives `bool`, `i16`, `i32`, `i64` and the IEEE 754 double -//! `f64`. +//! primitives `bool`, `u8`, `i8`, `i16`, `i32`, `i64`, `u128`, `i128`, +//! and the IEEE 754 double `f64`. //! -//! All five impls emit a fixed `[u8; 8]`. Narrower types are mapped to -//! `u64` (sign-flipping for signed integers, identity-cast for `bool`) -//! and serialised big-endian, matching the `IntoOrePlaintext` -//! widening used by the cipherstash-suite ORE indexer (so e.g. an -//! `i16` value lands in the low two bytes of the output, with the -//! upper six bytes zero). +//! Encoded widths: +//! +//! - `bool`, `u8`, `i8`, `i16`, `i32`, `i64`, `f64` → `[u8; 8]` +//! - `u128`, `i128` → `[u8; 16]` +//! +//! Sub-`u64` integer types are widened to `u64` (sign-flipping for +//! signed integers, identity-cast for `bool`/`u8`) and serialised +//! big-endian, matching the `IntoOrePlaintext` widening used by +//! the cipherstash-suite ORE indexer (so e.g. an `i16` value lands in +//! 
the low two bytes of the output, with the upper six bytes zero). +//! 128-bit integers use their native width. //! //! Byte-wise lex compare on the output agrees with the type's natural //! total order *within that type*. Cross-type comparison is not @@ -15,15 +20,21 @@ //! patterns, and the encodings of an `i16` value and the same value //! held as `i64` differ. //! -//! ## Signed integers (`i16`, `i32`, `i64`) +//! ## Unsigned integers (`u8`, `u128`) +//! +//! Already in lex order — no sign-flip needed. `u8` is zero-extended +//! to `u64` before BE serialisation; `u128` uses its native width. +//! +//! ## Signed integers (`i8`, `i16`, `i32`, `i64`, `i128`) //! //! Each two's-complement input is mapped to its unsigned equivalent by //! flipping the sign bit at its native width (`x ^ (1 << (N-1))`), -//! widened to `u64` by zero-extension, and serialised big-endian. -//! Sign-flipping moves negatives below positives (the sign bit `1` for -//! negatives clears to `0`, vice versa for positives) and preserves -//! order within each sign class; the zero-extension is a no-op on lex -//! order because the high padding bytes are constant. +//! widened to `u64` by zero-extension (or kept at native width for +//! `i128`), and serialised big-endian. Sign-flipping moves negatives +//! below positives (the sign bit `1` for negatives clears to `0`, vice +//! versa for positives) and preserves order within each sign class; +//! the zero-extension is a no-op on lex order because the high padding +//! bytes are constant. //! //! ## `f64` //! 
@@ -57,6 +68,25 @@ impl ToOrderableBytes for bool { } } +impl ToOrderableBytes for u8 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + u64::from(*self).to_be_bytes() + } +} + +impl ToOrderableBytes for i8 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + let sign_flipped = (*self as u8) ^ (1u8 << 7); + u64::from(sign_flipped).to_be_bytes() + } +} + impl ToOrderableBytes for i16 { const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; @@ -86,6 +116,24 @@ impl ToOrderableBytes for i64 { } } +impl ToOrderableBytes for u128 { + const ENCODED_LEN: usize = 16; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + +impl ToOrderableBytes for i128 { + const ENCODED_LEN: usize = 16; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + ((*self as u128) ^ (1u128 << 127)).to_be_bytes() + } +} + impl ToOrderableBytes for f64 { const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; @@ -123,6 +171,53 @@ mod tests { assert!(false.to_orderable_bytes() < true.to_orderable_bytes()); } + // --- u8 --- + + #[test] + fn u8_known_anchors() { + // Zero-extend to u64 BE: the u8 value lands in the last byte. 
+ assert_eq!(u8::MIN.to_orderable_bytes(), [0; 8]); + assert_eq!(0x42u8.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x42]); + assert_eq!(u8::MAX.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0xFF]); + } + + #[test] + fn u8_byte_order_matches_natural_order() { + let ascending = [u8::MIN, 1, 100, 200, u8::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i8 --- + + #[test] + fn i8_known_anchors() { + // Sign-flip at u8 (XOR 0x80), then zero-extend to u64 BE: the + // i8 value lands in the last byte, upper seven bytes zero. + assert_eq!(i8::MIN.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x00]); + assert_eq!(0i8.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x80]); + assert_eq!(i8::MAX.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0xFF]); + } + + #[test] + fn i8_byte_order_matches_natural_order() { + let ascending = [i8::MIN, -100, -1, 0, 1, 100, i8::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + // --- i16 --- #[test] @@ -219,6 +314,73 @@ mod tests { } } + // --- u128 --- + + #[test] + fn u128_known_anchors() { + assert_eq!(u128::MIN.to_orderable_bytes(), [0; 16]); + assert_eq!(u128::MAX.to_orderable_bytes(), [0xFF; 16]); + let one = 1u128.to_orderable_bytes(); + let mut expected_one = [0u8; 16]; + expected_one[15] = 1; + assert_eq!(one, expected_one); + } + + #[test] + fn u128_byte_order_matches_natural_order() { + let ascending = [ + u128::MIN, + 1, + (1u128 << 32), + (1u128 << 64), + (1u128 << 96), + u128::MAX - 1, + u128::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + + // --- i128 --- + + #[test] + fn i128_known_anchors() { + 
assert_eq!(i128::MIN.to_orderable_bytes(), [0; 16]); + assert_eq!(i128::MAX.to_orderable_bytes(), [0xFF; 16]); + let mut expected_zero = [0u8; 16]; + expected_zero[0] = 0x80; + assert_eq!(0i128.to_orderable_bytes(), expected_zero); + } + + #[test] + fn i128_byte_order_matches_natural_order() { + let ascending = [ + i128::MIN, + -(1i128 << 96), + -(1i128 << 64), + -1, + 0, + 1, + (1i128 << 64), + (1i128 << 96), + i128::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + // --- f64 --- #[test] From bfcb516922b1a5075a5cf381646d56f00b59e27c Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 5 May 2026 09:15:00 +1000 Subject: [PATCH 07/11] refactor(orderable-bytes): emit native widths for narrow primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Padding to a fixed `[u8; 8]` is the consumer's concern, not the encoding's: ORE constructions that need a uniform 8-byte plaintext should zero-extend upstream of the encrypter (widening is monotonic on lex order and preserves the encoding's guarantees), while OPE schemes can consume the native width directly. Reverts the earlier widen-to-`[u8; 8]` decision for `bool`, `u8`, `i8`, `i16`, `i32`. New widths: - `bool`, `u8`, `i8` → `[u8; 1]` - `i16` → `[u8; 2]` - `i32` → `[u8; 4]` `i64`, `u128`, `i128`, `f64` were already at native width. --- packages/orderable-bytes/src/primitive.rs | 116 ++++++++-------------- 1 file changed, 43 insertions(+), 73 deletions(-) diff --git a/packages/orderable-bytes/src/primitive.rs b/packages/orderable-bytes/src/primitive.rs index 7d3a001..66a0435 100644 --- a/packages/orderable-bytes/src/primitive.rs +++ b/packages/orderable-bytes/src/primitive.rs @@ -2,39 +2,30 @@ //! primitives `bool`, `u8`, `i8`, `i16`, `i32`, `i64`, `u128`, `i128`, //! and the IEEE 754 double `f64`. //! -//! Encoded widths: +//! 
Each impl emits the type's native byte width — no padding: //! -//! - `bool`, `u8`, `i8`, `i16`, `i32`, `i64`, `f64` → `[u8; 8]` +//! - `bool`, `u8`, `i8` → `[u8; 1]` +//! - `i16` → `[u8; 2]` +//! - `i32` → `[u8; 4]` +//! - `i64`, `f64` → `[u8; 8]` //! - `u128`, `i128` → `[u8; 16]` //! -//! Sub-`u64` integer types are widened to `u64` (sign-flipping for -//! signed integers, identity-cast for `bool`/`u8`) and serialised -//! big-endian, matching the `IntoOrePlaintext` widening used by -//! the cipherstash-suite ORE indexer (so e.g. an `i16` value lands in -//! the low two bytes of the output, with the upper six bytes zero). -//! 128-bit integers use their native width. -//! -//! Byte-wise lex compare on the output agrees with the type's natural -//! total order *within that type*. Cross-type comparison is not -//! meaningful — `i16(0)` and `i64(0)` both encode to non-equal byte -//! patterns, and the encodings of an `i16` value and the same value -//! held as `i64` differ. +//! Consumers that need a fixed wider encoding (e.g. an ORE construction +//! whose plaintext block size is `[u8; 8]`) should zero-extend the +//! orderable bytes upstream of the encrypter; widening is monotonic on +//! lex order so it preserves the encoding's guarantees. //! //! ## Unsigned integers (`u8`, `u128`) //! -//! Already in lex order — no sign-flip needed. `u8` is zero-extended -//! to `u64` before BE serialisation; `u128` uses its native width. +//! Already in lex order — no sign-flip needed. Native big-endian. //! //! ## Signed integers (`i8`, `i16`, `i32`, `i64`, `i128`) //! //! Each two's-complement input is mapped to its unsigned equivalent by //! flipping the sign bit at its native width (`x ^ (1 << (N-1))`), -//! widened to `u64` by zero-extension (or kept at native width for -//! `i128`), and serialised big-endian. Sign-flipping moves negatives -//! below positives (the sign bit `1` for negatives clears to `0`, vice -//! 
versa for positives) and preserves order within each sign class; -//! the zero-extension is a no-op on lex order because the high padding -//! bytes are constant. +//! then serialised big-endian. Sign-flipping moves negatives below +//! positives (the sign bit `1` for negatives clears to `0`, vice versa +//! for positives) and preserves order within each sign class. //! //! ## `f64` //! @@ -58,52 +49,49 @@ use crate::ToOrderableBytes; impl ToOrderableBytes for bool { - const ENCODED_LEN: usize = 8; + const ENCODED_LEN: usize = 1; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - // `false as u64 == 0`, `true as u64 == 1`. Lex order on the - // BE-encoded `u64` then puts `false` strictly below `true`. - (*self as u64).to_be_bytes() + // `false as u8 == 0`, `true as u8 == 1`. `false` sorts strictly + // below `true`. + [*self as u8] } } impl ToOrderableBytes for u8 { - const ENCODED_LEN: usize = 8; + const ENCODED_LEN: usize = 1; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - u64::from(*self).to_be_bytes() + [*self] } } impl ToOrderableBytes for i8 { - const ENCODED_LEN: usize = 8; + const ENCODED_LEN: usize = 1; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - let sign_flipped = (*self as u8) ^ (1u8 << 7); - u64::from(sign_flipped).to_be_bytes() + [(*self as u8) ^ (1u8 << 7)] } } impl ToOrderableBytes for i16 { - const ENCODED_LEN: usize = 8; + const ENCODED_LEN: usize = 2; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - let sign_flipped = (*self as u16) ^ (1u16 << 15); - u64::from(sign_flipped).to_be_bytes() + ((*self as u16) ^ (1u16 << 15)).to_be_bytes() } } impl ToOrderableBytes for i32 { - const ENCODED_LEN: usize = 8; + const ENCODED_LEN: usize = 4; type Bytes = [u8; Self::ENCODED_LEN]; fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { - let sign_flipped = 
(*self as u32) ^ (1u32 << 31); - u64::from(sign_flipped).to_be_bytes() + ((*self as u32) ^ (1u32 << 31)).to_be_bytes() } } @@ -162,8 +150,8 @@ mod tests { #[test] fn bool_known_anchors() { - assert_eq!(false.to_orderable_bytes(), [0; 8]); - assert_eq!(true.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x01]); + assert_eq!(false.to_orderable_bytes(), [0x00]); + assert_eq!(true.to_orderable_bytes(), [0x01]); } #[test] @@ -175,10 +163,10 @@ mod tests { #[test] fn u8_known_anchors() { - // Zero-extend to u64 BE: the u8 value lands in the last byte. - assert_eq!(u8::MIN.to_orderable_bytes(), [0; 8]); - assert_eq!(0x42u8.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x42]); - assert_eq!(u8::MAX.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0xFF]); + // Native: u8 is already in lex order, no transform. + assert_eq!(u8::MIN.to_orderable_bytes(), [0x00]); + assert_eq!(0x42u8.to_orderable_bytes(), [0x42]); + assert_eq!(u8::MAX.to_orderable_bytes(), [0xFF]); } #[test] @@ -198,11 +186,10 @@ mod tests { #[test] fn i8_known_anchors() { - // Sign-flip at u8 (XOR 0x80), then zero-extend to u64 BE: the - // i8 value lands in the last byte, upper seven bytes zero. - assert_eq!(i8::MIN.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x00]); - assert_eq!(0i8.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0x80]); - assert_eq!(i8::MAX.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0, 0xFF]); + // Sign-flip at u8 (XOR 0x80) so MIN→0x00, 0→0x80, MAX→0xFF. + assert_eq!(i8::MIN.to_orderable_bytes(), [0x00]); + assert_eq!(0i8.to_orderable_bytes(), [0x80]); + assert_eq!(i8::MAX.to_orderable_bytes(), [0xFF]); } #[test] @@ -222,17 +209,10 @@ mod tests { #[test] fn i16_known_anchors() { - // Sign-flip at u16, then zero-extend to u64 BE: the i16 value - // lands in the low two bytes, with the upper six bytes zero. 
- assert_eq!( - i16::MIN.to_orderable_bytes(), - [0, 0, 0, 0, 0, 0, 0x00, 0x00] - ); - assert_eq!(0i16.to_orderable_bytes(), [0, 0, 0, 0, 0, 0, 0x80, 0x00]); - assert_eq!( - i16::MAX.to_orderable_bytes(), - [0, 0, 0, 0, 0, 0, 0xFF, 0xFF] - ); + // Sign-flip at u16 (XOR 0x8000), then BE. + assert_eq!(i16::MIN.to_orderable_bytes(), [0x00, 0x00]); + assert_eq!(0i16.to_orderable_bytes(), [0x80, 0x00]); + assert_eq!(i16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); } #[test] @@ -252,20 +232,10 @@ mod tests { #[test] fn i32_known_anchors() { - // Sign-flip at u32, then zero-extend to u64 BE: the i32 value - // lands in the low four bytes, with the upper four bytes zero. - assert_eq!( - i32::MIN.to_orderable_bytes(), - [0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00] - ); - assert_eq!( - 0i32.to_orderable_bytes(), - [0, 0, 0, 0, 0x80, 0x00, 0x00, 0x00] - ); - assert_eq!( - i32::MAX.to_orderable_bytes(), - [0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF] - ); + // Sign-flip at u32 (XOR 0x8000_0000), then BE. + assert_eq!(i32::MIN.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); + assert_eq!(0i32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]); + assert_eq!(i32::MAX.to_orderable_bytes(), [0xFF, 0xFF, 0xFF, 0xFF]); } #[test] From 79af9d374deec9e563ae64aea8507ec81ca30b6b Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 5 May 2026 10:04:17 +1000 Subject: [PATCH 08/11] feat(orderable-bytes): impl ToOrderableBytes for u16/u32/u64 Adds the remaining native unsigned integer widths so the trait covers every primitive listed in the module doc. Each impl is the no-op big-endian path used for u8/u128 (already in lex order). Also adds a dedicated `bool` subsection to the module doc for symmetry with the other per-type encoding-strategy paragraphs. 
--- packages/orderable-bytes/src/primitive.rs | 127 +++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/packages/orderable-bytes/src/primitive.rs b/packages/orderable-bytes/src/primitive.rs index 66a0435..02e4578 100644 --- a/packages/orderable-bytes/src/primitive.rs +++ b/packages/orderable-bytes/src/primitive.rs @@ -5,9 +5,9 @@ //! Each impl emits the type's native byte width — no padding: //! //! - `bool`, `u8`, `i8` → `[u8; 1]` -//! - `i16` → `[u8; 2]` -//! - `i32` → `[u8; 4]` -//! - `i64`, `f64` → `[u8; 8]` +//! - `i16`, `u16` → `[u8; 2]` +//! - `i32`, `u32` → `[u8; 4]` +//! - `i64`, `u64`, `f64` → `[u8; 8]` //! - `u128`, `i128` → `[u8; 16]` //! //! Consumers that need a fixed wider encoding (e.g. an ORE construction @@ -15,7 +15,11 @@ //! orderable bytes upstream of the encrypter; widening is monotonic on //! lex order so it preserves the encoding's guarantees. //! -//! ## Unsigned integers (`u8`, `u128`) +//! ## `bool` +//! +//! Encoded as `false → 0x00`, `true → 0x01`. Already in lex order. +//! +//! ## Unsigned integers (`u8`, `u16`, `u32`, `u64`, `u128`) //! //! Already in lex order — no sign-flip needed. Native big-endian. //! 
@@ -77,6 +81,15 @@ impl ToOrderableBytes for i8 { } } +impl ToOrderableBytes for u16 { + const ENCODED_LEN: usize = 2; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + impl ToOrderableBytes for i16 { const ENCODED_LEN: usize = 2; type Bytes = [u8; Self::ENCODED_LEN]; @@ -86,6 +99,15 @@ impl ToOrderableBytes for i16 { } } +impl ToOrderableBytes for u32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + impl ToOrderableBytes for i32 { const ENCODED_LEN: usize = 4; type Bytes = [u8; Self::ENCODED_LEN]; @@ -95,6 +117,15 @@ impl ToOrderableBytes for i32 { } } +impl ToOrderableBytes for u64 { + const ENCODED_LEN: usize = 8; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + self.to_be_bytes() + } +} + impl ToOrderableBytes for i64 { const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; @@ -205,6 +236,28 @@ mod tests { } } + // --- u16 --- + + #[test] + fn u16_known_anchors() { + assert_eq!(u16::MIN.to_orderable_bytes(), [0x00, 0x00]); + assert_eq!(u16::MAX.to_orderable_bytes(), [0xFF, 0xFF]); + assert_eq!(0x1234u16.to_orderable_bytes(), [0x12, 0x34]); + } + + #[test] + fn u16_byte_order_matches_natural_order() { + let ascending = [u16::MIN, 1, 256, 10000, u16::MAX - 1, u16::MAX]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + // --- i16 --- #[test] @@ -228,6 +281,39 @@ mod tests { } } + // --- u32 --- + + #[test] + fn u32_known_anchors() { + assert_eq!(u32::MIN.to_orderable_bytes(), [0x00; 4]); + assert_eq!(u32::MAX.to_orderable_bytes(), [0xFF; 4]); + assert_eq!( + 0x1234_5678u32.to_orderable_bytes(), + [0x12, 0x34, 0x56, 0x78] + ); + } + + #[test] + fn 
u32_byte_order_matches_natural_order() { + let ascending = [ + u32::MIN, + 1, + 1 << 8, + 1 << 16, + 1 << 24, + u32::MAX - 1, + u32::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + // --- i32 --- #[test] @@ -251,6 +337,39 @@ mod tests { } } + // --- u64 --- + + #[test] + fn u64_known_anchors() { + assert_eq!(u64::MIN.to_orderable_bytes(), [0x00; 8]); + assert_eq!(u64::MAX.to_orderable_bytes(), [0xFF; 8]); + let one = 1u64.to_orderable_bytes(); + let mut expected_one = [0u8; 8]; + expected_one[7] = 1; + assert_eq!(one, expected_one); + } + + #[test] + fn u64_byte_order_matches_natural_order() { + let ascending = [ + u64::MIN, + 1, + 1 << 16, + 1 << 32, + 1 << 48, + u64::MAX - 1, + u64::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{} < {} failed", + window[0], + window[1] + ); + } + } + // --- i64 --- #[test] From 271d27d62995231844736e66860f9c61691f9428 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 5 May 2026 10:10:38 +1000 Subject: [PATCH 09/11] feat(orderable-bytes): impl ToOrderableBytes for char and f32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `char` encodes as the big-endian bytes of its `u32` Unicode scalar value — Rust's `Ord` for `char` is by code point and surrogates aren't representable, so the native u32 lex order is exactly what we need. `f32` reuses the f64 monotonic mapping (sign-bit flip for positives, all-bits flip for negatives, -0.0 canonicalised to +0.0) narrowed to u32. Module doc gains a `char` section and folds the float docs into a single IEEE 754 section covering both widths. 
---
 packages/orderable-bytes/src/primitive.rs | 139 ++++++++++++++++++++--
 1 file changed, 127 insertions(+), 12 deletions(-)

diff --git a/packages/orderable-bytes/src/primitive.rs b/packages/orderable-bytes/src/primitive.rs
index 02e4578..fb085ae 100644
--- a/packages/orderable-bytes/src/primitive.rs
+++ b/packages/orderable-bytes/src/primitive.rs
@@ -1,12 +1,12 @@
 //! Canonical, order-preserving fixed-length byte encodings for the
-//! primitives `bool`, `u8`, `i8`, `i16`, `i32`, `i64`, `u128`, `i128`,
-//! and the IEEE 754 double `f64`.
+//! primitives `bool`, `char`, `u8`, `u16`, `u32`, `u64`, `u128`, `i8`,
+//! `i16`, `i32`, `i64`, `i128`, and the IEEE 754 floats `f32` and `f64`.
 //!
 //! Each impl emits the type's native byte width — no padding:
 //!
 //! - `bool`, `u8`, `i8` → `[u8; 1]`
 //! - `i16`, `u16` → `[u8; 2]`
-//! - `i32`, `u32` → `[u8; 4]`
+//! - `i32`, `u32`, `char`, `f32` → `[u8; 4]`
 //! - `i64`, `u64`, `f64` → `[u8; 8]`
 //! - `u128`, `i128` → `[u8; 16]`
 //!
@@ -31,10 +31,19 @@
 //! positives (the sign bit `1` for negatives clears to `0`, vice versa
 //! for positives) and preserves order within each sign class.
 //!
-//! ## `f64`
+//! ## `char`
 //!
-//! IEEE 754 doubles are mapped to a lex-orderable `u64` using the
-//! standard monotonic encoding:
+//! Encoded as the big-endian bytes of the underlying `u32` Unicode
+//! scalar value (`*self as u32`). Rust's `Ord` impl for `char` compares
+//! by code point, and surrogate code points (`U+D800`..=`U+DFFF`) are
+//! not representable as `char`, so the native `u32` lex order is
+//! exactly the order we need.
+//!
+//! ## IEEE 754 floats (`f32`, `f64`)
+//!
+//! Each float is mapped to a lex-orderable unsigned integer of the
+//! same width (`u32` for `f32`, `u64` for `f64`) using the standard
+//! monotonic encoding:
 //!
 //! - Negatives flip every bit (their bit pattern's lex order is the
 //! reverse of magnitude order, so flipping inverts it).
@@ -42,13 +51,14 @@
 //! negatives in lex order).
 //!
 //! 
`-0.0` is canonicalised to `+0.0` before encoding so the two compare -//! byte-equal — matching `-0.0 == 0.0` on `f64`. +//! byte-equal — matching `-0.0 == 0.0` for IEEE 754. //! -//! NaN handling is unspecified. `f64` is `PartialOrd` rather than `Ord` -//! (NaN compares unordered against every value, including itself), so -//! the trait's order/equality guarantees only apply to non-NaN inputs. -//! Different NaN bit patterns will produce different bytes; consumers -//! that need a canonical NaN must canonicalise upstream. +//! NaN handling is unspecified. Floats implement `PartialOrd` rather +//! than `Ord` (NaN compares unordered against every value, including +//! itself), so the trait's order/equality guarantees only apply to +//! non-NaN inputs. Different NaN bit patterns will produce different +//! bytes; consumers that need a canonical NaN must canonicalise +//! upstream. use crate::ToOrderableBytes; @@ -153,6 +163,31 @@ impl ToOrderableBytes for i128 { } } +impl ToOrderableBytes for char { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + (*self as u32).to_be_bytes() + } +} + +impl ToOrderableBytes for f32 { + const ENCODED_LEN: usize = 4; + type Bytes = [u8; Self::ENCODED_LEN]; + + fn to_orderable_bytes(&self) -> [u8; Self::ENCODED_LEN] { + // Canonicalise -0.0 → 0.0 so the two share one byte encoding + // (their f32 equality demands byte equality under our contract). + let value = if *self == -0.0 { 0.0 } else { *self }; + let bits = value.to_bits(); + // Branchless monotonic mapping (see `f64` impl for derivation). 
+ let sign_extension = (bits as i32 >> 31) as u32; + let mask = sign_extension | (1u32 << 31); + (bits ^ mask).to_be_bytes() + } +} + impl ToOrderableBytes for f64 { const ENCODED_LEN: usize = 8; type Bytes = [u8; Self::ENCODED_LEN]; @@ -470,6 +505,86 @@ mod tests { } } + // --- char --- + + #[test] + fn char_known_anchors() { + // 'A' = U+0041 = 0x0000_0041 BE. + assert_eq!('A'.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x41]); + // '\0' = U+0000 (lowest code point). + assert_eq!('\0'.to_orderable_bytes(), [0x00, 0x00, 0x00, 0x00]); + // char::MAX = U+10FFFF (highest valid scalar value). + assert_eq!(char::MAX.to_orderable_bytes(), [0x00, 0x10, 0xFF, 0xFF]); + } + + #[test] + fn char_byte_order_matches_natural_order() { + // Spans ASCII, BMP, and supplementary planes (above the surrogate gap). + let ascending = [ + '\0', + '0', + 'A', + 'a', + '\u{7F}', + '\u{D7FF}', + '\u{E000}', + '\u{1F600}', + char::MAX, + ]; + for window in ascending.windows(2) { + assert!( + window[0].to_orderable_bytes() < window[1].to_orderable_bytes(), + "{:?} < {:?} failed", + window[0], + window[1] + ); + } + } + + // --- f32 --- + + #[test] + fn f32_zero_canonical_bytes() { + // +0.0 → 0x8000_0000 (sign-bit-only flip on all-zero bits). 
+        assert_eq!(0.0f32.to_orderable_bytes(), [0x80, 0x00, 0x00, 0x00]);
+    }
+
+    #[test]
+    fn f32_negative_zero_canonicalises_with_zero() {
+        assert_eq!((-0.0f32).to_orderable_bytes(), 0.0f32.to_orderable_bytes());
+    }
+
+    #[test]
+    fn f32_byte_order_matches_natural_order() {
+        let ascending = [
+            f32::NEG_INFINITY,
+            f32::MIN,
+            -1e30,
+            -1.0,
+            -f32::MIN_POSITIVE,
+            0.0,
+            f32::MIN_POSITIVE,
+            1.0,
+            1e30,
+            f32::MAX,
+            f32::INFINITY,
+        ];
+        for window in ascending.windows(2) {
+            let a = window[0].to_orderable_bytes();
+            let b = window[1].to_orderable_bytes();
+            assert!(a < b, "{} < {} failed", window[0], window[1]);
+        }
+    }
+
+    #[test]
+    fn f32_subnormals_sort_above_zero_below_normals() {
+        // Smallest positive subnormal (`f32::from_bits(1)`) must land
+        // strictly between 0.0 and the smallest positive normal.
+        let subnormal = f32::from_bits(1);
+        assert!(0.0f32.to_orderable_bytes() < subnormal.to_orderable_bytes());
+        assert!(subnormal.to_orderable_bytes() < f32::MIN_POSITIVE.to_orderable_bytes());
+    }
+
     // --- f64 ---

     #[test]

From ad90f0bfb4c8881d3cada6dcebd34c98fae228c7 Mon Sep 17 00:00:00 2001
From: James Sadler
Date: Tue, 5 May 2026 10:35:11 +1000
Subject: [PATCH 10/11] feat(ore-rs): impl OreEncrypt for all primitives via
 ToOrderableBytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Migrates the existing `u32`/`u64`/`f64` `OreEncrypt` impls to call
`orderable_bytes::ToOrderableBytes::to_orderable_bytes()` and adds
impls for the remaining 11 primitives covered by the trait: `bool`,
`u8`/`u16`/`u128`, `i8`/`i16`/`i32`/`i64`/`i128`, `char`, and `f32`.

The new `f64` byte path is bit-for-bit identical to the previous
`ToOrderedInteger::map_to::<u64>` mapping (same sign-bit
canonicalisation, same XOR mask, same `-0.0 → +0.0` collapse), so
on-disk ciphertexts remain compatible.
Each impl follows the same trait-driven shape used by the chrono and
decimal consumers, with encoded lengths lifted into module-level
`const`s because stable Rust won't accept
`<T as ToOrderableBytes>::ENCODED_LEN` directly in const-generic
position.
---
 packages/ore-rs/src/encrypt.rs | 109 +++++++++++++++++----------------
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/packages/ore-rs/src/encrypt.rs b/packages/ore-rs/src/encrypt.rs
index e9517e5..ed37ddb 100644
--- a/packages/ore-rs/src/encrypt.rs
+++ b/packages/ore-rs/src/encrypt.rs
@@ -1,17 +1,19 @@
 use crate::ciphertext::*;
-use crate::convert::ToOrderedInteger;
 use crate::PlainText;
 use crate::{OreCipher, OreError};
+use orderable_bytes::ToOrderableBytes;

 /// Type-directed entry point for encrypting plaintext values with a given
 /// [`OreCipher`].
 ///
 /// Each implementation knows how to canonicalise its target type into the
-/// fixed-size byte plaintext expected by the cipher (e.g. big-endian bytes
-/// for `u64`, an order-preserving 8-byte mapping for `f64`, the 14-byte
-/// scientific-form encoding for `Decimal`). The associated output types
-/// pin the resulting ciphertext shape, with `LeftOutput` the query-only
-/// half and `FullOutput` the full ciphertext suitable for storage.
+/// fixed-size byte plaintext expected by the cipher. For primitives and
+/// the `chrono` / `decimal` value types the canonicalisation is delegated
+/// to [`orderable_bytes::ToOrderableBytes`], which guarantees the encoded
+/// bytes preserve the type's natural total order under lexicographic
+/// comparison. The associated output types pin the resulting ciphertext
+/// shape, with `LeftOutput` the query-only half and `FullOutput` the full
+/// ciphertext suitable for storage.
 pub trait OreEncrypt<T: OreCipher> {
     /// Output type produced by [`encrypt_left`](Self::encrypt_left).
     type LeftOutput: OreOutput;
@@ -26,59 +28,58 @@ pub trait OreEncrypt<T: OreCipher> {
     fn encrypt(&self, input: &T) -> Result<Self::FullOutput, OreError>;
 }

-impl<T: OreCipher> OreEncrypt<T> for u64 {
-    /* Note that Rust currently doesn't allow
-     * generic associated types so this ia a bit verbose! */
-    type LeftOutput = Left<T, 8>;
-    type FullOutput = CipherText<T, 8>;
+// `Left<T, N>` and `CipherText<T, N>` need a const-generic `N` known at
+// the type level. Stable Rust can't accept `<T as ToOrderableBytes>::ENCODED_LEN`
+// directly in that position (it would require `generic_const_exprs`), so
+// we lift each primitive's encoded length into a free `const` and name
+// that const in the associated type. Same idiom as `chrono.rs` /
+// `decimal.rs`.
+const BOOL_LEN: usize = <bool as ToOrderableBytes>::ENCODED_LEN;
+const U8_LEN: usize = <u8 as ToOrderableBytes>::ENCODED_LEN;
+const I8_LEN: usize = <i8 as ToOrderableBytes>::ENCODED_LEN;
+const U16_LEN: usize = <u16 as ToOrderableBytes>::ENCODED_LEN;
+const I16_LEN: usize = <i16 as ToOrderableBytes>::ENCODED_LEN;
+const U32_LEN: usize = <u32 as ToOrderableBytes>::ENCODED_LEN;
+const I32_LEN: usize = <i32 as ToOrderableBytes>::ENCODED_LEN;
+const U64_LEN: usize = <u64 as ToOrderableBytes>::ENCODED_LEN;
+const I64_LEN: usize = <i64 as ToOrderableBytes>::ENCODED_LEN;
+const U128_LEN: usize = <u128 as ToOrderableBytes>::ENCODED_LEN;
+const I128_LEN: usize = <i128 as ToOrderableBytes>::ENCODED_LEN;
+const CHAR_LEN: usize = <char as ToOrderableBytes>::ENCODED_LEN;
+const F32_LEN: usize = <f32 as ToOrderableBytes>::ENCODED_LEN;
+const F64_LEN: usize = <f64 as ToOrderableBytes>::ENCODED_LEN;

-    fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError>
-    where
-        T::LeftBlockType: CipherTextBlock,
-    {
-        let bytes = self.to_be_bytes();
-        cipher.encrypt_left(&bytes)
-    }
-
-    fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError>
-    where
-        T::LeftBlockType: CipherTextBlock,
-        T::RightBlockType: CipherTextBlock,
-    {
-        let bytes = self.to_be_bytes();
-        cipher.encrypt(&bytes)
-    }
-}
+macro_rules! impl_ore_encrypt_via_orderable_bytes {
+    ($type:ty, $len_const:ident) => {
+        impl<T: OreCipher> OreEncrypt<T> for $type {
+            type LeftOutput = Left<T, $len_const>;
+            type FullOutput = CipherText<T, $len_const>;

-impl<T: OreCipher> OreEncrypt<T> for u32 {
-    type LeftOutput = Left<T, 4>;
-    type FullOutput = CipherText<T, 4>;
+            fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> {
+                cipher.encrypt_left(&self.to_orderable_bytes())
+            }

-    fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> {
-        let bytes = self.to_be_bytes();
-        cipher.encrypt_left(&bytes)
-    }
-
-    fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> {
-        let bytes = self.to_be_bytes();
-        cipher.encrypt(&bytes)
-    }
+            fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> {
+                cipher.encrypt(&self.to_orderable_bytes())
+            }
+        }
+    };
 }

-impl<T: OreCipher> OreEncrypt<T> for f64 {
-    type LeftOutput = Left<T, 8>;
-    type FullOutput = CipherText<T, 8>;
-
-    fn encrypt_left(&self, cipher: &T) -> Result<Self::LeftOutput, OreError> {
-        let plaintext: u64 = self.map_to();
-        plaintext.encrypt_left(cipher)
-    }
-
-    fn encrypt(&self, cipher: &T) -> Result<Self::FullOutput, OreError> {
-        let plaintext: u64 = self.map_to();
-        plaintext.encrypt(cipher)
-    }
-}
+impl_ore_encrypt_via_orderable_bytes!(bool, BOOL_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u8, U8_LEN);
+impl_ore_encrypt_via_orderable_bytes!(i8, I8_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u16, U16_LEN);
+impl_ore_encrypt_via_orderable_bytes!(i16, I16_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u32, U32_LEN);
+impl_ore_encrypt_via_orderable_bytes!(i32, I32_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u64, U64_LEN);
+impl_ore_encrypt_via_orderable_bytes!(i64, I64_LEN);
+impl_ore_encrypt_via_orderable_bytes!(u128, U128_LEN);
+impl_ore_encrypt_via_orderable_bytes!(i128, I128_LEN);
+impl_ore_encrypt_via_orderable_bytes!(char, CHAR_LEN);
+impl_ore_encrypt_via_orderable_bytes!(f32, F32_LEN);
+impl_ore_encrypt_via_orderable_bytes!(f64, F64_LEN);

 impl<T: OreCipher, const N: usize> OreEncrypt<T> for PlainText<N> {
     type LeftOutput = Left<T, N>;

From 3d27b7472fcebc51aa2a89657ae684921c229333 Mon Sep 17 00:00:00 2001
From: James Sadler
Date: Tue, 5 May 2026 10:35:25 +1000
Subject: [PATCH 11/11] refactor(ore-rs): drop
 dead convert module

`ToOrderedInteger` / `FromOrderedInteger` were the legacy bridge from
`f64` to a lex-orderable `u64` plaintext. Now that `OreEncrypt for f64`
goes through `orderable_bytes::ToOrderableBytes`, the trait and its
sole impl are unused. Remove the module and its `mod convert;` entry.
---
 packages/ore-rs/src/convert.rs | 72 ----------------------------------
 packages/ore-rs/src/lib.rs     |  1 -
 2 files changed, 73 deletions(-)
 delete mode 100644 packages/ore-rs/src/convert.rs

diff --git a/packages/ore-rs/src/convert.rs b/packages/ore-rs/src/convert.rs
deleted file mode 100644
index 1741340..0000000
--- a/packages/ore-rs/src/convert.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
-    This following function implement an order-preserving translation of 64 bit
-    floats to 64 bit doubles (and the reverse operation - although that is just
-    used for verifying correctness).
-
-    The 64 bit integer that is produced is a plaintext that will be ORE encrypted
-    later on.
-
-    The mapping is such that the ordering of the floats will be preserved when
-    mapped to an unsigned integer, for example, an array of unsigned integers
-    dervived from a sorted array of doubles will result in no change to its
-    ordering when it itself is sorted.
-
-    The mapping does not preserve any notion of the previous value after the
-    conversion - only ordering is preserved.
-
-    Caveat: NaN and -ve & +ve infinity will also be mapped and ordering is not
-    well-defined with those values. Those values should be discarded before
-    converting arrays of those values.
-
-    This post was used as a reference for building this implementation:
-    https://lemire.me/blog/2020/12/14/converting-floating-point-numbers-to-integers-while-preserving-order
-*/
-
-pub(crate) trait ToOrderedInteger<T> {
-    fn map_to(&self) -> T;
-}
-
-#[allow(dead_code)]
-trait FromOrderedInteger<T> {
-    fn map_from(input: T) -> Self;
-}
-
-impl ToOrderedInteger<u64> for f64 {
-    fn map_to(&self) -> u64 {
-        // Canonicalise -0.0 to +0.0 so equal-comparing floats produce equal
-        // ciphertexts (IEEE-754: -0.0 == +0.0).
-        let num: u64 = if *self == 0.0 { 0 } else { self.to_bits() };
-        // `num >> 63` is the sign bit (0 or 1). Cast to i64 (no bit change,
-        // both 0 and 1 fit), negate (→ 0 or -1), reinterpret as u64 (→ 0 or
-        // 0xFFFF_FFFF_FFFF_FFFF, the broadcast sign mask). Equivalent to
-        // the previous `mem::transmute` pair but `as`-cast safe.
-        let signed: i64 = -((num >> 63) as i64);
-        let mut mask: u64 = signed as u64;
-        mask |= 0x8000000000000000;
-        num ^ mask
-    }
-}
-
-impl FromOrderedInteger<u64> for f64 {
-    fn map_from(input: u64) -> f64 {
-        let i = (((input >> 63) as i64) - 1) as u64;
-        let mask: u64 = i | 0x8000000000000000;
-        f64::from_bits(input ^ mask)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use quickcheck::TestResult;
-
-    quickcheck! {
-        fn roundtrip(x: f64) -> TestResult {
-            if !x.is_nan() && x.is_finite() {
-                TestResult::from_bool(x == f64::map_from(x.map_to()))
-            } else {
-                TestResult::discard()
-            }
-        }
-    }
-}
diff --git a/packages/ore-rs/src/lib.rs b/packages/ore-rs/src/lib.rs
index 6640949..09e2af5 100644
--- a/packages/ore-rs/src/lib.rs
+++ b/packages/ore-rs/src/lib.rs
@@ -139,7 +139,6 @@
 #[cfg(feature = "chrono")]
 mod chrono;
 mod ciphertext;
-mod convert;
 #[cfg(feature = "decimal")]
 mod decimal;
 mod encrypt;