Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions docs/plans/bigint_enhancement.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,11 +273,22 @@ hard-coded across `src/decimo/bigint/` — the `List[UInt32]` field and every
signature, the `1 << 32` / `0xFFFF_FFFF` / `>> 32` literals, the 4×UInt32
NEON width, `_count_leading_zeros`, the base-10 ↔ base-2^k chunking in
`from_string` / `to_string` (9 vs 19 digits per limb, the hard part), and
`BigInt10` bit-layout interop. If I do it, I will first introduce
`BigBase` / `DoubleBigBase` / `BITS` / `BASE` / `MASK` and replace every
literal while keeping the limb at uint32, a pure and testable refactor with
no behaviour change, then flip to uint64 and fix the base-conversion and
SIMD fallout behind the test suite.
`BigInt10` bit-layout interop. One place actually gets *simpler*:
`from_integral_scalar` today branches per input dtype
(uint8/16/32/64/128/256, signed variants) only because the word extraction
is hard-coded to 32 bits. With a `BITS`-parametric limb it collapses to one
generic peel loop over `N_LIMBS = ceil(bitwidthof(dtype) / BITS)`,
`@parameter for`-unrolled, that works for any input dtype and either limb
width. The one care point is the input-width == limb-width boundary (e.g.
a `UInt64` input with 64-bit limbs): guard the mask with `~0` and skip the
final `>> BITS` so it never shifts by the full width, and take the
magnitude via an unsigned negate so `Int.MIN` does not overflow. Probed
2026-06-19: the loop compiles and gives correct limbs for `u8`, `u64`,
`u128`, and negative `i64` at both `BITS = 32` and `BITS = 64`. If I do the
migration, I will first introduce `BigBase` / `DoubleBigBase` / `BITS` /
`BASE` / `MASK` and replace every literal while keeping the limb at uint32,
a pure and testable refactor with no behaviour change, then flip to uint64
and fix the base-conversion and SIMD fallout behind the test suite.

**T-W1 — base-2^64 limbs. Open, low priority, unproven.**

Expand Down
205 changes: 55 additions & 150 deletions src/decimo/bigint/bigint.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import decimo.bigint.number_theory as bigint_number_theory
import decimo.str as decimo_str
from decimo.bigint10.bigint10 import BigInt10
from decimo.biguint.biguint import BigUInt
from decimo.utility import unsigned_counterpart
from decimo.errors import (
ConversionError,
OverflowError,
Expand Down Expand Up @@ -281,161 +282,65 @@ struct BigInt(
if value == 0:
return Self()

# --- Unsigned types: direct word extraction via bit ops ---

comptime if dtype == DType.uint8 or dtype == DType.uint16:
# Fits in 1 word
return Self(raw_words=[UInt32(value)], sign=False)

elif dtype == DType.uint32:
return Self(raw_words=[UInt32(value)], sign=False)

elif dtype == DType.uint64:
var words = List[UInt32](capacity=2)
words.append(UInt32(value & 0xFFFF_FFFF))
var hi = UInt32(value >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=False)

elif dtype == DType.uint128:
var words = List[UInt32](capacity=4)
var remaining = value
while remaining != 0:
words.append(UInt32(remaining & 0xFFFF_FFFF))
remaining >>= 32
return Self(raw_words=words^, sign=False)

elif dtype == DType.uint256:
var words = List[UInt32](capacity=8)
var remaining = value
while remaining != 0:
words.append(UInt32(remaining & 0xFFFF_FFFF))
remaining >>= 32
return Self(raw_words=words^, sign=False)

# --- Platform-sized UInt (pointer width, 32- or 64-bit) ---

elif dtype == DType.uint:
comptime if size_of[Scalar[DType.uint]]() == 4:
# 32-bit platform: same as uint32
return Self(raw_words=[UInt32(value)], sign=False)
elif size_of[Scalar[DType.uint]]() == 8:
# 64-bit platform: same as uint64
var words = List[UInt32](capacity=2)
words.append(UInt32(value & 0xFFFF_FFFF))
var hi = UInt32(value >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=False)
else:
comptime assert False, "unsupported platform UInt size"

# --- Signed types <= 64 bits: convert magnitude to UInt64 ---

elif dtype == DType.int8 or dtype == DType.int16:
# Magnitude fits in 1 word
if value < 0:
return Self(raw_words=[UInt32(-Int32(value))], sign=True)
else:
return Self(raw_words=[UInt32(value)], sign=False)

elif dtype == DType.int32:
if value < 0:
var magnitude = UInt64(0) - UInt64(value)
var words = List[UInt32](capacity=2)
words.append(UInt32(magnitude & 0xFFFF_FFFF))
var hi = UInt32(magnitude >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=True)
else:
return Self(raw_words=[UInt32(value)], sign=False)

elif dtype == DType.int64:
var sign = value < 0
var magnitude: UInt64
if sign:
magnitude = UInt64(0) - UInt64(value)
else:
magnitude = UInt64(value)
var words = List[UInt32](capacity=2)
words.append(UInt32(magnitude & 0xFFFF_FFFF))
var hi = UInt32(magnitude >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=sign)

# --- Platform-sized Int (pointer width, 32- or 64-bit) ---

elif dtype == DType.int:
comptime if size_of[Scalar[DType.int]]() == 4:
# 32-bit platform: same as int32
if value < 0:
var magnitude = UInt64(0) - UInt64(value)
var words = List[UInt32](capacity=2)
words.append(UInt32(magnitude & 0xFFFF_FFFF))
var hi = UInt32(magnitude >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=True)
else:
return Self(raw_words=[UInt32(value)], sign=False)
elif size_of[Scalar[DType.int]]() == 8:
# 64-bit platform: same as int64
var sign = value < 0
var magnitude: UInt64
if sign:
magnitude = UInt64(0) - UInt64(value)
else:
magnitude = UInt64(value)
var words = List[UInt32](capacity=2)
words.append(UInt32(magnitude & 0xFFFF_FFFF))
var hi = UInt32(magnitude >> 32)
if hi != 0:
words.append(hi)
return Self(raw_words=words^, sign=sign)
else:
comptime assert False, "unsupported platform Int size"

# --- Int128: use division to extract 32-bit chunks ---

elif dtype == DType.int128:
var sign = value < 0
var words = List[UInt32](capacity=4)
var rem = Int128(value)
# Determine the sign of the value
var sign = False
comptime if dtype.is_signed():
sign = value < 0

# Keep the magnitude in an unsigned word of the same width.
# The unsigned counterpart has the same bit width, just a larger range.
comptime unsigned_dtype = unsigned_counterpart[dtype]()
var magnitude: Scalar[unsigned_dtype]

# [Mojo Miji]
# Use the wrap-around (overflow) trick here:
# The bit pattern whose unsigned value is SIGNED_MAX + 1 is read by the
# SIGNED type as SIGNED_MIN, and the signed reading then increases with
# the unsigned value until it reaches -1.
# So the unsigned value of the bit pattern of a negative SIGNED value x
# (x < 0) is
#   SIGNED_MAX + 1 + |SIGNED_MIN| - |x|
#   = SIGNED_MAX + 1 + (SIGNED_MAX + 1) + x
#   = 2 * SIGNED_MAX + 2 + x
# So UNSIGNED 0 - (that unsigned value), which wraps modulo
# UNSIGNED_MAX + 1, is
#   UNSIGNED_MAX + 1 - (2 * SIGNED_MAX + 2 + x)
#   = UNSIGNED_MAX + 1 - 2 * (UNSIGNED_MAX - 1) / 2 - 2 - x
#   = UNSIGNED_MAX + 1 - UNSIGNED_MAX + 1 - 2 - x
#   = -x
#   = |x|
# Yes, it is the magnitude of the signed negative value x.
comptime if dtype.is_signed():
if sign:
while rem != 0:
var quotient = rem // Int128(-0x1_0000_0000)
var word_val = rem % Int128(-0x1_0000_0000)
words.append(UInt32(-word_val))
rem = -quotient
magnitude = Scalar[unsigned_dtype](0) - Scalar[unsigned_dtype](
value
)
else:
while rem != 0:
words.append(UInt32(rem & 0xFFFF_FFFF))
rem >>= 32
return Self(raw_words=words^, sign=sign)
magnitude = Scalar[unsigned_dtype](value)
else:
magnitude = Scalar[unsigned_dtype](value)

# Split the magnitude into base-2^32 words, least significant first.
# The peeling loop below is parameterized by `BITS_PER_WORD` for
# future extension to other word sizes (e.g. 64-bit words).
comptime value_bits = size_of[Scalar[unsigned_dtype]]() * 8
comptime number_of_words = (
value_bits + Self.BITS_PER_WORD - 1
) // Self.BITS_PER_WORD # Trick to round up division
var words = List[UInt32](capacity=number_of_words)

comptime for i in range(number_of_words):
words.append(
UInt32(magnitude & Scalar[unsigned_dtype](Self.WORD_MAX))
)

# --- Int256: use division to extract 32-bit chunks ---
comptime if i < number_of_words - 1: # No need after reading the last word
magnitude >>= (
Self.BITS_PER_WORD
) # Pop the least significant bits (word)

elif dtype == DType.int256:
var sign = value < 0
var words = List[UInt32](capacity=8)
var rem = Int256(value)
if sign:
while rem != 0:
var quotient = rem // Int256(-0x1_0000_0000)
var word_val = rem % Int256(-0x1_0000_0000)
words.append(UInt32(-word_val))
rem = -quotient
else:
while rem != 0:
words.append(UInt32(rem & 0xFFFF_FFFF))
rem >>= 32
return Self(raw_words=words^, sign=sign)
# Trim the leading zero words, but keep at least one.
while len(words) > 1 and words[len(words) - 1] == 0:
_ = words.pop()

else:
comptime assert False, "unsupported integral dtype"
return Self(raw_words=words^, sign=sign)

@staticmethod
def from_string(value: String) raises -> Self:
Expand Down
58 changes: 58 additions & 0 deletions src/decimo/utility.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# ===----------------------------------------------------------------------=== #
# Copyright 2025-2026 Yuhao Zhu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #

"""Implements small utilities and helpers that are used in multiple places
in the codebase.
"""


def unsigned_counterpart[dtype: DType]() -> DType where dtype.is_integral():
    """Maps an integral dtype to the unsigned dtype of the same bit width.

    Unsigned dtypes are returned unchanged. Each signed dtype is mapped to
    the unsigned dtype of equal width, so that the magnitude of any value of
    `dtype`, including the most negative one, can be stored in the result.

    Constraints:
        `dtype` must be an integral dtype.

    Parameters:
        dtype: The integral dtype whose unsigned counterpart is wanted.

    Returns:
        `dtype` itself when it is unsigned, otherwise the unsigned dtype
        with the same bit width.
    """
    # Guard first: uint8 / uint16 / uint32 / uint64 / uint128 / uint256 and
    # the platform-sized `uint` are their own counterpart.
    comptime if dtype.is_unsigned():
        return dtype
    # Signed dtypes: platform-sized first, then fixed widths, widest first.
    elif dtype == DType.int:
        return DType.uint
    elif dtype == DType.int256:
        return DType.uint256
    elif dtype == DType.int128:
        return DType.uint128
    elif dtype == DType.int64:
        return DType.uint64
    elif dtype == DType.int32:
        return DType.uint32
    elif dtype == DType.int16:
        return DType.uint16
    elif dtype == DType.int8:
        return DType.uint8
    else:
        # A signed integral dtype with no mapping above: reject it at
        # compile time rather than return a wrong width.
        comptime assert (
            False
        ), "unsigned_counterpart: unexpected signed integral dtype"
Loading