Binary Data

Packable Values

i
/-- A value that can be packed into binary form. -/
inductive Value where
  | int32 : UInt32  Value
  | str   : String  Value
  deriving Repr, DecidableEq

/-- Format descriptor for unpacking: one tag per field in the byte stream. -/
inductive Fmt where
  | int32 : Fmt
  | str   : Fmt
  deriving Repr, DecidableEq

Packing Integers

i
/-- Pack a 32-bit unsigned integer as 4 bytes, least-significant byte first (little-endian),
    matching Python's struct.pack("<I", n). -/
def packInt32 (n : UInt32) : ByteArray :=
  ByteArray.mk #[
    (n           &&& 0xFF).toUInt8,
    ((n >>>  8)  &&& 0xFF).toUInt8,
    ((n >>> 16)  &&& 0xFF).toUInt8,
    ((n >>> 24)  &&& 0xFF).toUInt8
  ]

#guard packInt32 31 == ByteArray.mk #[0x1F, 0x00, 0x00, 0x00]
#guard packInt32 65 == ByteArray.mk #[0x41, 0x00, 0x00, 0x00]

Unpacking Integers

i
/-- Read a little-endian 32-bit unsigned integer from data at offset.
    Returns the value and the offset of the next byte. -/
def unpackInt32 (data : ByteArray) (offset : Nat) : Option (UInt32 × Nat) :=
  if offset + 4  data.size then
    let b0 := data[offset]!.toUInt32
    let b1 := data[offset + 1]!.toUInt32
    let b2 := data[offset + 2]!.toUInt32
    let b3 := data[offset + 3]!.toUInt32
    some (b0 ||| (b1 <<< 8) ||| (b2 <<< 16) ||| (b3 <<< 24), offset + 4)
  else
    none

#guard unpackInt32 (packInt32 31) 0 == some (31, 4)
#guard unpackInt32 (packInt32 0xDEADBEEF) 0 == some (0xDEADBEEF, 4)

Packing Strings

i
/-- Pack a String as a 4-byte little-endian length followed by its UTF-8 bytes,
    matching Python's struct.pack("i", len(s)) + s.encode("utf-8"). -/
def packStr (s : String) : ByteArray :=
  let bytes := s.toUTF8
  packInt32 bytes.size.toUInt32 ++ bytes

#guard packStr "hi" == ByteArray.mk #[0x02, 0x00, 0x00, 0x00, 0x68, 0x69]

Unpacking Strings

i
/-- Read a length-prefixed string from data at offset. -/
def unpackStr (data : ByteArray) (offset : Nat) : Option (String × Nat) :=
  match unpackInt32 data offset with
  | none             => none
  | some (len, next) =>
    let n := len.toNat
    if next + n  data.size then
      some (String.fromUTF8! (data.extract next (next + n)), next + n)
    else
      none

#guard unpackStr (packStr "hello") 0 == some ("hello", 9)

Packing a List

i
/-- Pack a single Value into bytes. -/
def packValue : Value  ByteArray
  | .int32 n => packInt32 n
  | .str s   => packStr s

/-- Pack a list of heterogeneous values into a ByteArray. -/
def pack (values : List Value) : ByteArray :=
  values.foldl (init := ByteArray.empty) fun acc v => acc ++ packValue v

Unpacking a List

i
/-- Unpack values from a ByteArray given a list of format descriptors.
    Returns none if the buffer is too short or malformed. -/
def unpack (fmts : List Fmt) (data : ByteArray) : Option (List Value) :=
  let rec go : List Fmt  Nat  List Value  Option (List Value)
    | [],             _,   acc => some acc.reverse
    | .int32 :: rest, off, acc =>
      match unpackInt32 data off with
      | none            => none
      | some (n, off')  => go rest off' (.int32 n :: acc)
    | .str :: rest,   off, acc =>
      match unpackStr data off with
      | none            => none
      | some (s, off')  => go rest off' (.str s :: acc)
  go fmts 0 []

Round-Trip Tests

i
#guard
  let values := [Value.int32 31, .int32 65]
  unpack [.int32, .int32] (pack values) == some values

#guard
  let values := [Value.str "hello", .str "Python"]
  unpack [.str, .str] (pack values) == some values

#guard
  let values := [Value.int32 31, .str "hello", .int32 65, .str "Python"]
  unpack [.int32, .str, .int32, .str] (pack values) == some values

Example Program

i
def main : IO Unit := do
  let values : List Value := [
    .int32 31,
    .int32 65,
    .str "hello",
    .str "Python"
  ]
  let packed := pack values
  IO.println s!"Packed {packed.size} bytes"
  let fmts : List Fmt := [.int32, .int32, .str, .str]
  match unpack fmts packed with
  | none    => IO.println "Error: unpack failed"
  | some vs =>
    for v in vs do
      match v with
      | .int32 n => IO.println s!"  int32: {n}"
      | .str s   => IO.println s!"  str:   {s}"

Exercises

Pack a Bool (15 min)

Add a | bool : Bool → Value constructor to Value and a matching | bool : Fmt constructor to Fmt. Represent true as the byte 1 and false as 0. Implement packBool and unpackBool, then extend packValue and unpack to handle the new constructor. Add #guard tests for round-trips of both true and false.

Detect Format Mismatch (15 min)

What does unpack [.str] (pack [Value.int32 42]) return, and why? Write a #guard that confirms the answer, then explain in a comment why that behavior could be dangerous in a real system.

Count Bytes (10 min)

Write a pure function packedSize (fmts : List Fmt) (values : List Value) : Nat that computes the number of bytes pack values would produce without actually calling pack. For int32, the size is always 4. For str, it is 4 plus the byte length of the string's UTF-8 encoding. Add a #guard that checks the result against (pack someValues).size for a mixed list.

Fix: Unpack Reads Big-Endian

i
-- unpackInt32 reads bytes in big-endian order, but packInt32 writes
-- little-endian. The values don't round-trip. Fix the shift order.
def packInt32 (n : UInt32) : ByteArray :=
  ByteArray.mk #[
    (n           &&& 0xFF).toUInt8,
    ((n >>>  8)  &&& 0xFF).toUInt8,
    ((n >>> 16)  &&& 0xFF).toUInt8,
    ((n >>> 24)  &&& 0xFF).toUInt8
  ]

def unpackInt32 (data : ByteArray) (offset : Nat) : Option (UInt32 × Nat) :=
  if offset + 4  data.size then
    let b0 := data[offset]!.toUInt32
    let b1 := data[offset + 1]!.toUInt32
    let b2 := data[offset + 2]!.toUInt32
    let b3 := data[offset + 3]!.toUInt32
    some ((b0 <<< 24) ||| (b1 <<< 16) ||| (b2 <<< 8) ||| b3, offset + 4)
  else
    none

#guard unpackInt32 (packInt32 31) 0 == some (31, 4)
hint
  • packInt32 writes the least-significant byte first (little-endian): b0=n&0xFF, b1=(n>>>8)&0xFF, …
  • unpackInt32 reads the first byte as the most-significant (b0 <<< 24), which is big-endian
  • To match, unpack should read b0 as least-significant: no shift, then b1 <<< 8, b2 <<< 16, b3 <<< 24

Fix: Pack Writes Big-Endian

i
-- packInt32 writes bytes in big-endian order (MSB first) instead of
-- the expected little-endian. Fix the byte order.
def packInt32 (n : UInt32) : ByteArray :=
  ByteArray.mk #[
    ((n >>> 24)  &&& 0xFF).toUInt8,
    ((n >>> 16)  &&& 0xFF).toUInt8,
    ((n >>>  8)  &&& 0xFF).toUInt8,
    (n           &&& 0xFF).toUInt8
  ]

def unpackInt32 (data : ByteArray) (offset : Nat) : Option (UInt32 × Nat) :=
  if offset + 4  data.size then
    let b0 := data[offset]!.toUInt32
    let b1 := data[offset + 1]!.toUInt32
    let b2 := data[offset + 2]!.toUInt32
    let b3 := data[offset + 3]!.toUInt32
    some (b0 ||| (b1 <<< 8) ||| (b2 <<< 16) ||| (b3 <<< 24), offset + 4)
  else
    none

#guard unpackInt32 (packInt32 31) 0 == some (31, 4)
hint
  • The byte array puts n>>>24 first and n&0xFF last, which is big-endian (most-significant byte first)
  • Little-endian means least-significant byte first: swap the array order

Fix: Empty Format List Returns None

i
-- unpack of an empty format list should return some [] but it returns
-- none. Fix the base case.
inductive Value where | int32 : UInt32  Value | str : String  Value
inductive Fmt where | int32 : Fmt | str : Fmt

def unpack (fmts : List Fmt) (_data : ByteArray) : Option (List Value) :=
  match fmts with
  | []     => none
  | _ :: _ => none   -- stub; real implementation would recurse

#guard unpack [] (ByteArray.mk #[1, 2, 3]) == some []
hint
  • Unpacking nothing from any byte array should succeed and return an empty list
  • The base case [] => none should be [] => some []
  • Like reading zero fields from a buffer: there's nothing to fail

Fix: Unpack String Without Bounds Check

i
-- unpackStr doesn't check if the string body extends past the end of
-- the byte array. Add a bounds check so it returns none for truncated data.
def unpackInt32 (data : ByteArray) (offset : Nat) : Option (UInt32 × Nat) :=
  if offset + 4  data.size then
    let b0 := data[offset]!.toUInt32
    let b1 := data[offset + 1]!.toUInt32
    let b2 := data[offset + 2]!.toUInt32
    let b3 := data[offset + 3]!.toUInt32
    some (b0 ||| (b1 <<< 8) ||| (b2 <<< 16) ||| (b3 <<< 24), offset + 4)
  else
    none

def unpackStr (data : ByteArray) (offset : Nat) : Option (String × Nat) :=
  match unpackInt32 data offset with
  | none             => none
  | some (len, next) =>
    let n := len.toNat
    some (String.fromUTF8! (data.extract next (next + n)), next + n)

-- This should return none because the data is shorter than the length header claims.
#guard unpackStr (ByteArray.mk #[0xFF, 0x00, 0x00, 0x00]) 0 == none
hint
  • unpackInt32 returns a length, but that length might point past the end of the buffer
  • After reading len, check that next + n ≤ data.size before calling data.extract
  • If the bounds check fails, return none to signal corrupted data

Fix: Pack Value Truncates Length

i
-- packValue truncates the string length to 16 bits instead of 32 bits.
-- The length header is wrong for strings longer than 65535 bytes. Fix the
-- length type so it uses the full 32-bit range.
inductive Value where | int32 : UInt32  Value | str : String  Value

def packInt32 (n : UInt32) : ByteArray :=
  ByteArray.mk #[
    (n           &&& 0xFF).toUInt8,
    ((n >>>  8)  &&& 0xFF).toUInt8,
    ((n >>> 16)  &&& 0xFF).toUInt8,
    ((n >>> 24)  &&& 0xFF).toUInt8
  ]

def packValue : Value  ByteArray
  | .int32 n => packInt32 n
  | .str s   =>
      let bytes := s.toUTF8
      packInt32 (bytes.size.toUInt16.toUInt32) ++ bytes

-- A 4-character string should encode length=4, not truncated.
#guard packValue (.str "test") ==
  ByteArray.mk #[0x04, 0x00, 0x00, 0x00, 0x74, 0x65, 0x73, 0x74]
hint
  • bytes.size.toUInt16 converts the byte count to a 16-bit integer before widening back to 32 bits
  • This truncates any length larger than 65535
  • Remove the .toUInt16 step and use bytes.size.toUInt32 directly

Write: Hex Dump

i
-- Write a function that converts a ByteArray to a hex string.
-- Each byte becomes two hex characters (e.g., 0x1F -> "1f").
def hexDump (data : ByteArray) : String :=
  ""

def hexDigit (n : UInt8) : String :=
  let chars := "0123456789abcdef"
  s!"{chars[(n >>> 4).toNat]!}{chars[(n &&& 0xF).toNat]!}"

#guard hexDump (ByteArray.mk #[0x1F, 0xA0]) == "1fa0"
#guard hexDump ByteArray.empty == ""
#guard hexDump (ByteArray.mk #[0x00, 0xFF]) == "00ff"
hint
  • Iterate over each byte in the ByteArray using a foldl or recursion
  • Convert each byte to two hex characters using the provided hexDigit helper
  • Concatenate results: start with "", append each byte's hex representation

Write: Generate Format Descriptors

i
-- Write a function that takes a list of Values and produces
-- the matching list of Fmt descriptors.
inductive Value where | int32 : UInt32  Value | str : String  Value
inductive Fmt where | int32 : Fmt | str : Fmt

def fmtsOf (vals : List Value) : List Fmt :=
  []

#guard fmtsOf [Value.int32 0, Value.str ""] == [Fmt.int32, Fmt.str]
#guard fmtsOf [Value.str "x", Value.int32 42, Value.str "y"] == [Fmt.str, Fmt.int32, Fmt.str]
#guard fmtsOf [] == []
hint
  • Map each Value constructor to its corresponding Fmt constructor
  • Value.int32 maps to Fmt.int32, Value.str maps to Fmt.str
  • Use List.map with a function that pattern-matches on the Value

Write: Compute Packed Size

i
-- Write a function that computes the number of bytes a list of Values
-- would occupy when packed, without actually calling 'pack'.
inductive Value where | int32 : UInt32  Value | str : String  Value

def packedSize (vals : List Value) : Nat :=
  0

#guard packedSize [Value.int32 42, Value.int32 65] == 8
#guard packedSize [Value.str "hi"] == 6    -- 4 (length) + 2 (bytes)
#guard packedSize [] == 0
hint
  • Each int32 is always 4 bytes
  • Each str is 4 bytes (length header) plus the UTF-8 byte length of the string
  • Use List.foldl to accumulate the total, or map then sum
  • For string length: s.toUTF8.size

Write: Unpack One Value

i
-- Write a convenience function that unpacks exactly one value.
-- It should wrap the 'unpack' function to only accept single-element
-- format lists and return the value directly.

-- Assume these definitions exist (from code.lean):
--   unpack : List Fmt → ByteArray → Option (List Value)

-- Write a wrapper that expects exactly one Fmt and returns Option Value.
inductive Value where | int32 : UInt32  Value | str : String  Value

def unpackOne (fmt : Fmt) (data : ByteArray) : Option Value :=
  none

inductive Fmt where | int32 : Fmt | str : Fmt
hint
  • Create a single-element format list [fmt] and call unpack
  • Match on the result: some [v] returns some v, some _ or none returns none
  • Since unpack is defined in code.lean, you'll need to write a self-contained version or assume it's available