Skip to content

Commit e88185d

Browse files
authored
Fix leading zero handling of CIDs in DAG-CBOR (#98)
1 parent 2538b22 commit e88185d

3 files changed

Lines changed: 38 additions & 9 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "libipld"
3-
version = "3.3.1"
3+
version = "3.3.2"
44
edition = "2021"
55
license = "MIT"
66
description = "Python binding to the Rust IPLD library"

pytests/test_dag_cbor.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,13 @@ def test_encode_tag_negative_bignum() -> None:
267267
libipld.encode_dag_cbor(bignum)
268268

269269
assert 'number out of range' in str(exc_info.value).lower()
270+
271+
272+
# Round-trip check: decoding a tag-42 CID and re-encoding the result must
# reproduce the original bytes exactly.
def test_roundtrip_valid_cid_with_short_tag() -> None:
273+
# d82a = CBOR tag 42, 5825 = byte-string header for 37 bytes; the payload
# starts with a 0x00 byte followed by the CID bytes.
encoded_hex = 'd82a582500015512205891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03'
274+
encoded_bytes = bytes.fromhex(encoded_hex)
275+
276+
decoded = libipld.decode_dag_cbor(encoded_bytes)
277+
encoded = libipld.encode_dag_cbor(decoded)
278+
279+
# Byte-for-byte equality: the encoder must write back the 0x00 prefix.
assert encoded == encoded_bytes

src/lib.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@ impl<'de> dec::Read<'de> for SliceReader<'de> {
6464
}
6565
}
6666

67+
/// Borrowed bytes that are CBOR-encoded as a single byte string with one
/// `0x00` byte written in front of them.
///
/// In this change it is used as the payload of `types::Tag(42, ...)` when the
/// encoder is given Python `bytes` that parse as a `Cid`, so the prefix is
/// written directly to the output instead of building a prefixed copy first.
struct PrefixedCidBytes<'a>(&'a [u8]);
68+
69+
impl<'a> Encode for PrefixedCidBytes<'a> {
70+
/// Writes the byte-string header, then the `0x00` prefix, then the wrapped
/// bytes, in that order.
///
/// # Errors
///
/// Returns early (via `?`) with the error of whichever write fails first.
fn encode<W: enc::Write>(&self, w: &mut W) -> Result<(), enc::Error<W::Error>> {
71+
// Byte-string length header: 1 byte for the leading zero plus the payload.
72+
types::Bytes::bounded(1 + self.0.len(), w)?;
73+
// Leading zero byte; the decode path in this change rejects CID bytes
// whose first byte is not 0.
w.push(&[0x00])?;
74+
// The wrapped bytes follow unchanged.
w.push(self.0)?;
75+
Ok(())
76+
}
77+
}
78+
6779
fn cid_hash_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> PyResult<Bound<'py, PyDict>> {
6880
let hash = cid.hash();
6981
let dict_obj = PyDict::new(py);
@@ -251,14 +263,19 @@ where
251263

252264
let cid = <types::Bytes<&[u8]>>::decode(r)?.0;
253265

254-
if cid.len() <= 1 {
255-
return Err(anyhow!("CID is empty or too short"));
256-
} else if Cid::try_from(&cid[1..]).is_err() {
257-
// Parse the CID for validation. They have a zero byte at the front, strip it off.
266+
// we expect CIDs to have a leading zero byte
267+
if cid.len() <= 1 || cid[0] != 0 {
268+
return Err(anyhow!("Invalid CID"));
269+
}
270+
271+
let cid_without_prefix = &cid[1..];
272+
if Cid::try_from(cid_without_prefix).is_err() {
258273
return Err(anyhow!("Invalid CID"));
259274
}
260275

261-
PyBytes::new(py, cid).into_pyobject(py)?.into()
276+
PyBytes::new(py, cid_without_prefix)
277+
.into_pyobject(py)?
278+
.into()
262279
}
263280
major::SIMPLE => match byte {
264281
// FIXME(MarshalX): should be more clear for bool?
@@ -384,11 +401,13 @@ where
384401
Ok(())
385402
} else if let Ok(b) = obj.cast::<PyBytes>() {
386403
// FIXME (MarshalX): it's not efficient to try to parse it as CID
387-
let cid = Cid::try_from(b.as_bytes());
404+
let bytes = b.as_bytes();
405+
let cid = Cid::try_from(bytes);
388406
if cid.is_ok() {
389-
types::Tag(42, b.as_bytes()).encode(w)?;
407+
// by providing custom encoding we avoid extra allocation
408+
types::Tag(42, PrefixedCidBytes(bytes)).encode(w)?;
390409
} else {
391-
types::Bytes(b.as_bytes()).encode(w)?;
410+
types::Bytes(bytes).encode(w)?;
392411
}
393412

394413
Ok(())

0 commit comments

Comments
 (0)