From 38a36298c4b66bd845029d0fa4b41ac716f7a9dd Mon Sep 17 00:00:00 2001 From: ZC <390810839@qq.com> Date: Tue, 7 Jan 2025 19:48:53 +0800 Subject: [PATCH 1/3] support no_std (#556) * support no_std (#544) * Simpler clippy check (no features in safetensors really). --------- Co-authored-by: Nicolas Patry --- .github/workflows/rust.yml | 5 ++++- safetensors/Cargo.toml | 10 +++++++-- safetensors/src/lib.rs | 40 ++++++++++++++++++++++++++++++++- safetensors/src/slice.rs | 10 ++++----- safetensors/src/tensor.rs | 45 ++++++++++++++++++++++---------------- 5 files changed, 82 insertions(+), 28 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1c816497..e7ac0684 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,11 +36,14 @@ jobs: run: cargo build --all-targets --verbose - name: Lint with Clippy - run: cargo clippy --all-targets --all-features -- -D warnings + run: cargo clippy --all-targets -- -D warnings - name: Run Tests run: cargo test --verbose + - name: Run No-STD Tests + run: cargo test --no-default-features --features alloc --verbose + - name: Run Audit # RUSTSEC-2021-0145 is criterion so only within benchmarks run: cargo audit -D warnings --ignore RUSTSEC-2021-0145 diff --git a/safetensors/Cargo.toml b/safetensors/Cargo.toml index d59a6e18..02bcc2f0 100644 --- a/safetensors/Cargo.toml +++ b/safetensors/Cargo.toml @@ -21,14 +21,20 @@ exclude = [ "rust-toolchain", "target/*", "Cargo.lock"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -serde = {version = "1.0", features = ["derive"]} -serde_json = "1.0" +hashbrown = { version = "0.15.2", features = ["serde"], optional = true } +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde_json = { version = "1.0", default-features = false } [dev-dependencies] criterion = "0.5" memmap2 = "0.9" proptest = "1.4" +[features] +default = ["std"] +std = ["serde/default", 
"serde_json/default"] +alloc = ["serde/alloc", "serde_json/alloc", "hashbrown"] + [[bench]] name = "benchmark" harness = false diff --git a/safetensors/src/lib.rs b/safetensors/src/lib.rs index 48d8d521..4020c9dc 100644 --- a/safetensors/src/lib.rs +++ b/safetensors/src/lib.rs @@ -1,5 +1,43 @@ #![deny(missing_docs)] #![doc = include_str!("../README.md")] +#![cfg_attr(not(feature = "std"), no_std)] pub mod slice; pub mod tensor; -pub use tensor::{serialize, serialize_to_file, Dtype, SafeTensorError, SafeTensors, View}; +/// serialize_to_file only valid in std +#[cfg(feature = "std")] +pub use tensor::serialize_to_file; +pub use tensor::{serialize, Dtype, SafeTensorError, SafeTensors, View}; + +#[cfg(feature = "alloc")] +#[macro_use] +extern crate alloc; + +#[cfg(all(feature = "std", feature = "alloc"))] +compile_error!("must choose either the `std` or `alloc` feature, but not both."); +#[cfg(all(not(feature = "std"), not(feature = "alloc")))] +compile_error!("must choose either the `std` or `alloc` feature"); + +/// A facade around all the types we need from the `std`, `core`, and `alloc` +/// crates. This avoids elaborate import wrangling having to happen in every +/// module. +mod lib { + #[cfg(not(feature = "std"))] + mod no_stds { + pub use alloc::borrow::Cow; + pub use alloc::string::{String, ToString}; + pub use alloc::vec::Vec; + pub use hashbrown::HashMap; + } + #[cfg(feature = "std")] + mod stds { + pub use std::borrow::Cow; + pub use std::collections::HashMap; + pub use std::string::{String, ToString}; + pub use std::vec::Vec; + } + /// choose std or no_std to export by feature flag + #[cfg(not(feature = "std"))] + pub use no_stds::*; + #[cfg(feature = "std")] + pub use stds::*; +} diff --git a/safetensors/src/slice.rs b/safetensors/src/slice.rs index d19b4b59..91087170 100644 --- a/safetensors/src/slice.rs +++ b/safetensors/src/slice.rs @@ -1,7 +1,7 @@ //! Module handling lazy loading via iterating on slices on the original buffer. 
+use crate::lib::{String, ToString, Vec}; use crate::tensor::TensorView; -use std::fmt; -use std::ops::{ +use core::ops::{ Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }; @@ -40,8 +40,8 @@ fn display_bound(bound: &Bound) -> String { } /// Intended for Python users mostly or at least for its conventions -impl fmt::Display for TensorIndexer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for TensorIndexer { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { TensorIndexer::Select(n) => { write!(f, "{n}") @@ -77,7 +77,7 @@ macro_rules! impl_from_range { ($range_type:ty) => { impl From<$range_type> for TensorIndexer { fn from(range: $range_type) -> Self { - use std::ops::Bound::*; + use core::ops::Bound::*; let start = match range.start_bound() { Included(idx) => Included(*idx), diff --git a/safetensors/src/tensor.rs b/safetensors/src/tensor.rs index 596fa367..bee71782 100644 --- a/safetensors/src/tensor.rs +++ b/safetensors/src/tensor.rs @@ -1,11 +1,9 @@ //! 
Module Containing the most important structures +use crate::lib::{Cow, HashMap, String, ToString, Vec}; use crate::slice::{InvalidSlice, SliceIterator, TensorIndexer}; use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fs::File; -use std::io::{BufWriter, Write}; -use std::path::Path; +#[cfg(feature = "std")] +use std::io::Write; const MAX_HEADER_SIZE: usize = 100_000_000; @@ -32,6 +30,7 @@ pub enum SafeTensorError { /// The offsets declared for tensor with name `String` in the header are invalid InvalidOffset(String), /// IoError + #[cfg(feature = "std")] IoError(std::io::Error), /// JSON error JsonError(serde_json::Error), @@ -46,6 +45,7 @@ pub enum SafeTensorError { ValidationOverflow, } +#[cfg(feature = "std")] impl From for SafeTensorError { fn from(error: std::io::Error) -> SafeTensorError { SafeTensorError::IoError(error) @@ -58,13 +58,13 @@ impl From for SafeTensorError { } } -impl std::fmt::Display for SafeTensorError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl core::fmt::Display for SafeTensorError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self:?}") } } -impl std::error::Error for SafeTensorError {} +impl core::error::Error for SafeTensorError {} struct PreparedData { n: u64, @@ -164,7 +164,7 @@ pub trait View { fn data_len(&self) -> usize; } -fn prepare + Ord + std::fmt::Display, V: View, I: IntoIterator>( +fn prepare + Ord + core::fmt::Display, V: View, I: IntoIterator>( data: I, data_info: &Option>, // ) -> Result<(Metadata, Vec<&'hash TensorView<'data>>, usize), SafeTensorError> { @@ -212,7 +212,7 @@ fn prepare + Ord + std::fmt::Display, V: View, I: IntoIterator + Ord + std::fmt::Display, + S: AsRef + Ord + core::fmt::Display, V: View, I: IntoIterator, >( @@ -240,14 +240,15 @@ pub fn serialize< /// Serialize to a regular file the dictionnary of tensors. 
/// Writing directly to file reduces the need to allocate the whole amount to /// memory. +#[cfg(feature = "std")] pub fn serialize_to_file< - S: AsRef + Ord + std::fmt::Display, + S: AsRef + Ord + core::fmt::Display, V: View, I: IntoIterator, >( data: I, data_info: &Option>, - filename: &Path, + filename: &std::path::Path, ) -> Result<(), SafeTensorError> { let ( PreparedData { @@ -255,7 +256,7 @@ pub fn serialize_to_file< }, tensors, ) = prepare(data, data_info)?; - let mut f = BufWriter::new(File::create(filename)?); + let mut f = std::io::BufWriter::new(std::fs::File::create(filename)?); f.write_all(n.to_le_bytes().as_ref())?; f.write_all(&header_bytes)?; for tensor in tensors { @@ -303,7 +304,7 @@ impl<'data> SafeTensors<'data> { return Err(SafeTensorError::InvalidHeaderLength); } let string = - std::str::from_utf8(&buffer[8..stop]).map_err(|_| SafeTensorError::InvalidHeader)?; + core::str::from_utf8(&buffer[8..stop]).map_err(|_| SafeTensorError::InvalidHeader)?; // Assert the string starts with { // NOTE: Add when we move to 0.4.0 // if !string.starts_with('{') { @@ -719,6 +720,9 @@ mod tests { use super::*; use crate::slice::IndexOp; use proptest::prelude::*; + #[cfg(not(feature = "std"))] + extern crate std; + use std::io::Write; const MAX_DIMENSION: usize = 8; const MAX_SIZE: usize = 8; @@ -1021,10 +1025,13 @@ mod tests { std::fs::remove_file(&filename).unwrap(); // File api - serialize_to_file(&metadata, &None, Path::new(&filename)).unwrap(); - let raw = std::fs::read(&filename).unwrap(); - let _deserialized = SafeTensors::deserialize(&raw).unwrap(); - std::fs::remove_file(&filename).unwrap(); + #[cfg(feature = "std")] + { + serialize_to_file(&metadata, &None, std::path::Path::new(&filename)).unwrap(); + let raw = std::fs::read(&filename).unwrap(); + let _deserialized = SafeTensors::deserialize(&raw).unwrap(); + std::fs::remove_file(&filename).unwrap(); + } } #[test] @@ -1097,7 +1104,7 @@ mod tests { let n = serialized.len(); let filename = 
"out.safetensors"; - let mut f = BufWriter::new(File::create(filename).unwrap()); + let mut f = std::io::BufWriter::new(std::fs::File::create(filename).unwrap()); f.write_all(n.to_le_bytes().as_ref()).unwrap(); f.write_all(serialized).unwrap(); f.write_all(b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0").unwrap(); From adeda636b52a78c920e06d547d613a9cb22c5d28 Mon Sep 17 00:00:00 2001 From: SunghwanShim <128351792+SunghwanShim@users.noreply.github.com> Date: Thu, 9 Jan 2025 02:17:49 +0900 Subject: [PATCH 2/3] Fix wrong signature of `safe_open.__init__` in stub file (#557) * fix: pyi binding bug * Fixing the stubbing script (breaking change in PyO3). --------- Co-authored-by: Nicolas Patry --- .../python/py_src/safetensors/__init__.pyi | 2 +- bindings/python/stub.py | 26 +++++-------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/bindings/python/py_src/safetensors/__init__.pyi b/bindings/python/py_src/safetensors/__init__.pyi index 81442bd3..7781229f 100644 --- a/bindings/python/py_src/safetensors/__init__.pyi +++ b/bindings/python/py_src/safetensors/__init__.pyi @@ -69,7 +69,7 @@ class safe_open: The device on which you want the tensors. 
""" - def __init__(filename, framework, device=...): + def __init__(self, filename, framework, device=...): pass def __enter__(self): """ diff --git a/bindings/python/stub.py b/bindings/python/stub.py index 78ddc47f..71fef2cc 100644 --- a/bindings/python/stub.py +++ b/bindings/python/stub.py @@ -42,10 +42,7 @@ def fn_predicate(obj): return ( obj.__doc__ and obj.__text_signature__ - and ( - not obj.__name__.startswith("_") - or obj.__name__ in {"__enter__", "__exit__"} - ) + and (not obj.__name__.startswith("_") or obj.__name__ in {"__enter__", "__exit__"}) ) if inspect.isgetsetdescriptor(obj): return obj.__doc__ and not obj.__name__.startswith("_") @@ -81,15 +78,14 @@ def pyi_file(obj, indent=""): body = "" if obj.__doc__: - body += ( - f'{indent}"""\n{indent}{do_indent(obj.__doc__, indent)}\n{indent}"""\n' - ) + body += f'{indent}"""\n{indent}{do_indent(obj.__doc__, indent)}\n{indent}"""\n' fns = inspect.getmembers(obj, fn_predicate) # Init if obj.__text_signature__: - body += f"{indent}def __init__{obj.__text_signature__}:\n" + signature = obj.__text_signature__.replace("(", "(self, ", 1) + body += f"{indent}def __init__{signature}:\n" body += f"{indent+INDENT}pass\n" body += "\n" @@ -146,11 +142,7 @@ def do_black(content, is_pyi): def write(module, directory, origin, check=False): - submodules = [ - (name, member) - for name, member in inspect.getmembers(module) - if inspect.ismodule(member) - ] + submodules = [(name, member) for name, member in inspect.getmembers(module) if inspect.ismodule(member)] filename = os.path.join(directory, "__init__.pyi") pyi_content = pyi_file(module) @@ -159,9 +151,7 @@ def write(module, directory, origin, check=False): if check: with open(filename, "r") as f: data = f.read() - assert ( - data == pyi_content - ), f"The content of {filename} seems outdated, please run `python stub.py`" + assert data == pyi_content, f"The content of {filename} seems outdated, please run `python stub.py`" else: with open(filename, "w") as f: 
f.write(pyi_content) @@ -184,9 +174,7 @@ def write(module, directory, origin, check=False): if check: with open(filename, "r") as f: data = f.read() - assert ( - data == py_content - ), f"The content of {filename} seems outdated, please run `python stub.py`" + assert data == py_content, f"The content of {filename} seems outdated, please run `python stub.py`" else: with open(filename, "w") as f: f.write(py_content) From f05a1ec483c470202a3413772eef8eab6c3a8ba0 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 8 Jan 2025 18:37:23 +0100 Subject: [PATCH 3/3] Patch release because python bindings. --- bindings/python/Cargo.lock | 241 +++++++++++++++++++++++++++++++++++++ bindings/python/Cargo.toml | 2 +- safetensors/Cargo.toml | 2 +- 3 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 bindings/python/Cargo.lock diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock new file mode 100644 index 00000000..9c63f23a --- /dev/null +++ b/bindings/python/Cargo.lock @@ -0,0 +1,241 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "portable-atomic" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0e0469a84f208e20044b98965e1561028180219e35352a2afaf2b942beff3b" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb1547a7f9966f6f1a0f0227564a9945fe36b90da5a93b3933fc3dc03fae372d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb6da8ec6fa5cedd1626c886fc8749bdcbb09424a86461eb8cdf096b7c33257" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38a385202ff5a92791168b1136afae5059d3ac118457bb7bc304c197c2d33e7d" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + 
+[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "safetensors" +version = "0.5.2" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "safetensors-python" +version = "0.5.2" +dependencies = [ + "memmap2", + "pyo3", + "safetensors", + "serde_json", +] + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.135" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index c963fd64..9fd36f82 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "safetensors-python" -version = "0.5.1-dev.0" +version = "0.5.2" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/safetensors/Cargo.toml b/safetensors/Cargo.toml index 02bcc2f0..85cc1191 100644 --- a/safetensors/Cargo.toml +++ b/safetensors/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "safetensors" -version = "0.5.1-dev.0" +version = "0.5.2" edition = "2021" homepage = "https://github.com/huggingface/safetensors" repository = "https://github.com/huggingface/safetensors"