From 978b55d1e391267c9e9b7b02766e2938e7d9321e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 26 Mar 2022 17:56:29 +0100 Subject: [PATCH 1/3] Add examples in README.md (#79) --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c88703..df6e2c8 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,67 @@ A pooled representation of arrays for purposes of compression when there are few unique elements. - **Installation**: at the Julia REPL, `import Pkg; Pkg.add("PooledArrays")` -**Maintenance**: PooledArrays is maintained collectively by the [JuliaData collaborators](https://github.com/orgs/JuliaData/people). -Responsiveness to pull requests and issues can vary, depending on the availability of key collaborators. +**Usage**: + +Working with `PooledArray` objects does not differ from working with general +`AbstractArray` objects, with two exceptions: +* If you hold mutable objects in `PooledArray` it is not allowed to modify them + after they are stored in it. +* In multi-threaded context it is not safe to assign values that are not already + present in a `PooledArray`'s pool from one thread while either reading or + writing to the same array from another thread. + +Keeping in mind these two restrictions, as a user, the only thing you need to +learn is how to create `PooledArray` objects. This is accomplished by passing +an `AbstractArray` to the `PooledArray` constructor: + +``` +julia> using PooledArrays + +julia> PooledArray(["a" "b"; "c" "d"]) +2×2 PooledMatrix{String, UInt32, Matrix{UInt32}}: + "a" "b" + "c" "d" + ``` + +`PooledArray` performs compression by storing an array of reference integers and +a mapping from integers to its elements in a dictionary. 
In this way, if the +size of the reference integer is smaller than the size of the actual elements +the resulting `PooledArray` has a smaller memory footprint than the equivalent +`Array`. By default `UInt32` is used as the type of reference integers. However, +you can specify the reference integer type you want to use by passing it as a +second argument to the constructor. This is usually done when you know that you +will have only a few unique elements in the `PooledArray`. + +``` +julia> PooledArray(["a", "b", "c", "d"], UInt8) +4-element PooledVector{String, UInt8, Vector{UInt8}}: + "a" + "b" + "c" + "d" + ``` + +Alternatively, you can pass the `compress` and `signed` keyword arguments to the +`PooledArray` constructor to automatically select the reference integer type. +When you pass `compress=true` then the reference integer type is chosen to be +the smallest type that is large enough to hold all unique values in the array. When +you pass `signed=true` the reference type is signed (by default it is unsigned). +``` +julia> PooledArray(["a", "b", "c", "d"]; compress=true, signed=true) +4-element PooledVector{String, Int8, Vector{Int8}}: + "a" + "b" + "c" + "d" +``` + +**Maintenance**: PooledArrays is maintained collectively by the +[JuliaData collaborators](https://github.com/orgs/JuliaData/people). +Responsiveness to pull requests and issues can vary, +depending on the availability of key collaborators. 
## Related Packages From d84c4be3e5f581122a781fd0d082960933cd5aa1 Mon Sep 17 00:00:00 2001 From: sl-solution <79064058+sl-solution@users.noreply.github.com> Date: Wed, 20 Apr 2022 00:40:42 +1200 Subject: [PATCH 2/3] fix bug in `pop!` and `popfirst!` (#77) --- src/PooledArrays.jl | 4 ++-- test/runtests.jl | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/PooledArrays.jl b/src/PooledArrays.jl index 2f4d0fe..a840034 100644 --- a/src/PooledArrays.jl +++ b/src/PooledArrays.jl @@ -613,14 +613,14 @@ function Base.append!(pv::PooledVector, items::AbstractArray) return pv end -Base.pop!(pv::PooledVector) = pv.invpool[pop!(pv.refs)] +Base.pop!(pv::PooledVector) = pv.pool[pop!(pv.refs)] function Base.pushfirst!(pv::PooledVector{S,R}, v::T) where {S,R,T} pushfirst!(pv.refs, getpoolidx(pv, v)) return pv end -Base.popfirst!(pv::PooledVector) = pv.invpool[popfirst!(pv.refs)] +Base.popfirst!(pv::PooledVector) = pv.pool[popfirst!(pv.refs)] Base.empty!(pv::PooledVector) = (empty!(pv.refs); pv) diff --git a/test/runtests.jl b/test/runtests.jl index 5b36073..04973b6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -588,3 +588,16 @@ end @test_throws BoundsError insert!(x, 9, true) @test x == [1, 1, 10, 99, 2, 3, 1] end + +@testset "pop! and popfirst!" 
begin + x = PooledArray([1, 2, 3]) + @test pop!(x) == 3 + @test x == [1, 2] + @test popfirst!(x) == 1 + @test x == [2] + x = PooledArray(["1", "2", "3"]) + @test pop!(x) == "3" + @test x == ["1", "2"] + @test popfirst!(x) == "1" + @test x == ["2"] +end From 65e3316b7d6db5f4420fec3f412d0a1a84eb77f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sat, 30 Apr 2022 08:20:31 +0200 Subject: [PATCH 3/3] Fix error in constructor (#82) --- Project.toml | 5 +++-- src/PooledArrays.jl | 29 +++++++++++++++++------------ test/runtests.jl | 24 +++++++++++++++++++++++- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index d4108dd..0b90562 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "PooledArrays" uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.4.1" +version = "1.4.2" [deps] DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" @@ -12,6 +12,7 @@ julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" [targets] -test = ["Test"] +test = ["OffsetArrays", "Test"] diff --git a/src/PooledArrays.jl b/src/PooledArrays.jl index a840034..7daf2b9 100644 --- a/src/PooledArrays.jl +++ b/src/PooledArrays.jl @@ -114,20 +114,25 @@ function _label(xs::AbstractArray, ) where {T, I<:Integer} @inbounds for i in start:length(xs) - x = xs[i] - lbl = get(invpool, x, zero(I)) - if lbl !== zero(I) - labels[i] = lbl + idx = i + firstindex(xs) - 1 + if !isassigned(xs, idx) + labels[i] = zero(I) else - if nlabels == typemax(I) - I2 = _widen(I) - return _label(xs, T, I2, i, convert(Vector{I2}, labels), - convert(Dict{T, I2}, invpool), pool, nlabels) + x = xs[idx] + lbl = get(invpool, x, zero(I)) + if lbl !== zero(I) + labels[i] = lbl + else + if nlabels == typemax(I) + I2 = _widen(I) + return _label(xs, T, I2, i, convert(Vector{I2}, labels), + convert(Dict{T, I2}, invpool), pool, nlabels) + end + nlabels += 1 + labels[i] = nlabels + 
invpool[x] = nlabels + push!(pool, x) end - nlabels += 1 - labels[i] = nlabels - invpool[x] = nlabels - push!(pool, x) end end labels, invpool, pool diff --git a/test/runtests.jl b/test/runtests.jl index 04973b6..105a708 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Test +using Test, OffsetArrays using PooledArrays using DataAPI: refarray, refvalue, refpool, invrefpool using PooledArrays: refcount @@ -601,3 +601,25 @@ end @test popfirst!(x) == "1" @test x == ["2"] end + +@testset "constructor corner cases" begin + x = Vector{Any}(undef, 3) + y = PooledArray(x) + @test y isa PooledArray{Any} + @test !any(i -> isassigned(y, i), eachindex(y)) + @test all(iszero, y.refs) + @test isempty(y.pool) + @test isempty(y.invpool) + + x[2] = "a" + for v in (x, OffsetVector(x, -5)) + y = PooledArray(v) + @test y isa PooledArray{Any} + @test !isassigned(x, 1) + @test x[2] == "a" + @test !isassigned(x, 3) + @test y.refs == [0, 1, 0] + @test y.pool == ["a"] + @test y.invpool == Dict("a" => 1) + end +end