Skip to content

sl-solution/InMemoryDatasets.jl

 
 

Folders and files

Name
Last commit message
Last commit date

Latest commit

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

InMemoryDatasets

InMemoryDatasets.jl is a Julia package for working with tabular data sets.

The package is developed for Julia 1.6 and later, and only works on 64-bit operating systems.

CI

Example

julia> using InMemoryDatasets
julia> g1 = repeat(1:6, inner = 4);
julia> g2 = repeat(1:4, 6);
julia> y = ["d8888b.  ", " .d8b.   ", "d888888b ", "  .d8b.  ", "88  `8D  ", "d8' `8b  ",
            "`~~88~~' ", " d8' `8b ", "88   88  ", "88ooo88  ", "   88    ", " 88ooo88 ",
            "88   88  ", "88~~~88  ", "   88    ", " 88~~~88 ", "88  .8D  ", "88   88  ",
            "   88    ", " 88   88 ", "Y8888D'  ", "YP   YP  ", "   YP    ", " YP   YP "];
julia> ds = Dataset(g1 = g1, g2 = g2, y = y)
24×3 Dataset
 Row │ g1        g2        y         
     │ identity  identity  identity  
     │ Int64?    Int64?    String?   
─────┼───────────────────────────────
   1 │        1         1  d8888b.
   2 │        1         2   .d8b.
   3 │        1         3  d888888b
   4 │        1         4    .d8b.
   5 │        2         1  88  `8D
   6 │        2         2  d8' `8b
   7 │        2         3  `~~88~~'
   8 │        2         4   d8' `8b
   9 │        3         1  88   88
  10 │        3         2  88ooo88
  11 │        3         3     88
  12 │        3         4   88ooo88
  13 │        4         1  88   88
  14 │        4         2  88~~~88
  15 │        4         3     88
  16 │        4         4   88~~~88
  17 │        5         1  88  .8D
  18 │        5         2  88   88
  19 │        5         3     88
  20 │        5         4   88   88
  21 │        6         1  Y8888D'
  22 │        6         2  YP   YP
  23 │        6         3     YP
  24 │        6         4   YP   YP

julia> sort(ds, 2)
24×3 Sorted Dataset
 Sorted by: g2
 Row │ g1        g2        y         
     │ identity  identity  identity  
     │ Int64?    Int64?    String?   
─────┼───────────────────────────────
   1 │        1         1  d8888b.
   2 │        2         1  88  `8D
   3 │        3         1  88   88
   4 │        4         1  88   88
   5 │        5         1  88  .8D
   6 │        6         1  Y8888D'
   7 │        1         2   .d8b.
   8 │        2         2  d8' `8b
   9 │        3         2  88ooo88
  10 │        4         2  88~~~88
  11 │        5         2  88   88
  12 │        6         2  YP   YP
  13 │        1         3  d888888b
  14 │        2         3  `~~88~~'
  15 │        3         3     88
  16 │        4         3     88
  17 │        5         3     88
  18 │        6         3     YP
  19 │        1         4    .d8b.
  20 │        2         4   d8' `8b
  21 │        3         4   88ooo88
  22 │        4         4   88~~~88
  23 │        5         4   88   88
  24 │        6         4   YP   YP

julia> tds = transpose(groupby(ds, 1), :y)
6×6 Dataset
 Row │ g1        _variables_  _c1        _c2        _c3        _c4       
     │ identity  identity     identity   identity   identity   identity  
     │ Int64?    String?      String?    String?    String?    String?   
─────┼───────────────────────────────────────────────────────────────────
   1 │        1  y            d8888b.     .d8b.     d888888b     .d8b.
   2 │        2  y            88  `8D    d8' `8b    `~~88~~'    d8' `8b
   3 │        3  y            88   88    88ooo88       88       88ooo88
   4 │        4  y            88   88    88~~~88       88       88~~~88
   5 │        5  y            88  .8D    88   88       88       88   88
   6 │        6  y            Y8888D'    YP   YP       YP       YP   YP

julia> mds = map(tds, x->replace(x, r"[^ ]"=>"#"), r"_c")
6×6 Dataset
 Row │ g1        _variables_  _c1        _c2        _c3        _c4       
     │ identity  identity     identity   identity   identity   identity  
     │ Int64?    String?      String?    String?    String?    String?   
─────┼───────────────────────────────────────────────────────────────────
   1 │        1  y            #######     #####     ########     #####
   2 │        2  y            ##  ###    ### ###    ########    ### ###
   3 │        3  y            ##   ##    #######       ##       #######
   4 │        4  y            ##   ##    #######       ##       #######
   5 │        5  y            ##  ###    ##   ##       ##       ##   ##
   6 │        6  y            #######    ##   ##       ##       ##   ##

julia> byrow(mds, sum, r"_c", by = x->count(isequal('#'),x))
6-element Vector{Int64}:
 25
 25
 20
 20
 15
 17

julia> using Chain

julia> @chain tds begin
           repeat!(2)
           sort!(:g1)
           flatten!(r"_c")
           insertcols!(:g2=>repeat(1:9, 12))
           groupby(:g2)
           transpose(r"_c")
           modify!(r"_c"=>byrow(x->join(reverse(x))))
           select!(r"row")
           insertcols!(1, :g=>repeat(1:4, 9))
           sort!(:g)
       end
36×2 Sorted Dataset
 Sorted by: g
 Row │ g         row_function
     │ identity  identity
     │ Int64?    String?
─────┼────────────────────────
   1 │        1  YY88888888dd
   2 │        1  888888888888
   3 │        1  88        88
   4 │        1  88        88
   5 │        1  88..    ``88
   6 │        1  DD88888888bb
   7 │        1  ''DD8888DD..
   8 │        1
   9 │        1
  10 │        2  YY888888dd
  11 │        2  PP88888888..
  12 │        2      ~~oo''dd
  13 │        2      ~~oo  88
  14 │        2      ~~oo``bb
  15 │        2  YY88888888..
  16 │        2  PP888888bb
  17 │        2
  18 │        2
  19 │        3          ``dd
  20 │        3          ~~88
  21 │        3          ~~88
  22 │        3  YY8888888888
  23 │        3  PP8888888888
  24 │        3          ~~88
  25 │        3          ~~88
  26 │        3          ''bb
  27 │        3
  28 │        4
  29 │        4  YY888888dd
  30 │        4  PP88888888..
  31 │        4      ~~oo''dd
  32 │        4      ~~oo  88
  33 │        4      ~~oo``bb
  34 │        4  YY88888888..
  35 │        4  PP888888bb
  36 │        4