sort and friends

Sorting algorithms with similar interface and default settings as the Julia Base ones, on GPUs:

  • sort! (in-place), sort (out-of-place)
  • sortperm!, sortperm
  • Other names: sort, sort_team, sort_team_by_key, stable_sort or variations in Kokkos, RAJA, Thrust that I know of.

Function signatures:

AcceleratedKernels.sort!Function
sort!(
    v::AbstractArray, backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.sortFunction
sort(
    v::AbstractArray, backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.sortperm!Function
sortperm!(
    ix::AbstractArray,
    v::AbstractArray,
    backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.sortpermFunction
sortperm(
    v::AbstractArray,
    backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source

Specific implementations that the interfaces above forward to:

  • merge_sort! (in-place), merge_sort (out-of-place) - sort arbitrary objects with custom comparisons.
  • merge_sort_by_key!, merge_sort_by_key - sort a vector of keys along with a "payload", a vector of corresponding values.
  • merge_sortperm!, merge_sortperm, merge_sortperm_lowmem!, merge_sortperm_lowmem - compute a sorting index permutation.

Function signatures:

AcceleratedKernels.merge_sort!Function
merge_sort!(
    v::AbstractArray, backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.merge_sortFunction
merge_sort(
    v::AbstractVector, backend::Backend=get_backend(v);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractGPUVector}=nothing,
)
source
AcceleratedKernels.merge_sort_by_key!Function
merge_sort_by_key!(
    keys::AbstractArray,
    values::AbstractArray,
    backend::Backend=get_backend(keys);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp_keys::Union{Nothing, AbstractArray}=nothing,
    temp_values::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.merge_sort_by_keyFunction
merge_sort_by_key(
    keys::AbstractGPUVector,
    values::AbstractGPUVector,
    backend::Backend=get_backend(keys);

    lt=isless,
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp_keys::Union{Nothing, AbstractGPUVector}=nothing,
    temp_values::Union{Nothing, AbstractGPUVector}=nothing,
)
source
AcceleratedKernels.merge_sortperm!Function
merge_sortperm!(
    ix::AbstractArray,
    v::AbstractArray,
    backend::Backend=get_backend(v);

    lt=(<),
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    inplace::Bool=false,
    block_size::Int=256,
    temp_ix::Union{Nothing, AbstractArray}=nothing,
    temp_v::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.merge_sortpermFunction
merge_sortperm(
    v::AbstractArray, backend::Backend=get_backend(v);

    lt=(<),
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    inplace::Bool=false,
    block_size::Int=256,
    temp_ix::Union{Nothing, AbstractArray}=nothing,
    temp_v::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.merge_sortperm_lowmem!Function
merge_sortperm_lowmem!(
    ix::AbstractArray,
    v::AbstractArray,
    backend::Backend=get_backend(v);

    lt=(<),
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source
AcceleratedKernels.merge_sortperm_lowmemFunction
merge_sortperm_lowmem(
    v::AbstractArray, backend::Backend=get_backend(v);

    lt=(<),
    by=identity,
    rev::Bool=false,
    order::Base.Order.Ordering=Base.Order.Forward,

    block_size::Int=256,
    temp::Union{Nothing, AbstractArray}=nothing,
)
source

Example:

import AcceleratedKernels as AK
using AMDGPU

v = ROCArray(rand(Int32, 100_000))
AK.sort!(v)

As GPU memory is more expensive, all functions in AcceleratedKernels.jl expose any temporary arrays they will use (the temp argument); you can supply your own buffers to make the algorithms not allocate additional GPU storage, e.g.:

v = ROCArray(rand(Float32, 100_000))
temp = similar(v)
AK.sort!(v, temp=temp)