PDE

Some PDE algorithms visualized and ported for the GPU.

PDE 1

using CLArrays, GLVisualize, GeometryTypes, GLAbstraction, StaticArrays

TY = Float32
N = 1024
const h    = TY(2*π/N)
const epsn = TY(h * .5)
const C    = TY(2/epsn)
const tau  = TY(epsn * h)
Tfinal = 50.

S(x,y) = exp(-x^2/0.1f0)*exp(-y^2/0.1f0)

ArrayType = CLArray
# real-space and reciprocal-space grids
# the real-space grid is just used for plotting!
X_cpu = convert.(TY, collect(linspace(-pi+h, pi, N)) .* ones(1,N))
X = ArrayType(X_cpu);
k = collect([0:N/2; -N/2+1:-1]);
Â = ArrayType(convert.(TY,kron(k.^2, ones(1,N)) + kron(ones(N), k'.^2)));

# initial condition
uc = ArrayType(TY(2.0)*(rand(TY, N, N)-TY(0.5)))

#################################################################
#################################################################

pow3(u) = complex((u * u * u) - u)
function take_step!(u, Â, t_plot, fftplan!, ifftplan!, u3fft, uc, tmp)
    u3fft .= pow3.(u)
    fftplan! * u3fft
    uc .= complex.(u)
    fftplan! * uc
    @. tmp .= ((1f0+C*tau*Â) .* uc .- tau/epsn * (Â .* u3fft)) ./ (1f0+(epsn*tau)*Â.^2f0+C*tau*Â)
    ifftplan! * tmp
    u .= real.(tmp)
    nothing
end
function normalise_af!(u, out)
    out .= u .- minimum(u)
    out .= out ./ maximum(out)
    nothing
end
#################################################################
#################################################################

n = 1
T_plot = 0.01; t_plot = 0.0
ceil(Tfinal / tau)
up = copy(uc)
ucc = complex.(uc)
fftplan! = plan_fft!(ucc)
ifftplan! = plan_ifft!(ucc)
u3fft = similar(ucc)
tmp = similar(ucc)

w = glscreen(resolution = (N, N))
normalise_af!(uc,up)
up .= up .* 0.1f0
robj = visualize(
    reinterpret(Intensity{Float32}, Array(up)),
    stroke_width = 0f0,
    levels = 20f0,
    color_map = Colors.colormap("RdBu", 100),
    color_norm = Vec2f0(0, 0.1)
).children[]
_view(robj, position = Vec3f0(0, 0.5, 2.5), lookat = Vec3f0(0))
io, buffer = GLVisualize.create_video_stream(homedir()*"/Desktop/pd1.mkv", w)
center!(w, :orthographic_pixel)
idx = 0
while isopen(w)
    take_step!(uc, Â, t_plot, fftplan!, ifftplan!, u3fft, ucc, tmp)
    t_plot += tau
    normalise_af!(uc, up)
    up .= up .* 0.1f0
    GLAbstraction.update!(robj[:intensity], reinterpret(SVector{1, Float32}, Array(up)))
    GLWindow.poll_glfw()
    GLWindow.reactive_run_till_now()
    GLWindow.render_frame(w)
    GLWindow.swapbuffers(w)
    if idx == 4
        GLVisualize.add_frame!(io, w, buffer)
        idx = 0
    end
    idx += 1
end
close(io)
GLWindow.destroy!(w)

PDE 2

Show case ported from:

Kuramoto-Sivashinksy-benchmark

using CLArrays, GLVisualize, GPUArrays, GLAbstraction, GeometryTypes

# source: https://github.com/johnfgibson/julia-pde-benchmark/blob/master/1-Kuramoto-Sivashinksy-benchmark.ipynb
function inner_ks(n, IFFT!, FFT!, Nt, Nn, Nn1, u, G, A_inv, B, dt2, dt32, uslice, U)
    Nn1 .= Nn       # shift nonlinear term in time
    Nn .= u         # put u into Nn in prep for comp of nonlinear term

    IFFT! * Nn

    # plotting
    uslice .= real.(Nn) ./ 10f0
    U[1:Nt, n] = reshape(Array(uslice), (Nt, 1)) # copy from gpu to opengl gpu not implemented for now

    # transform Nn to gridpt values, in place
    Nn .= Nn .* Nn   # collocation calculation of u^2
    FFT!*Nn        # transform Nn back to spectral coeffs, in place

    Nn .= G .* Nn    # compute Nn == -1/2 d/dx (u^2) = -u u_x

    # loop fusion! Julia translates the folling line of code to a single for loop.
    u .= A_inv .* (B .* u .+ dt32 .* Nn .- dt2 .* Nn1)
end

T = Float32; AT = CLArray
N = 1000
Lx = T(64*pi)
Nx = T(N)
dt = T(1/16)

x = Lx*(0:Nx-1)/Nx
u = T.(cos.(x) + 0.1*sin.(x/8) + 0.01*cos.((2*pi/Lx)*x))

u = AT((T(1)+T(0)im)*u)             # force u to be complex
Nx = length(u)                      # number of gridpoints
kx = T.(vcat(0:Nx/2-1, 0:0, -Nx/2+1:-1))# integer wavenumbers: exp(2*pi*kx*x/L)
alpha = T(2)*pi*kx/Lx                  # real wavenumbers:    exp(alpha*x)

D = T(1)im*alpha                       # spectral D = d/dx operator

L = alpha.^2 .- alpha.^4            # spectral L = -D^2 - D^4 operator

G = AT(T(-0.5) .* D)               # spectral -1/2 D operator, to eval -u u_x = 1/2 d/dx u^2

# convenience variables
dt2  = T(dt/2)
dt32 = T(3*dt/2)
A_inv = AT((ones(T, Nx) - dt2*L).^(-1))
B = AT(ones(T, Nx) + dt2*L)

# compute in-place FFTW plans
FFT! = plan_fft!(u)
IFFT! = plan_ifft!(u)

# compute nonlinear term Nn == -u u_x
powed = u .* u
Nn = G .* fft(powed);    # Nn == -1/2 d/dx (u^2) = -u u_x
Nn1 = copy(Nn);        # Nn1 = Nn at first time step
FFT! * u;

uslice = real(u)
U = zeros(Float32, N, N)

w = glscreen(resolution = (1920, 1080))

robj = visualize(
    U, :surface, color_norm = Vec2f0(-0.1, 0.1),
    ranges = ((-3f0, 3f0), (-3f0, 3f0))
).children[]
Ugpu = robj[:position_z]

# setup camera and view object
_view(robj, position = Vec3f0(-4.33, 3.8, -2.7), lookat = Vec3f0(-0.23, 0.5, 0.7))
cam = w.cameras[:perspective]
push!(cam.up, Vec3f0(0.3, -0.33, -0.9))

io, buffer = GLVisualize.create_video_stream(homedir()*"/Desktop/pd2.mkv", w)
for n in 1:N
    isopen(w) || break
    inner_ks(n, (IFFT!), (FFT!), N, Nn, Nn1, u, G, A_inv, B, dt2, dt32, uslice, Ugpu)
    GLWindow.poll_glfw()
    GLWindow.reactive_run_till_now()
    GLWindow.render_frame(w)
    GLWindow.swapbuffers(w)
    (n % 4 == 0) && GLVisualize.add_frame!(io, w, buffer)
end
close(io)
GLWindow.destroy!(w)