pystencils

5. pystencils#

import numpy as np
import pystencils as ps
import sympy as sp
from pystencils import create_kernel
from pystencils import plot as plt
sp.init_printing()
input_arr = np.random.rand(1024, 1024)
output_arr = np.zeros_like(input_arr)
def numpy_kernel():
    output_arr[1:-1, 1:-1] = (
        input_arr[2:, 1:-1]
        + input_arr[:-2, 1:-1]
        + input_arr[1:-1, 2:]
        + input_arr[1:-1, :-2]
    )
%%timeit
numpy_kernel()
1.77 ms ± 12.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
src, dst = ps.fields(src=input_arr, dst=output_arr)

symbolic_description = ps.Assignment(
    dst[0, 0], (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4
)
symbolic_description
_images/6f3b5823225d94c0a1e71a1d5f0410480b81bdd4c889f75091a1f120fa4c646b.png
plt.figure(figsize=(3, 3))
ps.stencil.plot_expression(symbolic_description.rhs)
_images/bd41052d2ff1456db680f42450b406e46c7879c81ba50482e154f2c34c3dc0c4.png
kernel = ps.create_kernel(symbolic_description).compile()
def pystencils_kernel():
    kernel(src=input_arr, dst=output_arr)
%%timeit
pystencils_kernel()
408 μs ± 188 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
my_field = ps.fields("f(3) : double[2D]")
field_access = my_field[1, 0](1)
field_access
_images/c7344de8ac7d4b661b0ba27dd03c8040983077081f954c5669b4482e1ed7bfaf.png
isinstance(field_access, sp.Symbol)
True
img_field = ps.fields("img(4): [2D]")
w1, w2 = sp.symbols("w_1 w_2")
color = 2
sobel_x = (
    -w2 * img_field[-1, 0](color)
    - w1 * img_field[-1, -1](color)
    - w1 * img_field[-1, +1](color)
    + w2 * img_field[+1, 0](color)
    + w1 * img_field[+1, -1](color)
    - w1 * img_field[+1, +1](color)
) ** 2
sobel_x
_images/0578d6cca7fa53c16ae676c96bd0b9e400a27e81b495582117239edff7271752.png
sobel_x = sobel_x.subs(w1, 0.5)
sobel_x
_images/52fdba39ab3aa62aa4a8d5a7f5975d5f0f631965d1dbf4598d234f1d29baa0a1.png
dst_field = ps.fields("dst: [2D]")
update_rule = ps.Assignment(dst_field[0, 0], sobel_x)
update_rule
_images/1423ff538474806767a3d8599a73e3d7dd1670cfc8df813870f39360e4bc63e5.png
ast = create_kernel(update_rule, cpu_openmp=False)
compiled_kernel = ast.compile()
src_arr = np.zeros([20, 30])
dst_arr = np.zeros_like(src_arr)

dst, src = ps.fields(dst=dst_arr, src=src_arr)
grad_x, grad_y = sp.symbols("grad_x, grad_y")

symbolic_description = [
    ps.Assignment(grad_x, (src[1, 0] - src[-1, 0]) / 2),
    ps.Assignment(grad_y, (src[0, 1] - src[0, -1]) / 2),
    ps.Assignment(dst[0, 0], grad_x + grad_y),
]
kernel = ps.create_kernel(symbolic_description)
symbolic_description
_images/1e68e4894c1e08163b0861084bc1960590980a82cbb5b646337fdd0de5fb0d76.png
@ps.kernel
def symbolic_description_using_function():
    grad_x @= (src[1, 0] - src[-1, 0]) / 2
    grad_y @= (src[0, 1] - src[0, -1]) / 2
    dst[0, 0] @= grad_x + grad_y


symbolic_description_using_function
_images/1e68e4894c1e08163b0861084bc1960590980a82cbb5b646337fdd0de5fb0d76.png
sp.Piecewise((1.0, src[0, 1] > 0), (0.0, True)) + src[1, 0]
_images/5702a6986c896b610bd377d6f74f698fe962f8bac08211362e4665f0279a1a06.png
@ps.kernel
def kernel_with_piecewise():
    grad_x @= (src[1, 0] - src[-1, 0]) / 2 if src[-1, 0] > 0 else 0.0


kernel_with_piecewise
_images/b8103874bf7db58dbe3d6db97ba2236883f81c85ae513ea71b5bfa36e90f7823.png
@ps.kernel
def somewhat_longer_dummy_kernel(s):
    s.a @= src[0, 1] + src[-1, 0]
    s.b @= 2 * src[1, 0] + src[0, -1]
    s.c @= src[0, 1] + 2 * src[1, 0] + src[-1, 0] + src[0, -1] - src[0, 0]
    dst[0, 0] @= s.a + s.b + s.c


ac = ps.AssignmentCollection(
    main_assignments=somewhat_longer_dummy_kernel[-1:],
    subexpressions=somewhat_longer_dummy_kernel[:-1],
)
ac
Subexpressions:
$$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$
$$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$
$$c \leftarrow_{} - {src}_{(0,0)} + 2 {src}_{(1,0)} + {src}_{(0,1)} + {src}_{(0,-1)} + {src}_{(-1,0)}$$
Main Assignments:
$${dst}_{(0,0)} \leftarrow_{} a + b + c$$
ac.operation_count
{'adds': 8,
 'muls': 2,
 'divs': 0,
 'sqrts': 0,
 'fast_sqrts': 0,
 'fast_inv_sqrts': 0,
 'fast_div': 0}
opt_ac = ps.simp.subexpression_substitution_in_existing_subexpressions(ac)
opt_ac
Subexpressions:
$$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$
$$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$
$$c \leftarrow_{} - {src}_{(0,0)} + a + b$$
Main Assignments:
$${dst}_{(0,0)} \leftarrow_{} a + b + c$$
opt_ac.operation_count
{'adds': 6,
 'muls': 1,
 'divs': 0,
 'sqrts': 0,
 'fast_sqrts': 0,
 'fast_inv_sqrts': 0,
 'fast_div': 0}
kernel = ps.create_kernel(ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]))
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)
      {
         _data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
      }
   }
}
gl_spec = [
    (0, 2),  # 0 ghost layers at the left, 2 at the right border
    (1, 0),
]  # 1 ghost layer at the lower y, one at the upper y coordinate
kernel = ps.create_kernel(
    ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]), ghost_layers=gl_spec
)
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)
      {
         _data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
      }
   }
}
invalid_description = [
    ps.Assignment(dst[1, 0], src[1, 0] + src[-1, 0]),
    ps.Assignment(dst[0, 0], src[1, 0] - src[-1, 0]),
]
try:
    invalid_kernel = ps.create_kernel(invalid_description)
    assert False, "Should never be executed"
except ValueError as e:
    print(e)
Field dst is written at two different locations
valid_kernel = ps.create_kernel(ps.Assignment(src[0, 0], 2 * src[0, 0] + 42))
v = ps.fields("v(2): double[2D]")
valid_kernel = ps.create_kernel(
    [
        ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
        ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
    ]
)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[38], line 2
      1 v = ps.fields("v(2): double[2D]")
----> 2 valid_kernel = ps.create_kernel(
      3     [
      4         ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
      5         ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
      6     ]
      7 )

File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:87, in create_kernel(assignments, config, **kwargs)
     85     return create_indexed_kernel(assignments, config=config)
     86 else:
---> 87     return create_domain_kernel(assignments, config=config)

File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:132, in create_domain_kernel(assignments, config)
    127 # FUTURE WORK from here we shouldn't NEED sympy
    128 # --- check constrains
    129 check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
    130                                check_double_write_condition=not config.allow_double_writes)
--> 132 check.visit(assignments)
    134 assignments.bound_fields = check.fields_written
    135 assignments.rhs_fields = check.fields_read

File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:49, in KernelConstraintsCheck.visit(self, obj)
     47 def visit(self, obj):
     48     if isinstance(obj, (AssignmentCollection, NodeCollection)):
---> 49         [self.visit(e) for e in obj.all_assignments]
     50     elif isinstance(obj, list) or isinstance(obj, tuple):
     51         [self.visit(e) for e in obj]

File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:53, in KernelConstraintsCheck.visit(self, obj)
     51     [self.visit(e) for e in obj]
     52 elif isinstance(obj, (sp.Eq, ast.SympyAssignment, Assignment)):
---> 53     self.process_assignment(obj)
     54 elif isinstance(obj, ast.Conditional):
     55     self.scopes.push()

File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:81, in KernelConstraintsCheck.process_assignment(self, assignment)
     78 def process_assignment(self, assignment: Union[sp.Eq, ast.SympyAssignment, Assignment]):
     79     # for checks it is crucial to process rhs before lhs to catch e.g. a = a + 1
     80     self.process_expression(assignment.rhs)
---> 81     self.process_lhs(assignment.lhs)

File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:102, in KernelConstraintsCheck.process_lhs(self, lhs)
    100 def process_lhs(self, lhs: Union[Field.Access, TypedSymbol, sp.Symbol]):
    101     assert isinstance(lhs, sp.Symbol)
--> 102     self.update_accesses_lhs(lhs)

File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:120, in KernelConstraintsCheck.update_accesses_lhs(self, lhs)
    118         if len(reads) > 1 or lhs.offsets != reads[0]:
    119             if self.check_independence_condition:
--> 120                 raise ValueError(f"Field {lhs.field.name} is written at different location than it was read. "
    121                                  f"This means the resulting kernel would not be thread safe")
    122 elif isinstance(lhs, sp.Symbol):
    123     if self.scopes.is_defined_locally(lhs):

ValueError: Field v is written at different location than it was read. This means the resulting kernel would not be thread safe
@ps.kernel
def not_allowed():
    a, b = sp.symbols("a b")
    a @= src[0, 0]
    b @= a + 3
    a @= src[-1, 0]
    dst[0, 0] @= a + b


try:
    ps.create_kernel(not_allowed)
    assert False
except ValueError as e:
    print(e)
Assignments not in SSA form, multiple assignments to a
@ps.kernel
def not_allowed():
    dst[0, 0] @= src[0, 1] + src[1, 0]
    dst[0, 0] @= 2 * dst[0, 0]


try:
    ps.create_kernel(not_allowed)
    assert False
except ValueError as e:
    print(e)
Field dst is written twice at the same location
tmp_var = sp.Symbol("a")


@ps.kernel
def allowed():
    tmp_var @= src[0, 1] + src[1, 0]
    dst[0, 0] @= 2 * tmp_var


ast = ps.create_kernel(allowed)
ps.show_code(ast)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 1; ctr_0 < 19; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 1; ctr_1 < 29; ctr_1 += 1)
      {
         const double a = _data_src[30*ctr_0 + ctr_1 + 1] + _data_src[30*ctr_0 + ctr_1 + 30];
         _data_dst[30*ctr_0 + ctr_1] = a*2.0;
      }
   }
}