5. pystencils#
import numpy as np
import pystencils as ps
import sympy as sp
from pystencils import create_kernel
from pystencils import plot as plt
sp.init_printing()
input_arr = np.random.rand(1024, 1024)
output_arr = np.zeros_like(input_arr)
def numpy_kernel():
    output_arr[1:-1, 1:-1] = (
        input_arr[2:, 1:-1]
        + input_arr[:-2, 1:-1]
        + input_arr[1:-1, 2:]
        + input_arr[1:-1, :-2]
    )
%%timeit
numpy_kernel()
1.77 ms ± 12.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
src, dst = ps.fields(src=input_arr, dst=output_arr)
symbolic_description = ps.Assignment(
    dst[0, 0], (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4
)
symbolic_description
kernel = ps.create_kernel(symbolic_description).compile()
def pystencils_kernel():
    kernel(src=input_arr, dst=output_arr)
%%timeit
pystencils_kernel()
408 μs ± 188 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
my_field = ps.fields("f(3) : double[2D]")
field_access = my_field[1, 0](1)
field_access
isinstance(field_access, sp.Symbol)
True
img_field = ps.fields("img(4): [2D]")
w1, w2 = sp.symbols("w_1 w_2")
color = 2
sobel_x = (
    -w2 * img_field[-1, 0](color)
    - w1 * img_field[-1, -1](color)
    - w1 * img_field[-1, +1](color)
    + w2 * img_field[+1, 0](color)
    + w1 * img_field[+1, -1](color)
    - w1 * img_field[+1, +1](color)
) ** 2
sobel_x
sobel_x = sobel_x.subs(w1, 0.5)
sobel_x
dst_field = ps.fields("dst: [2D]")
update_rule = ps.Assignment(dst_field[0, 0], sobel_x)
update_rule
ast = create_kernel(update_rule, cpu_openmp=False)
compiled_kernel = ast.compile()
src_arr = np.zeros([20, 30])
dst_arr = np.zeros_like(src_arr)
dst, src = ps.fields(dst=dst_arr, src=src_arr)
grad_x, grad_y = sp.symbols("grad_x, grad_y")
symbolic_description = [
    ps.Assignment(grad_x, (src[1, 0] - src[-1, 0]) / 2),
    ps.Assignment(grad_y, (src[0, 1] - src[0, -1]) / 2),
    ps.Assignment(dst[0, 0], grad_x + grad_y),
]
kernel = ps.create_kernel(symbolic_description)
symbolic_description
@ps.kernel
def symbolic_description_using_function():
    grad_x @= (src[1, 0] - src[-1, 0]) / 2
    grad_y @= (src[0, 1] - src[0, -1]) / 2
    dst[0, 0] @= grad_x + grad_y
symbolic_description_using_function
sp.Piecewise((1.0, src[0, 1] > 0), (0.0, True)) + src[1, 0]
@ps.kernel
def kernel_with_piecewise():
    grad_x @= (src[1, 0] - src[-1, 0]) / 2 if src[-1, 0] > 0 else 0.0
kernel_with_piecewise
@ps.kernel
def somewhat_longer_dummy_kernel(s):
    s.a @= src[0, 1] + src[-1, 0]
    s.b @= 2 * src[1, 0] + src[0, -1]
    s.c @= src[0, 1] + 2 * src[1, 0] + src[-1, 0] + src[0, -1] - src[0, 0]
    dst[0, 0] @= s.a + s.b + s.c
ac = ps.AssignmentCollection(
    main_assignments=somewhat_longer_dummy_kernel[-1:],
    subexpressions=somewhat_longer_dummy_kernel[:-1],
)
ac
Subexpressions:
| $$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$ | 
| $$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$ | 
| $$c \leftarrow_{} - {src}_{(0,0)} + 2 {src}_{(1,0)} + {src}_{(0,1)} + {src}_{(0,-1)} + {src}_{(-1,0)}$$ | 
Main Assignments:
| $${dst}_{(0,0)} \leftarrow_{} a + b + c$$ | 
ac.operation_count
{'adds': 8,
 'muls': 2,
 'divs': 0,
 'sqrts': 0,
 'fast_sqrts': 0,
 'fast_inv_sqrts': 0,
 'fast_div': 0}
opt_ac = ps.simp.subexpression_substitution_in_existing_subexpressions(ac)
opt_ac
Subexpressions:
| $$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$ | 
| $$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$ | 
| $$c \leftarrow_{} - {src}_{(0,0)} + a + b$$ | 
Main Assignments:
| $${dst}_{(0,0)} \leftarrow_{} a + b + c$$ | 
opt_ac.operation_count
{'adds': 6,
 'muls': 1,
 'divs': 0,
 'sqrts': 0,
 'fast_sqrts': 0,
 'fast_inv_sqrts': 0,
 'fast_div': 0}
kernel = ps.create_kernel(ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]))
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)
      {
         _data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
      }
   }
}
gl_spec = [
    (0, 2),  # 0 ghost layers at the left, 2 at the right border
    (1, 0),
]  # 1 ghost layer at the lower y, one at the upper y coordinate
kernel = ps.create_kernel(
    ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]), ghost_layers=gl_spec
)
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)
      {
         _data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
      }
   }
}
invalid_description = [
    ps.Assignment(dst[1, 0], src[1, 0] + src[-1, 0]),
    ps.Assignment(dst[0, 0], src[1, 0] - src[-1, 0]),
]
try:
    invalid_kernel = ps.create_kernel(invalid_description)
    assert False, "Should never be executed"
except ValueError as e:
    print(e)
Field dst is written at two different locations
valid_kernel = ps.create_kernel(ps.Assignment(src[0, 0], 2 * src[0, 0] + 42))
v = ps.fields("v(2): double[2D]")
valid_kernel = ps.create_kernel(
    [
        ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
        ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
    ]
)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[38], line 2
      1 v = ps.fields("v(2): double[2D]")
----> 2 valid_kernel = ps.create_kernel(
      3     [
      4         ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
      5         ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
      6     ]
      7 )
File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:87, in create_kernel(assignments, config, **kwargs)
     85     return create_indexed_kernel(assignments, config=config)
     86 else:
---> 87     return create_domain_kernel(assignments, config=config)
File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:132, in create_domain_kernel(assignments, config)
    127 # FUTURE WORK from here we shouldn't NEED sympy
    128 # --- check constrains
    129 check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
    130                                check_double_write_condition=not config.allow_double_writes)
--> 132 check.visit(assignments)
    134 assignments.bound_fields = check.fields_written
    135 assignments.rhs_fields = check.fields_read
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:49, in KernelConstraintsCheck.visit(self, obj)
     47 def visit(self, obj):
     48     if isinstance(obj, (AssignmentCollection, NodeCollection)):
---> 49         [self.visit(e) for e in obj.all_assignments]
     50     elif isinstance(obj, list) or isinstance(obj, tuple):
     51         [self.visit(e) for e in obj]
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:53, in KernelConstraintsCheck.visit(self, obj)
     51     [self.visit(e) for e in obj]
     52 elif isinstance(obj, (sp.Eq, ast.SympyAssignment, Assignment)):
---> 53     self.process_assignment(obj)
     54 elif isinstance(obj, ast.Conditional):
     55     self.scopes.push()
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:81, in KernelConstraintsCheck.process_assignment(self, assignment)
     78 def process_assignment(self, assignment: Union[sp.Eq, ast.SympyAssignment, Assignment]):
     79     # for checks it is crucial to process rhs before lhs to catch e.g. a = a + 1
     80     self.process_expression(assignment.rhs)
---> 81     self.process_lhs(assignment.lhs)
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:102, in KernelConstraintsCheck.process_lhs(self, lhs)
    100 def process_lhs(self, lhs: Union[Field.Access, TypedSymbol, sp.Symbol]):
    101     assert isinstance(lhs, sp.Symbol)
--> 102     self.update_accesses_lhs(lhs)
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:120, in KernelConstraintsCheck.update_accesses_lhs(self, lhs)
    118         if len(reads) > 1 or lhs.offsets != reads[0]:
    119             if self.check_independence_condition:
--> 120                 raise ValueError(f"Field {lhs.field.name} is written at different location than it was read. "
    121                                  f"This means the resulting kernel would not be thread safe")
    122 elif isinstance(lhs, sp.Symbol):
    123     if self.scopes.is_defined_locally(lhs):
ValueError: Field v is written at different location than it was read. This means the resulting kernel would not be thread safe
@ps.kernel
def not_allowed():
    a, b = sp.symbols("a b")
    a @= src[0, 0]
    b @= a + 3
    a @= src[-1, 0]
    dst[0, 0] @= a + b
try:
    ps.create_kernel(not_allowed)
    assert False
except ValueError as e:
    print(e)
Assignments not in SSA form, multiple assignments to a
@ps.kernel
def not_allowed():
    dst[0, 0] @= src[0, 1] + src[1, 0]
    dst[0, 0] @= 2 * dst[0, 0]
try:
    ps.create_kernel(not_allowed)
    assert False
except ValueError as e:
    print(e)
Field dst is written twice at the same location
tmp_var = sp.Symbol("a")
@ps.kernel
def allowed():
    tmp_var @= src[0, 1] + src[1, 0]
    dst[0, 0] @= 2 * tmp_var
ast = ps.create_kernel(allowed)
ps.show_code(ast)
FUNC_PREFIX void kernel(double * RESTRICT  _data_dst, double * RESTRICT const _data_src)
{
   for (int64_t ctr_0 = 1; ctr_0 < 19; ctr_0 += 1)
   {
      for (int64_t ctr_1 = 1; ctr_1 < 29; ctr_1 += 1)
      {
         const double a = _data_src[30*ctr_0 + ctr_1 + 1] + _data_src[30*ctr_0 + ctr_1 + 30];
         _data_dst[30*ctr_0 + ctr_1] = a*2.0;
      }
   }
}
    
  
  
