5. pystencils#
import numpy as np
import pystencils as ps
import sympy as sp
from pystencils import create_kernel
from pystencils import plot as plt
sp.init_printing()
input_arr = np.random.rand(1024, 1024)
output_arr = np.zeros_like(input_arr)
def numpy_kernel():
output_arr[1:-1, 1:-1] = (
input_arr[2:, 1:-1]
+ input_arr[:-2, 1:-1]
+ input_arr[1:-1, 2:]
+ input_arr[1:-1, :-2]
)
%%timeit
numpy_kernel()
1.77 ms ± 12.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
src, dst = ps.fields(src=input_arr, dst=output_arr)
symbolic_description = ps.Assignment(
dst[0, 0], (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4
)
symbolic_description

kernel = ps.create_kernel(symbolic_description).compile()
def pystencils_kernel():
kernel(src=input_arr, dst=output_arr)
%%timeit
pystencils_kernel()
408 μs ± 188 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
my_field = ps.fields("f(3) : double[2D]")
field_access = my_field[1, 0](1)
field_access

isinstance(field_access, sp.Symbol)
True
img_field = ps.fields("img(4): [2D]")
w1, w2 = sp.symbols("w_1 w_2")
color = 2
sobel_x = (
-w2 * img_field[-1, 0](color)
- w1 * img_field[-1, -1](color)
- w1 * img_field[-1, +1](color)
+ w2 * img_field[+1, 0](color)
+ w1 * img_field[+1, -1](color)
- w1 * img_field[+1, +1](color)
) ** 2
sobel_x

sobel_x = sobel_x.subs(w1, 0.5)
sobel_x

dst_field = ps.fields("dst: [2D]")
update_rule = ps.Assignment(dst_field[0, 0], sobel_x)
update_rule

ast = create_kernel(update_rule, cpu_openmp=False)
compiled_kernel = ast.compile()
src_arr = np.zeros([20, 30])
dst_arr = np.zeros_like(src_arr)
dst, src = ps.fields(dst=dst_arr, src=src_arr)
grad_x, grad_y = sp.symbols("grad_x, grad_y")
symbolic_description = [
ps.Assignment(grad_x, (src[1, 0] - src[-1, 0]) / 2),
ps.Assignment(grad_y, (src[0, 1] - src[0, -1]) / 2),
ps.Assignment(dst[0, 0], grad_x + grad_y),
]
kernel = ps.create_kernel(symbolic_description)
symbolic_description

@ps.kernel
def symbolic_description_using_function():
grad_x @= (src[1, 0] - src[-1, 0]) / 2
grad_y @= (src[0, 1] - src[0, -1]) / 2
dst[0, 0] @= grad_x + grad_y
symbolic_description_using_function

sp.Piecewise((1.0, src[0, 1] > 0), (0.0, True)) + src[1, 0]

@ps.kernel
def kernel_with_piecewise():
grad_x @= (src[1, 0] - src[-1, 0]) / 2 if src[-1, 0] > 0 else 0.0
kernel_with_piecewise

@ps.kernel
def somewhat_longer_dummy_kernel(s):
s.a @= src[0, 1] + src[-1, 0]
s.b @= 2 * src[1, 0] + src[0, -1]
s.c @= src[0, 1] + 2 * src[1, 0] + src[-1, 0] + src[0, -1] - src[0, 0]
dst[0, 0] @= s.a + s.b + s.c
ac = ps.AssignmentCollection(
main_assignments=somewhat_longer_dummy_kernel[-1:],
subexpressions=somewhat_longer_dummy_kernel[:-1],
)
ac
Subexpressions:
$$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$ |
$$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$ |
$$c \leftarrow_{} - {src}_{(0,0)} + 2 {src}_{(1,0)} + {src}_{(0,1)} + {src}_{(0,-1)} + {src}_{(-1,0)}$$ |
Main Assignments:
$${dst}_{(0,0)} \leftarrow_{} a + b + c$$ |
ac.operation_count
{'adds': 8,
'muls': 2,
'divs': 0,
'sqrts': 0,
'fast_sqrts': 0,
'fast_inv_sqrts': 0,
'fast_div': 0}
opt_ac = ps.simp.subexpression_substitution_in_existing_subexpressions(ac)
opt_ac
Subexpressions:
$$a \leftarrow_{} {src}_{(0,1)} + {src}_{(-1,0)}$$ |
$$b \leftarrow_{} 2 {src}_{(1,0)} + {src}_{(0,-1)}$$ |
$$c \leftarrow_{} - {src}_{(0,0)} + a + b$$ |
Main Assignments:
$${dst}_{(0,0)} \leftarrow_{} a + b + c$$ |
opt_ac.operation_count
{'adds': 6,
'muls': 1,
'divs': 0,
'sqrts': 0,
'fast_sqrts': 0,
'fast_inv_sqrts': 0,
'fast_div': 0}
kernel = ps.create_kernel(ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]))
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)
{
for (int64_t ctr_0 = 2; ctr_0 < 18; ctr_0 += 1)
{
for (int64_t ctr_1 = 2; ctr_1 < 28; ctr_1 += 1)
{
_data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
}
}
}
gl_spec = [
(0, 2), # 0 ghost layers at the left, 2 at the right border
(1, 0),
] # 1 ghost layer at the lower y, one at the upper y coordinate
kernel = ps.create_kernel(
ps.Assignment(dst[0, 0], src[2, 0] + src[-1, 0]), ghost_layers=gl_spec
)
ps.show_code(kernel)
FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)
{
for (int64_t ctr_0 = 0; ctr_0 < 18; ctr_0 += 1)
{
for (int64_t ctr_1 = 1; ctr_1 < 30; ctr_1 += 1)
{
_data_dst[30*ctr_0 + ctr_1] = _data_src[30*ctr_0 + ctr_1 + 60] + _data_src[30*ctr_0 + ctr_1 - 30];
}
}
}
invalid_description = [
ps.Assignment(dst[1, 0], src[1, 0] + src[-1, 0]),
ps.Assignment(dst[0, 0], src[1, 0] - src[-1, 0]),
]
try:
invalid_kernel = ps.create_kernel(invalid_description)
assert False, "Should never be executed"
except ValueError as e:
print(e)
Field dst is written at two different locations
valid_kernel = ps.create_kernel(ps.Assignment(src[0, 0], 2 * src[0, 0] + 42))
v = ps.fields("v(2): double[2D]")
valid_kernel = ps.create_kernel(
[
ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
]
)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[38], line 2
1 v = ps.fields("v(2): double[2D]")
----> 2 valid_kernel = ps.create_kernel(
3 [
4 ps.Assignment(v[0, 0](1), 2 * v[0, 0](1) + 42),
5 ps.Assignment(v[0, 1](0), 2 * v[1, 0](0) + 42),
6 ]
7 )
File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:87, in create_kernel(assignments, config, **kwargs)
85 return create_indexed_kernel(assignments, config=config)
86 else:
---> 87 return create_domain_kernel(assignments, config=config)
File /usr/lib/python3.12/site-packages/pystencils/kernelcreation.py:132, in create_domain_kernel(assignments, config)
127 # FUTURE WORK from here we shouldn't NEED sympy
128 # --- check constrains
129 check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
130 check_double_write_condition=not config.allow_double_writes)
--> 132 check.visit(assignments)
134 assignments.bound_fields = check.fields_written
135 assignments.rhs_fields = check.fields_read
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:49, in KernelConstraintsCheck.visit(self, obj)
47 def visit(self, obj):
48 if isinstance(obj, (AssignmentCollection, NodeCollection)):
---> 49 [self.visit(e) for e in obj.all_assignments]
50 elif isinstance(obj, list) or isinstance(obj, tuple):
51 [self.visit(e) for e in obj]
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:53, in KernelConstraintsCheck.visit(self, obj)
51 [self.visit(e) for e in obj]
52 elif isinstance(obj, (sp.Eq, ast.SympyAssignment, Assignment)):
---> 53 self.process_assignment(obj)
54 elif isinstance(obj, ast.Conditional):
55 self.scopes.push()
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:81, in KernelConstraintsCheck.process_assignment(self, assignment)
78 def process_assignment(self, assignment: Union[sp.Eq, ast.SympyAssignment, Assignment]):
79 # for checks it is crucial to process rhs before lhs to catch e.g. a = a + 1
80 self.process_expression(assignment.rhs)
---> 81 self.process_lhs(assignment.lhs)
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:102, in KernelConstraintsCheck.process_lhs(self, lhs)
100 def process_lhs(self, lhs: Union[Field.Access, TypedSymbol, sp.Symbol]):
101 assert isinstance(lhs, sp.Symbol)
--> 102 self.update_accesses_lhs(lhs)
File /usr/lib/python3.12/site-packages/pystencils/kernel_contrains_check.py:120, in KernelConstraintsCheck.update_accesses_lhs(self, lhs)
118 if len(reads) > 1 or lhs.offsets != reads[0]:
119 if self.check_independence_condition:
--> 120 raise ValueError(f"Field {lhs.field.name} is written at different location than it was read. "
121 f"This means the resulting kernel would not be thread safe")
122 elif isinstance(lhs, sp.Symbol):
123 if self.scopes.is_defined_locally(lhs):
ValueError: Field v is written at different location than it was read. This means the resulting kernel would not be thread safe
@ps.kernel
def not_allowed():
a, b = sp.symbols("a b")
a @= src[0, 0]
b @= a + 3
a @= src[-1, 0]
dst[0, 0] @= a + b
try:
ps.create_kernel(not_allowed)
assert False
except ValueError as e:
print(e)
Assignments not in SSA form, multiple assignments to a
@ps.kernel
def not_allowed():
dst[0, 0] @= src[0, 1] + src[1, 0]
dst[0, 0] @= 2 * dst[0, 0]
try:
ps.create_kernel(not_allowed)
assert False
except ValueError as e:
print(e)
Field dst is written twice at the same location
tmp_var = sp.Symbol("a")
@ps.kernel
def allowed():
tmp_var @= src[0, 1] + src[1, 0]
dst[0, 0] @= 2 * tmp_var
ast = ps.create_kernel(allowed)
ps.show_code(ast)
FUNC_PREFIX void kernel(double * RESTRICT _data_dst, double * RESTRICT const _data_src)
{
for (int64_t ctr_0 = 1; ctr_0 < 19; ctr_0 += 1)
{
for (int64_t ctr_1 = 1; ctr_1 < 29; ctr_1 += 1)
{
const double a = _data_src[30*ctr_0 + ctr_1 + 1] + _data_src[30*ctr_0 + ctr_1 + 30];
_data_dst[30*ctr_0 + ctr_1] = a*2.0;
}
}
}