Skip to content

Commit b7249bd

Browse files
committed
Fix memory leak
1 parent 2d3ddad commit b7249bd

File tree

2 files changed

+95
-106
lines changed

2 files changed

+95
-106
lines changed

roboticstoolbox/cuda/fknm.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ __global__ void _jacob0(double *T,
5656
T_i = &T[tid * 16];
5757

5858
if (tid >= N) {
59+
free(U);
60+
free(invU);
61+
free(temp);
5962
return;
6063
}
6164

@@ -407,7 +410,6 @@ void jacob0(double *T,
407410
njoints,
408411
d_out);
409412

410-
// cudaDeviceSynchronize();
411413
// cudaError_t cudaerr = cudaDeviceSynchronize();
412414
// if (cudaerr != cudaSuccess)
413415
// printf("kernel launch failed with error \"%s\".\n",
@@ -416,7 +418,6 @@ void jacob0(double *T,
416418
// memset(out, 1, N * 6 * njoints);
417419
// out[0] = 1;
418420
cudaMemcpy(out, d_out, sizeof(double) * N * 6 * njoints, cudaMemcpyDeviceToHost);
419-
// printf("Out size %d %d %f %f %f %f %f", N, njoints, d_out[0], d_out[1], d_out[2], d_out[3], d_out[4]);
420421

421422
// Deallocate device memory
422423
cudaFree(d_T);

roboticstoolbox/examples/chomp.py

Lines changed: 92 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,25 @@
1919
fknm_ = None
2020

2121

22-
def jacob0_loop(robot, link, pts, qt=None):
22+
def jacob0_loop(robot, link, pts, jacob_vec, qt=None):
2323
"""
2424
Non-parallel, use for loop
2525
:param pts: (num, 3) xyz positions of pts
2626
:param q: (cdim,) joint configurations
27+
:param jacob_vec: (num, 6, njoints)
2728
"""
2829
if qt is None:
2930
qt = robot.q
30-
jacob_vec = []
31-
for pt in pts:
31+
for ip, pt in enumerate(pts):
3232
JJ = robot.jacob0(qt, end=link, tool=SE3(pt)) # (k, 6)
33-
jacob_vec.append(JJ)
34-
return np.array(jacob_vec)
33+
jacob_vec[ip] = JJ
3534

36-
def jacob0_vec(robot, link, pts, qt=None, verbose=False):
35+
def jacob0_vec(robot, link, pts, jacob_vec, qt=None, verbose=False):
3736
"""
3837
Parallel, use CUDA
3938
:param pts: (num, 3) xyz positions of pts
4039
:param q: (cdim,) joint configurations
40+
:param jacob_vec: (num, 6, njoints)
4141
"""
4242
import ctypes as ct
4343
global fknm_
@@ -46,19 +46,19 @@ def jacob0_vec(robot, link, pts, qt=None, verbose=False):
4646
if fknm_ is None:
4747
fknm_=np.ctypeslib.load_library('roboticstoolbox/cuda/fknm','.')
4848
# Parallel, use cuda
49-
t_start = time.time()
5049
num_pts = len(pts)
51-
se3_pts = SE3(pts)
52-
pts_tool = np.array(se3_pts.A)
50+
pts_tool = np.repeat(np.eye(4)[None, :], len(pts), axis=0)
51+
pts_tool[:, :3, 3] = pts
5352
link_base = robot.fkine(qt, end=link)
54-
# print(f"1: {time.time() - t_start:.3f}\n")
55-
t_start = time.time()
5653
# pts_mat = np.array((link_base @ se3_pts).A)
57-
pts_mat = np.array(link_base.A.dot(np.array(se3_pts.A)).swapaxes(0, 1), order='C')
58-
e_pts = np.zeros((num_pts, 3))
59-
pts_etool = np.array(SE3(e_pts).A)
60-
# print(f"2: {time.time() - t_start:.3f}\n")
61-
t_start = time.time()
54+
# pts_mat = np.array(link_base.A.dot(pts_tool).swapaxes(0, 1), order='C')
55+
# pts_mat = np.einsum('ij,ljk->lik', link_base.A, pts_tool)
56+
# pts_mat = np.ascontiguousarray(link_base.A.dot(pts_tool).swapaxes(0, 1))
57+
pts_mat = np.matmul(np.repeat(link_base.A[None, :], len(pts), axis=0), pts_tool)
58+
59+
# e_pts = np.zeros((num_pts, 3))
60+
# pts_etool = np.array(SE3(e_pts).A)
61+
pts_etool = np.repeat(np.eye(4)[None, :], len(pts), axis=0)
6262
link_As = []
6363
link_axes = []
6464
link_isjoint = []
@@ -69,10 +69,10 @@ def jacob0_vec(robot, link, pts, qt=None, verbose=False):
6969
link_As.append(link.A(qt[link.jindex]).A)
7070
link_axes.append(axis)
7171
link_isjoint.append(link.isjoint)
72+
# print(nlinks)
7273
link_As = np.array(link_As)
7374
link_axes = np.array(link_axes, dtype=int)
7475
link_isjoint = np.array(link_isjoint, dtype=int)
75-
jacob_vec = np.zeros((num_pts, 6, njoints))
7676

7777
if verbose:
7878
print(f"pts shape {pts_mat.shape}")
@@ -86,18 +86,19 @@ def jacob0_vec(robot, link, pts, qt=None, verbose=False):
8686
print(f"njoints {njoints}")
8787
print(f"num_pts {num_pts}")
8888

89-
fknm_.jacob0(pts_mat.ctypes.data_as(ct.c_void_p),
90-
pts_tool.ctypes.data_as(ct.c_void_p),
91-
pts_etool.ctypes.data_as(ct.c_void_p),
92-
# link_As.ctypes.data_as(ct.c_void_p),
93-
ct.c_void_p(link_As.ctypes.data),
94-
link_axes.ctypes.data_as(ct.c_void_p),
95-
link_isjoint.ctypes.data_as(ct.c_void_p),
96-
ct.c_int(num_pts),
97-
ct.c_int(nlinks),
98-
ct.c_int(njoints),
99-
jacob_vec.ctypes.data_as(ct.c_void_p))
100-
return jacob_vec
89+
with Profiler("vec 2", enable=False):
90+
# with Profiler("vec 2"):
91+
fknm_.jacob0(pts_mat.ctypes.data_as(ct.c_void_p),
92+
pts_tool.ctypes.data_as(ct.c_void_p),
93+
pts_etool.ctypes.data_as(ct.c_void_p),
94+
# link_As.ctypes.data_as(ct.c_void_p),
95+
ct.c_void_p(link_As.ctypes.data),
96+
link_axes.ctypes.data_as(ct.c_void_p),
97+
link_isjoint.ctypes.data_as(ct.c_void_p),
98+
ct.c_int(num_pts),
99+
ct.c_int(nlinks),
100+
ct.c_int(njoints),
101+
jacob_vec.ctypes.data_as(ct.c_void_p))
101102

102103
def chomp(seed=0):
103104
np.random.seed(seed)
@@ -115,11 +116,10 @@ def chomp(seed=0):
115116
qtraj = rtb.jtraj(robot.qz, q_pickup, 50)
116117
# robot.plot(qt.q, movie='panda1.gif')
117118
robot = rtb.models.URDF.Panda() # load URDF version of the Panda
118-
obstacle = Box([0.3, 0.3, 0.3], base=SE3(0.5, 0.5, 0.5))
119+
obstacle = Box([0.3, 0.3, 0.3], base=SE3(0.5, 0.5, 0.8))
119120
pt = robot.closest_point(robot.q, obstacle)
120121
# robot.links[2].collision[0].get_data()
121122
# print(robot) # display the model
122-
123123
# print(len(qt.q))
124124
# for qk in qt.q: # for each joint configuration on trajectory
125125
# print(qk)
@@ -144,14 +144,15 @@ def chomp(seed=0):
144144
lmbda = 1000
145145
eta = 1000
146146
iters = 4
147-
num_pts = 50
147+
num_pts = 200
148+
# num_pts = 1
148149

149150
# Make cost field, starting & end points
150151
cdim = len(qtraj.q[0])
151152
xidim = cdim * nq
152153
AA = np.zeros((xidim, xidim))
153154
xi = np.zeros(xidim)
154-
robot._set_link_fk(qtraj.q[1])
155+
# robot._set_link_fk(qtraj.q[1])
155156

156157
qs = np.zeros(cdim)
157158
qe = q_pickup
@@ -170,14 +171,23 @@ def chomp(seed=0):
170171
AA /= dt * dt * (nq + 1)
171172
Ainv = np.linalg.pinv(AA)
172173

174+
def vmatmul(mat1, mat2):
175+
# mat1: (N, a, b)
176+
# mat2: (N, b)
177+
# out: (N, a)
178+
return np.matmul(mat1, mat2[:, :, None]).squeeze(-1)
179+
180+
def vrepeat(vec, N):
181+
# vec: (...)
182+
# out: (N, ...)
183+
return np.repeat(vec[None, :], N, axis=0)
184+
173185
for t in range(iters):
174186
nabla_smooth = AA.dot(xi) + bb
175187
nabla_obs = np.zeros(xidim)
176188
xidd = AA.dot(xi)
177189
total_cost = 0
178190
for i in range(nq): # timestep
179-
robot.q = xi[cdim * i: cdim * (i+1)]
180-
# print(t, i, xi[cdim * i: cdim * (i+1)])
181191
qt = xi[cdim * i: cdim * (i+1)]
182192
if i == 0:
183193
qd = 0.5 * (xi[cdim * (i+1): cdim * (i+2)] - qs)
@@ -186,8 +196,7 @@ def chomp(seed=0):
186196
else:
187197
qd = 0.5 * (xi[cdim * (i+1): cdim * (i+2)] - xi[cdim * (i-1): cdim * (i)])
188198

189-
# import pdb; pdb.set_trace()
190-
for link in robot.links: # bodypart
199+
for il, link in enumerate(robot.links): # bodypart
191200
k = link.jindex
192201
if k is None:
193202
continue
@@ -197,38 +206,41 @@ def chomp(seed=0):
197206
mesh = meshes[link.name]
198207
idxs = np.random.choice(np.arange(len(mesh.vertices)), num_pts, replace=False)
199208
pts = mesh.vertices[idxs]
200-
201-
with Profiler("Step 1"):
209+
link_base = robot.fkine(qt, end=link)
210+
pts_se3 = vrepeat(np.eye(4), num_pts)
211+
pts_se3[:, :3, 3] = pts
212+
pts_xyz = link_base.A.dot(pts_se3).swapaxes(0, 1)[:, :3, 3]
213+
214+
# with Profiler("Step 1"):
215+
with Profiler("Step 1", enable=False):
216+
path, njoints, _ = robot.get_path(end=link)
217+
jacobs = np.zeros((num_pts, 6, njoints))
202218
if not parallel:
203-
jacobs = jacob0_loop(robot, link, pts, qt)
219+
jacob0_loop(robot, link, pts, jacobs, qt) # (N, 6, k + 1)
204220
else:
205-
jacobs = jacob0_vec(robot, link, pts, qt)
206-
with Profiler("Step 2"):
207-
# TODO: vectorize cost calculation
208-
for j in range(num_pts):
209-
# For each point: compute Jacobian, compute cost, compute cost gradient
210-
pt_rel = mesh.vertices[j]
211-
pt_tool = link_base @ SE3(pt_rel)
212-
pt_pos = pt_tool.t
213-
214-
JJ = robot.jacob0(qt, end=link, tool=SE3(pt_rel)) # (k, 6)
215-
xd = JJ.dot(qd[:k+1]) # x_delta
216-
vel = np.linalg.norm(xd)
217-
xdn = xd / (vel + 1e-3) # speed normalized
218-
xdd = JJ.dot(xidd[cdim * i: cdim * i + k + 1]) # second derivative of xi
219-
prj = np.eye(6) - xdn[:, None].dot(xdn[:, None].T) # curvature vector (6, 6)
220-
kappa = (prj.dot(xdd) / (vel ** 2 + 1e-3)) # (6,)
221-
222-
cost = np.sum(pt_pos)
223-
total_cost += cost / num_pts
224-
# delta := negative gradient of obstacle cost in work space, (6, cdim)
225-
delta = -1 * np.concatenate([[1, 1, 0], np.zeros(3)])
226-
# for link in robot.links:
227-
# cost = get_link_cost(robot, meshes, link)
228-
delta_nabla_obs += JJ.T.dot(vel).dot(prj.dot(delta) - cost * kappa)
221+
jacob0_vec(robot, link, pts, jacobs, qt)
229222

230-
nabla_obs[cdim * i: cdim * i + k + 1] += (delta_nabla_obs / num_pts)
231-
223+
# with Profiler("Step 2"):
224+
with Profiler("Step 2", enable=False):
225+
xd = jacobs.dot(qd[:k+1]) # x_delta, (N, 6)
226+
vel = np.linalg.norm(xd, axis=1, keepdims=True) # (N, 1)
227+
xdn = xd / (vel + 1e-3) # speed normalized, (N, 6)
228+
xdd = jacobs.dot(xidd[cdim * i: cdim * i + k + 1]) # second derivative of xi, (N, 6)
229+
prj = vrepeat(np.eye(6), num_pts)
230+
prj -= np.matmul(xdn[:, :, None], xdn[:, None, :]) # curvature vector (N, 6, 6)
231+
kappa = (vmatmul(prj, xdd) / (vel ** 2 + 1e-3)) # (N, 6)
232+
cost = np.sum(pts_xyz, axis=1, keepdims=True) # (N, 1)
233+
# delta := negative gradient of obstacle cost in work space, (6, cdim)
234+
delta = -1 * np.concatenate([[1, 1, 0], np.zeros(3)])
235+
delta = vrepeat(delta, num_pts) # (N, 3, 6)
236+
# for link in robot.links:
237+
# cost = get_link_cost(robot, meshes, link)
238+
part1 = jacobs.swapaxes(1, 2) # (N, k + 1, 6)
239+
part2 = vmatmul(prj, delta) - cost * kappa # (N, 6)
240+
delta_nabla_obs = vmatmul(part1, part2) * vel # (N, k + 1)
241+
total_cost += cost.mean()
242+
243+
nabla_obs[cdim * i: cdim * i + k + 1] += delta_nabla_obs.mean(axis=0)
232244

233245
# dxi = Ainv.dot(lmbda * nabla_smooth)
234246
dxi = Ainv.dot(nabla_obs + lmbda * nabla_smooth)
@@ -287,44 +299,14 @@ def chomp(seed=0):
287299
# time.sleep(dt)
288300

289301

290-
def get_vertices_xyz(robot, meshes, link, num=-1, parallel=True):
291-
"""Return (num, 3) array of xyz positions of vertices on the mesh
292-
"""
293-
if link.name not in meshes:
294-
return None
295-
mesh = meshes[link.name]
296-
base_se3 = robot.fkine(robot.q, end=link) # x_current, (4, 4)
297-
if num < 0:
298-
num = len(mesh.vertices)
299-
idxs = np.arange(num)
300-
else:
301-
idxs = np.random.choice(np.arange(num, replace=True))
302-
303-
offsets = np.repeat(np.eye(4)[np.newaxis], num, axis=0)
304-
xs = mesh.vertices[idxs]
305-
offsets[:, :3, 3] = xs
306-
if parallel:
307-
costs = []
308-
out_xs = np.dot(base_se3.data[0], offsets)[:3, :, -1].T
309-
else:
310-
pass
311-
# print(f"Out shape {out_xs.shape}")
312-
return out_xs
313-
314-
315-
def get_link_cost(robot, meshes, link, num=-1, parallel=True):
316-
"""Return gradient of the costs.
317-
"""
318-
vertice_xyz = get_vertices_xyz(robot, meshes, link, num, parallel)
319-
320-
def test_parallel():
302+
def test_parallel(seed=0):
303+
np.random.seed(seed)
321304
robot = rtb.models.DH.Panda() # load Mesh version (for collisions)
322305
T = robot.fkine(robot.qz) # forward kinematics
323306
T = SE3(0.7, 0.2, 0.1) * SE3.OA([0, 1, 0], [0, 0, -1])
324307
sol = robot.ikine_LM(T) # solve IK
325308
q_pickup = sol.q
326309
robot = rtb.models.URDF.Panda() # load URDF version of the Panda
327-
robot.q = q_pickup
328310
meshes = {}
329311
for link in robot.links:
330312
if len(link.geometry) != 1:
@@ -351,26 +333,32 @@ def test_parallel():
351333
AA = np.zeros((xidim, xidim))
352334
xi = np.zeros(xidim)
353335

354-
355-
for idx in [2, 3, 4, 5, 6, 7]:
336+
# for idx in [2, 3, 4, 5, 6, 7]:
337+
for idx in [2]:
356338
print(f"Iidx {idx}")
357339
link = robot.links[idx]
358340
k = link.jindex
359341

360342

361343
# Non-parallel, use for loop
362344
mesh = meshes[link.name]
363-
pts = mesh.vertices[:num_pts]
345+
idxs = np.random.choice(np.arange(len(mesh.vertices)), num_pts, replace=False)
346+
pts = mesh.vertices[idxs]
347+
path, njoints, _ = robot.get_path(end=link)
348+
364349

365350
t_start = time.time()
366-
jacobs1 = jacob0_loop(robot, link, pts)
351+
jacobs1 = np.zeros((num_pts, 6, njoints))
352+
jacob0_loop(robot, link, pts, jacobs1, q_pickup)
367353
print(f"\nNon-parallel time: {time.time() - t_start:.3f}\n")
368354
# print(jacobs1)
369355
# Parallel, use cuda
370356
t_start = time.time()
371-
jacobs2 = jacob0_vec(robot, link, pts, verbose=True)
357+
jacobs2 = np.zeros((num_pts, 6, njoints))
358+
jacob0_vec(robot, link, pts, jacobs2, q_pickup, verbose=True)
359+
import pdb; pdb.set_trace()
372360
print(f"\nParallel time: {time.time() - t_start:.3f}\n")
373-
# print(jacobs2)
361+
print(jacobs2)
374362
# print(f"Max diff 0 {np.max(np.abs(jacobs1[0] - jacobs2[0]))}")
375363
# print(f"Max diff 0 {np.argmax(np.abs(jacobs1 - jacobs2))}")
376364
# print(f"Max diff {np.max(np.abs(jacobs1 - jacobs2))}")

0 commit comments

Comments
 (0)