通过对roi像素进行排序并使用转置坐标,似乎可以节省至少一点时间:
>>> def f_pp(im2D, idx):
...     s = np.argsort(idx.ravel())
...     out = np.empty((*idx.shape, im2D.shape[0]), im2D.dtype)
...     out.reshape(-1, im2D.shape[0])[s] = im2D.T[idx.ravel()[s]]
...     return out
...
# results are the same:
>>> np.all(f_pp(imgs.reshape(100, -1), idx) == np.moveaxis(imgs.reshape(100, -1)[:, idx], 0, 2))
True
>>> timeit("imgs.reshape(100, -1)[:, idx]", globals=globals(), number=100)
1.3392871069954708
# transposing alone is not enough:
>>> timeit("imgs.reshape(100, -1).T[idx]", globals=globals(), number=100)
1.3336799899989273
# but together with sorting I see a 2x speedup
>>> timeit("f_pp(imgs.reshape(100, -1), idx)", globals=globals(), number=100)
0.5874412529956317
# still much worse than if we had a more favorable memory layout in
# the first place
>>> timeit("imgs.reshape(-1, 100)[idx]", globals=globals(), number=100)
0.06296327701420523