/*
 * Allocate two 64-byte-aligned float buffers with room for N elements each.
 *
 * aligned_alloc() as published in C11 requires the requested size to be an
 * integral multiple of the alignment, and some implementations return NULL
 * when it is not. N * sizeof(float) is not a multiple of 64 for arbitrary N
 * (with a 4-byte float, only when N is a multiple of 16), so each request is
 * rounded up to the next 64-byte boundary. The extra padding bytes lie past
 * element N-1.
 *
 * NOTE(review): neither result is checked for NULL in the visible code --
 * confirm the elided code below handles allocation failure and free()s
 * both buffers.
 */
float *a = (float *)aligned_alloc(64, (((size_t)N * sizeof(float)) + 63) & ~(size_t)63);
float *b = (float *)aligned_alloc(64, (((size_t)N * sizeof(float)) + 63) & ~(size_t)63);
...