parent
215535701d
commit
3120413ccd
|
@ -722,7 +722,7 @@ static vk_op_unary_push_constants vk_op_unary_push_constants_init(const ggml_ten
|
||||||
p.nb11 = (uint32_t)(dst->nb[1] / dst_tsize);
|
p.nb11 = (uint32_t)(dst->nb[1] / dst_tsize);
|
||||||
p.nb12 = (uint32_t)(dst->nb[2] / dst_tsize);
|
p.nb12 = (uint32_t)(dst->nb[2] / dst_tsize);
|
||||||
p.nb13 = (uint32_t)(dst->nb[3] / dst_tsize);
|
p.nb13 = (uint32_t)(dst->nb[3] / dst_tsize);
|
||||||
|
|
||||||
return p; // fastdiv values and offsets are initialized later in ggml_vk_op
|
return p; // fastdiv values and offsets are initialized later in ggml_vk_op
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7677,9 +7677,6 @@ static void ggml_vk_repeat_back(ggml_backend_vk_context * ctx, vk_context& subct
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
|
static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
|
||||||
const uint32_t src0_type_size = ggml_type_size(src0->type);
|
|
||||||
const uint32_t dst_type_size = ggml_type_size(dst->type);
|
|
||||||
|
|
||||||
uint32_t ne = (uint32_t)ggml_nelements(src0);
|
uint32_t ne = (uint32_t)ggml_nelements(src0);
|
||||||
if (ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) {
|
if (ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) {
|
||||||
// Convert from number of logical elements to 2- or 4-byte units.
|
// Convert from number of logical elements to 2- or 4-byte units.
|
||||||
|
|
|
@ -42,7 +42,7 @@ float fetch_bilinear(ivec2 c0, ivec2 c1, vec2 d, uint i12, uint i13) {
|
||||||
const float v10 = data_a[base + c1.y * p.nb01 + c0.x * p.nb00];
|
const float v10 = data_a[base + c1.y * p.nb01 + c0.x * p.nb00];
|
||||||
const float v11 = data_a[base + c1.y * p.nb01 + c1.x * p.nb00];
|
const float v11 = data_a[base + c1.y * p.nb01 + c1.x * p.nb00];
|
||||||
|
|
||||||
return
|
return
|
||||||
v00 * (1.0-d.x) * (1.0-d.y) +
|
v00 * (1.0-d.x) * (1.0-d.y) +
|
||||||
v01 * d.x * (1.0-d.y) +
|
v01 * d.x * (1.0-d.y) +
|
||||||
v10 * (1.0-d.x) * d.y +
|
v10 * (1.0-d.x) * d.y +
|
||||||
|
@ -57,7 +57,7 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
|
||||||
const vec2 d = c - c0f;
|
const vec2 d = c - c0f;
|
||||||
const ivec2 c0 = max(ivec2(c0f), 0);
|
const ivec2 c0 = max(ivec2(c0f), 0);
|
||||||
const ivec2 c1 = min(ivec2(c0f + 1), ne0 - 1);
|
const ivec2 c1 = min(ivec2(c0f + 1), ne0 - 1);
|
||||||
|
|
||||||
return fetch_bilinear(c0, c1, d, i12, i13);
|
return fetch_bilinear(c0, c1, d, i12, i13);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue