@@ -256,10 +256,10 @@ bool ggml_metal_add_buffer(
         if (ctx->buffers[ctx->n_buffers].metal == nil) {
             fprintf(stderr, "%s: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
             return false;
-        } else {
-            fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
         }

+        fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB\n", __func__, name, aligned_size / 1024.0 / 1024.0);
+
         ++ctx->n_buffers;
     }

@@ -765,18 +765,23 @@ void ggml_metal_graph_compute(
                 } break;
             case GGML_OP_ALIBI:
                 {
+                    if (encoder == nil) {
+                        encoder = [command_buffer computeCommandEncoder];
+                    }
+
                     GGML_ASSERT((src0t == GGML_TYPE_F32));
-                    const int n_past = ((int32_t *) src1->data)[0];
+
+                    const int n_past = ((int32_t *) src1->data)[0]; UNUSED(n_past);
                     const int n_head = ((int32_t *) src1->data)[1];
                     const float max_bias = ((float *) src1->data)[2];
+
                     if (__builtin_popcount(n_head) != 1) {
                         GGML_ASSERT(false && "only power-of-two n_head implemented");
                     }
+
                     const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
                     const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
-                    if (encoder == nil) {
-                        encoder = [command_buffer computeCommandEncoder];
-                    }
+
                     [encoder setComputePipelineState:ctx->pipeline_alibi_f32];
                     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                     [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
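For context on the constants set up in this hunk: `n_heads_log2_floor` and `m0` feed the ALiBi per-head bias slopes. The sketch below shows how such slopes are typically derived; the second base `m1` and the per-head selection are taken from the CPU reference path in ggml rather than from this diff, and the `n_head`/`max_bias` values are illustrative only — a minimal standalone sketch, not the Metal kernel itself.

```c
// Sketch: per-head ALiBi slopes, mirroring the constants computed above.
// m1 and the per-head branch follow the CPU ALiBi path in ggml and are
// assumptions here, since only n_heads_log2_floor and m0 appear in the hunk.
#include <math.h>
#include <stdio.h>

int main(void) {
    const int   n_head   = 8;     // must be a power of two, per the GGML_ASSERT above
    const float max_bias = 8.0f;  // illustrative value

    const int   n_heads_log2_floor = 1 << (int) floor(log2(n_head));
    const float m0 = powf(2.0f, -(max_bias)        / n_heads_log2_floor);
    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

    for (int h = 0; h < n_head; ++h) {
        // first n_heads_log2_floor heads use powers of m0, the rest odd powers of m1
        const float m_h = h < n_heads_log2_floor
            ? powf(m0, h + 1)
            : powf(m1, 2*(h - n_heads_log2_floor) + 1);
        printf("head %d: slope %.6f\n", h, m_h);
    }

    return 0;
}
```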