@@ -800,6 +800,158 @@ static void sample_k_diffusion(sample_method_t method,
                 }
             }
         } break;
+        case IPNDM:  // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
+        {
+            int max_order = 4;
+            ggml_tensor* x_next = x;
+            std::vector<ggml_tensor*> buffer_model;
+
+            for (int i = 0; i < steps; i++) {
+                float sigma      = sigmas[i];
+                float sigma_next = sigmas[i + 1];
+
+                ggml_tensor* x_cur = x_next;
+                float* vec_x_cur   = (float*)x_cur->data;
+                float* vec_x_next  = (float*)x_next->data;
+
+                // Denoising step
+                ggml_tensor* denoised = model(x_cur, sigma, i + 1);
+                float* vec_denoised   = (float*)denoised->data;
+                // d_cur = (x_cur - denoised) / sigma
+                struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur);
+                float* vec_d_cur          = (float*)d_cur->data;
+
+                for (int j = 0; j < ggml_nelements(d_cur); j++) {
+                    vec_d_cur[j] = (vec_x_cur[j] - vec_denoised[j]) / sigma;
+                }
+
+                int order = std::min(max_order, i + 1);
+
+                // Calculate vec_x_next based on the order
+                switch (order) {
+                    case 1:  // First Euler step
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * vec_d_cur[j];
+                        }
+                        break;
+
+                    case 2:  // Use one history point
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (3 * vec_d_cur[j] - vec_d_prev1[j]) / 2;
+                        }
+                    }
+                    break;
+
+                    case 3:  // Use two history points
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12;
+                        }
+                    }
+                    break;
+
+                    case 4:  // Use three history points
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
+                        float* vec_d_prev3 = (float*)buffer_model[buffer_model.size() - 3]->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24;
+                        }
+                    }
+                    break;
+                }
+
+                // Manage buffer_model
+                if (buffer_model.size() == max_order - 1) {
+                    // Shift elements to the left
+                    for (int k = 0; k < max_order - 2; k++) {
+                        buffer_model[k] = buffer_model[k + 1];
+                    }
+                    buffer_model.back() = d_cur;  // Replace the last element with d_cur
+                } else {
+                    buffer_model.push_back(d_cur);
+                }
+            }
+        } break;
+        case IPNDM_V:  // iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
+        {
+            int max_order = 4;
+            std::vector<ggml_tensor*> buffer_model;
+            ggml_tensor* x_next = x;
+
+            for (int i = 0; i < steps; i++) {
+                float sigma  = sigmas[i];
+                float t_next = sigmas[i + 1];
+
+                // Denoising step
+                ggml_tensor* denoised     = model(x, sigma, i + 1);
+                float* vec_denoised       = (float*)denoised->data;
+                struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x);
+                float* vec_d_cur          = (float*)d_cur->data;
+                float* vec_x              = (float*)x->data;
+
+                // d_cur = (x - denoised) / sigma
+                for (int j = 0; j < ggml_nelements(d_cur); j++) {
+                    vec_d_cur[j] = (vec_x[j] - vec_denoised[j]) / sigma;
+                }
+
+                int order   = std::min(max_order, i + 1);
+                float h_n   = t_next - sigma;
+                float h_n_1 = (i > 0) ? (sigma - sigmas[i - 1]) : h_n;
+
+                switch (order) {
+                    case 1:  // First Euler step
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x[j] += vec_d_cur[j] * h_n;
+                        }
+                        break;
+
+                    case 2: {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x[j] += h_n * ((2 + (h_n / h_n_1)) * vec_d_cur[j] - (h_n / h_n_1) * vec_d_prev1[j]) / 2;
+                        }
+                        break;
+                    }
+
+                    case 3: {
+                        float h_n_2        = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x[j] += h_n * ((23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12);
+                        }
+                        break;
+                    }
+
+                    case 4: {
+                        float h_n_2        = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
+                        float h_n_3        = (i > 2) ? (sigmas[i - 2] - sigmas[i - 3]) : h_n_2;
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1;
+                        float* vec_d_prev3 = (buffer_model.size() > 2) ? (float*)buffer_model[buffer_model.size() - 3]->data : vec_d_prev2;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x[j] += h_n * ((55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24);
+                        }
+                        break;
+                    }
+                }
+
+                // Manage buffer_model
+                if (buffer_model.size() == max_order - 1) {
+                    buffer_model.erase(buffer_model.begin());
+                }
+                buffer_model.push_back(d_cur);
+
+                // Prepare the next d tensor
+                d_cur = ggml_dup_tensor(work_ctx, x_next);
+            }
+        } break;
         case LCM:  // Latent Consistency Models
         {
             struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
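For reference, the per-element updates in the order-2/3/4 branches above are the classic Adams–Bashforth multistep formulas (coefficients 3, -1 over 2; 23, -16, 5 over 12; and 55, -59, 37, -9 over 24), applied in sigma space with the denoiser output defining the derivative d = (x - denoised) / sigma. Below is a minimal standalone sketch of the order-4 update on plain float arrays; the helper name ab4_step and its signature are illustrative only and not part of this patch.

    #include <cstddef>

    // Illustrative sketch (not from the patch): one 4th-order Adams–Bashforth
    // step in sigma space, mirroring the order-4 branch of the IPNDM case.
    static void ab4_step(float* x, const float* d_cur,
                         const float* d_prev1, const float* d_prev2, const float* d_prev3,
                         float sigma, float sigma_next, size_t n) {
        const float h = sigma_next - sigma;  // step between consecutive noise levels
        for (size_t j = 0; j < n; j++) {
            x[j] += h * (55 * d_cur[j] - 59 * d_prev1[j] + 37 * d_prev2[j] - 9 * d_prev3[j]) / 24;
        }
    }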