Skip to content

Commit 49497b0

Browse files
author
CKI KWF Bot
committed
Merge: RHEL 10.2: DRM Main Backport (v6.16)
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1436 The DRM backport's goal is to backport all the changes in the DRM subsystem to the kernel target version, with the biggest value being that we get the upstream hardware enablement (and bug fixes) into RHEL. The first step of the DRM backport is identifying dependencies of the DRM backport outside of `drivers/gpu`. Sent as a draft in !1448 for kernel 6.16. This MR is the second step: Backporting the changes in the DRM subsystem. Depends: !1448 Depends: !1425 Depends: !1357 Fixes part of 6.17 MR and Omitted in this MR: Omitted-fix: 942ac8d ("`drm/xe/configfs: Fix pci_dev reference leak`") Omitted-fix: b67e742 ("`drm/xe: Fix build with CONFIG_MODULES=n`") Omitted-fix: fef8b64 ("`drm/xe/pf: Drop rounddown_pow_of_two fair LMEM limitation`") Omitted-fix: f9e4d8b ("`drm/xe/hwmon: Fix kernel version documentation for fan speed`") Omitted-fix: 58364f0 ("`drm/amdgpu: Fix vcn v4.0.3 poison irq call trace on sriov guest`") Omitted-fix: 3271faf ("`drm/hisilicon/hibmc: fix dp and vga cannot show together`") Omitted-fix: 9c28830 ("`drm/amd/display: fix initial backlight brightness calculation`") Omitted-fix: ff89a4d ("`drm/xe/sysfs: Add cleanup action in xe_device_sysfs_init`") Omitted-fix: 500dad4 ("`drm/xe/vf: Don't expose sysfs attributes not applicable for VFs`") Omitted-fix: d115a63 ("`drm/amdgpu/vcn4: add additional ring reset error checking`") Omitted-fix: 9a9e87d ("`drm/amdgpu/sdma: handle paging queues in amdgpu_sdma_reset_engine()`") Omitted-fix: 5fb9042 ("`drm/amdgpu: fix slab-use-after-free in amdgpu_userq_mgr_fini+0x70c`") Omitted-fix: a73345b ("`"Revert "drm/amdgpu: fix slab-use-after-free in amdgpu_userq_mgr_fini"`") Omitted-fix: a886d26 ("`drm/amdgpu: fix use-after-free in amdgpu_userq_suspend+0x51a/0x5a0`") Omitted-fix: 58e6fc2 ("`drm/amdkfd: Fix kfd process ref leaking when userptr unmapping`") Omitted-fix: 989fe67 ("`drm/nouveau/gsp: fix mismatched alloc/free for 
kvmalloc()`") Omitted-fix: 7a5b69d ("`drm/amdgpu/vcn5: add additional ring reset error checking`") Omitted-fix: 8e6a18c ("`drm/amd/display: Revert "drm/amd/display: Fix AMDGPU_MAX_BL_LEVEL value"`") Omitted-fix: 7551865 ("`drm/xe/vm: Don't pin the vm_resv during validation`") Omitted-fix: 2cb66ae ("`nouveau: Membar before between semaphore writes and the interrupt`") Omitted-fix: a3f7d26 ("`drm/sitronix: Remove broken backwards-compatibility layer`") Omitted-fix: fe69a39 ("`drm/panthor: Fix UAF in panthor_gem_create_with_handle() debugfs code`") Omitted-fix: 9d7a1cb ("`drm/xe/migrate: prevent infinite recursion`") Omitted-fix: 4126cb3 ("`drm/xe/migrate: don't overflow max copy size`") Omitted-fix: 145832f ("`drm/xe/migrate: prevent potential UAF`") Omitted-fix: 1b556bc ("`drm/amdgpu/vcn4.0.5: add additional ring reset error checking`") Omitted-fix: 7934fdc ("`drm/xe/configfs: Don't touch survivability_mode on fini`") Omitted-fix: fd56b9c ("`drm/i915/fbc: fix the implementation of wa_18038517565`") Omitted-fix: 658a1c8 ("`drm/xe: Assign ioctl xe file handler to vm in xe_vm_create`") Omitted-fix: 9337166 ("`drm/xe: Assign ioctl xe file handler to vm in xe_vm_create`") Omitted-fix: 8a30114 ("`drm/xe: Move ASID allocation and user PT BO tracking into xe_vm_create`") Omitted-fix: db2e7bc ("`drm: nova-drm: fix 32-bit arm build`") Omitted-fix: d848203 ("`drm/xe: Allow the pm notifier to continue on failure`") Omitted-fix: eb5723a ("`drm/xe: Block exec and rebind worker while evicting for suspend / hibernate`") Omitted-fix: 77c8ede ("`drm/xe: Don't copy pinned kernel bos twice on suspend`") Omitted-fix: 8e6a18c ("`drm/amd/display: Revert "drm/amd/display: Fix AMDGPU_MAX_BL_LEVEL value"`") Omitted-fix: ee38ea0 ("`drm/amdgpu: update firmware version checks for user queue support`") Fixes brought in by 6.18 and hence Omitted in this MR: Omitted-fix: d9b2623 ("`drm/xe: Fix build with CONFIG_MODULES=n`") Omitted-fix: d3d73bd ("`drm/amdgpu: Fix jpeg v4.0.3 poison irq call trace 
on sriov guest`") Omitted-fix: b389df0 ("`drm/st7571-i2c: Fix IS_ERR() vs NULL checks in probe()`") Omitted-fix: bf4e4b9 ("`drm/amd/display: Add NULL pointer checks in dc_stream cursor attribute functions`") Omitted-fix: 2343750 ("`drm/panic: Fix 24bit pixel crossing page boundaries`") Omitted-fix: 77a62e5 ("`drm/sched/tests: Remove redundant header files`") Omitted-fix: 38b34e9 ("`drm/xe/migrate: prevent infinite recursion`") Omitted-fix: 9b7ca35 ("`drm/xe/migrate: prevent potential UAF`") Omitted-fix: 6a91af2 ("`drm/xe/migrate: don't misalign current bytes`") Omitted-fix: 9af8f2b ("`drm/panic: Add a u64 divide by 10 for arm32`") Omitted-fix: 8bed4ec ("`drm/hisilicon/hibmc: fix irq_request()'s irq name variable is local`") Omitted-fix: 04864af ("`drm/bridge: cdns-dsi: Fix the _atomic_check()`") Omitted-fix: 9337166 ("`drm/xe: Assign ioctl xe file handler to vm in xe_vm_create`") Omitted-fix: 5993345 ("`drm/xe: Block exec and rebind worker while evicting for suspend / hibernate`") Omitted-fix: 225bc03 ("`drm/xe/evict: drop bogus assert`") Omitted-fix: 382bd6a ("`drm/amd/display: Don't program BLNDGAM_MEM_PWR_FORCE when CM low-power is disabled on CN30`") Omitted fixes: Omitted-fix: a600794 ("`accel/amdxdna: s/drm_gem_v[un]map_unlocked/drm_gem_v[un]map/`") Omitted-fix: d13e959 ("`drm/amdgpu/userq: move waiting for last fence before umap`") Omitted-fix: 36b0bc1 ("`drm/amdgpu/userq: unmap queues amdgpu_userq_mgr_fini()`") Omitted-fix: 44b6535 ("`drm/virtio: Fix NULL pointer deref in virtgpu_dma_buf_free_obj()`") Omitted-fix: 553ab30 ("`Documentation: nouveau: Update GSP message queue kernel-doc reference`") Omitted-fix: ef38b4e ("`drm/amdgpu: drop unused structures in amdgpu_drm.h`") Omitted-fix: 1cf52a0 ("`drm: define NVIDIA DRM format modifiers for GB20x`") Omitted-fix: 664ce10 ("`drm/nouveau: Advertise correct modifiers on GB20x`") Omitted-fix: eef295a ("`drm/vmwgfx: Restore Guest-Backed only cursor plane support`") Omitted-fix: 994dec1 ("`drm/i915/psr: fix pipe to 
vblank conversion`") ``` JIRA: https://issues.redhat.com/browse/RHEL-114533 Signed-off-by: Anusha Srivatsa <asrivats@redhat.com> # Merge Request Required Information ## Summary of Changes ## Approved Development Ticket(s) All submissions to CentOS Stream must reference a ticket in [Red Hat Jira](https://issues.redhat.com/). <details><summary>Click for formatting instructions</summary> Please follow the CentOS Stream [contribution documentation](https://docs.centos.org/en-US/stream-contrib/quickstart/) for how to file this ticket and have it approved. List tickets each on their own line of this description using the format "Resolves: RHEL-76229", "Related: RHEL-76229" or "Reverts: RHEL-76229", as appropriate. </details> Approved-by: José Expósito <jexposit@redhat.com> Approved-by: Jarod Wilson <jarod@redhat.com> Approved-by: David Airlie <airlied@redhat.com> Approved-by: Brian Masney <bmasney@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 7502303 + 69ad27e commit 49497b0

File tree

1,489 files changed

+61932
-33214
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,489 files changed

+61932
-33214
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@ NAME = Baby Opossum Posse
88
# DRM backport version
99
#
1010
RHEL_DRM_VERSION = 6
11-
RHEL_DRM_PATCHLEVEL = 15
12-
RHEL_DRM_SUBLEVEL = 5
11+
RHEL_DRM_PATCHLEVEL = 16
12+
RHEL_DRM_SUBLEVEL =
1313

1414
# *DOCUMENTATION*
1515
# To see a list of typical targets execute "make help"

drivers/accel/ivpu/ivpu_drv.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev)
374374
{
375375
int ret;
376376

377+
drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
378+
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
379+
377380
/* Update boot params located at first 4KB of FW memory */
378381
ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
379382

@@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
573576
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
574577
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
575578
atomic64_set(&vdev->unique_id_counter, 0);
579+
atomic_set(&vdev->job_timeout_counter, 0);
576580
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
577581
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
578582
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);

drivers/accel/ivpu/ivpu_drv.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ struct ivpu_device {
154154
struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
155155
struct xarray submitted_jobs_xa;
156156
struct ivpu_ipc_consumer job_done_consumer;
157+
atomic_t job_timeout_counter;
157158

158159
atomic64_t unique_id_counter;
159160

drivers/accel/ivpu/ivpu_fw.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ struct ivpu_fw_info {
3939
u64 read_only_addr;
4040
u32 read_only_size;
4141
u32 sched_mode;
42+
u64 last_heartbeat;
4243
};
4344

4445
int ivpu_fw_init(struct ivpu_device *vdev);

drivers/accel/ivpu/ivpu_gem.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
3030
"%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
3131
action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx_id,
3232
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
33-
(bool)bo->base.base.import_attach);
33+
(bool)drm_gem_is_imported(&bo->base.base));
3434
}
3535

3636
static inline int ivpu_bo_lock(struct ivpu_bo *bo)
@@ -130,7 +130,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
130130
bo->ctx = NULL;
131131
}
132132

133-
if (bo->base.base.import_attach)
133+
if (drm_gem_is_imported(&bo->base.base))
134134
return;
135135

136136
if (bo->base.sgt) {
@@ -294,7 +294,7 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj)
294294
drm_WARN_ON(&vdev->drm, bo->mmu_mapped);
295295
drm_WARN_ON(&vdev->drm, bo->ctx);
296296

297-
drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
297+
drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
298298
drm_gem_shmem_free(&bo->base);
299299
}
300300

@@ -377,7 +377,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
377377

378378
if (flags & DRM_IVPU_BO_MAPPABLE) {
379379
ivpu_bo_lock(bo);
380-
ret = drm_gem_shmem_vmap(&bo->base, &map);
380+
ret = drm_gem_shmem_vmap_locked(&bo->base, &map);
381381
ivpu_bo_unlock(bo);
382382

383383
if (ret)
@@ -402,7 +402,7 @@ void ivpu_bo_free(struct ivpu_bo *bo)
402402

403403
if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
404404
ivpu_bo_lock(bo);
405-
drm_gem_shmem_vunmap(&bo->base, &map);
405+
drm_gem_shmem_vunmap_locked(&bo->base, &map);
406406
ivpu_bo_unlock(bo);
407407
}
408408

@@ -476,7 +476,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
476476
if (bo->mmu_mapped)
477477
drm_printf(p, " mmu_mapped");
478478

479-
if (bo->base.base.import_attach)
479+
if (drm_gem_is_imported(&bo->base.base))
480480
drm_printf(p, " imported");
481481

482482
drm_printf(p, "\n");

drivers/accel/ivpu/ivpu_pm.c

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
3434
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
3535

3636
#define PM_RESCHEDULE_LIMIT 5
37+
#define PM_TDR_HEARTBEAT_LIMIT 30
3738

3839
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
3940
{
@@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
4445
ivpu_fw_log_reset(vdev);
4546
ivpu_fw_load(vdev);
4647
fw->entry_point = fw->cold_boot_entry_point;
48+
fw->last_heartbeat = 0;
4749
}
4850

4951
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
@@ -189,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work)
189191
{
190192
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
191193
struct ivpu_device *vdev = pm->vdev;
194+
u64 heartbeat;
192195

196+
if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
197+
ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
198+
goto recovery;
199+
}
200+
201+
if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) {
202+
ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n");
203+
goto recovery;
204+
}
205+
206+
vdev->fw->last_heartbeat = heartbeat;
207+
ivpu_start_job_timeout_detection(vdev);
208+
return;
209+
210+
recovery:
211+
atomic_set(&vdev->job_timeout_counter, 0);
193212
ivpu_pm_trigger_recovery(vdev, "TDR");
194213
}
195214

@@ -204,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
204223
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
205224
{
206225
cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
226+
atomic_set(&vdev->job_timeout_counter, 0);
207227
}
208228

209229
int ivpu_pm_suspend_cb(struct device *dev)

0 commit comments

Comments
 (0)