Skip to content

Commit 6f51092

Browse files
committed
Merge tag 'char-misc-5.1-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc driver fixes from Greg KH:
 "Here are some binder, habanalabs, and vboxguest driver fixes for 5.1-rc3.

  The Binder fixes resolve some reported issues found by testing, first by the selinux developers, and then earlier today by syzbot.

  The habanalabs fixes are all minor, resolving a number of tiny things.

  The vboxguest patches are a bit larger. They resolve the fact that virtual box decided to change their api in their latest release in a way that broke the existing kernel code, despite saying that they were never going to do that. So this is a bit of a "new feature", but is good to get merged so that 5.1 will work with the latest release. The changes are not large and of course virtual box "swears" they will not break this again, but no one is holding their breath here.

  All of these have been in linux-next for a while with no reported issues"

* tag 'char-misc-5.1-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc:
  virt: vbox: Implement passing requestor info to the host for VirtualBox 6.0.x
  binder: fix race between munmap() and direct reclaim
  binder: fix BUG_ON found by selinux-testsuite
  habanalabs: cast to expected type
  habanalabs: prevent host crash during suspend/resume
  habanalabs: perform accounting for active CS
  habanalabs: fix mapping with page size bigger than 4KB
  habanalabs: complete user context cleanup before hard reset
  habanalabs: fix bug when mapping very large memory area
  habanalabs: fix MMU number of pages calculation
2 parents 3467b90 + 0532a1b commit 6f51092

File tree

18 files changed

+324
-184
lines changed

18 files changed

+324
-184
lines changed

drivers/android/binder.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2057,7 +2057,8 @@ static size_t binder_get_object(struct binder_proc *proc,
20572057
size_t object_size = 0;
20582058

20592059
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
2060-
if (read_size < sizeof(*hdr) || !IS_ALIGNED(offset, sizeof(u32)))
2060+
if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
2061+
!IS_ALIGNED(offset, sizeof(u32)))
20612062
return 0;
20622063
binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
20632064
offset, read_size);

drivers/android/binder_alloc.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -927,14 +927,13 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
927927

928928
index = page - alloc->pages;
929929
page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
930+
931+
mm = alloc->vma_vm_mm;
932+
if (!mmget_not_zero(mm))
933+
goto err_mmget;
934+
if (!down_write_trylock(&mm->mmap_sem))
935+
goto err_down_write_mmap_sem_failed;
930936
vma = binder_alloc_get_vma(alloc);
931-
if (vma) {
932-
if (!mmget_not_zero(alloc->vma_vm_mm))
933-
goto err_mmget;
934-
mm = alloc->vma_vm_mm;
935-
if (!down_read_trylock(&mm->mmap_sem))
936-
goto err_down_write_mmap_sem_failed;
937-
}
938937

939938
list_lru_isolate(lru, item);
940939
spin_unlock(lock);
@@ -945,10 +944,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
945944
zap_page_range(vma, page_addr, PAGE_SIZE);
946945

947946
trace_binder_unmap_user_end(alloc, index);
948-
949-
up_read(&mm->mmap_sem);
950-
mmput(mm);
951947
}
948+
up_write(&mm->mmap_sem);
949+
mmput(mm);
952950

953951
trace_binder_unmap_kernel_start(alloc, index);
954952

drivers/misc/habanalabs/command_submission.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref)
179179

180180
/* We also need to update CI for internal queues */
181181
if (cs->submitted) {
182+
int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
183+
184+
WARN_ONCE((cs_cnt < 0),
185+
"hl%d: error in CS active cnt %d\n",
186+
hdev->id, cs_cnt);
187+
182188
hl_int_hw_queue_update_ci(cs);
183189

184190
spin_lock(&hdev->hw_queues_mirror_lock);

drivers/misc/habanalabs/debugfs.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ static int vm_show(struct seq_file *s, void *data)
232232
struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
233233
enum vm_type_t *vm_type;
234234
bool once = true;
235+
u64 j;
235236
int i;
236237

237238
if (!dev_entry->hdev->mmu_enable)
@@ -260,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
260261
} else {
261262
phys_pg_pack = hnode->ptr;
262263
seq_printf(s,
263-
" 0x%-14llx %-10u %-4u\n",
264+
" 0x%-14llx %-10llu %-4u\n",
264265
hnode->vaddr, phys_pg_pack->total_size,
265266
phys_pg_pack->handle);
266267
}
@@ -282,9 +283,9 @@ static int vm_show(struct seq_file *s, void *data)
282283
phys_pg_pack->page_size);
283284
seq_puts(s, " physical address\n");
284285
seq_puts(s, "---------------------\n");
285-
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
286+
for (j = 0 ; j < phys_pg_pack->npages ; j++) {
286287
seq_printf(s, " 0x%-14llx\n",
287-
phys_pg_pack->pages[i]);
288+
phys_pg_pack->pages[j]);
288289
}
289290
}
290291
spin_unlock(&vm->idr_lock);

drivers/misc/habanalabs/device.c

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <linux/sched/signal.h>
1212
#include <linux/hwmon.h>
1313

14+
#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
15+
1416
bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
1517
{
1618
if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
@@ -216,6 +218,7 @@ static int device_early_init(struct hl_device *hdev)
216218
spin_lock_init(&hdev->hw_queues_mirror_lock);
217219
atomic_set(&hdev->in_reset, 0);
218220
atomic_set(&hdev->fd_open_cnt, 0);
221+
atomic_set(&hdev->cs_active_cnt, 0);
219222

220223
return 0;
221224

@@ -413,6 +416,27 @@ int hl_device_suspend(struct hl_device *hdev)
413416

414417
pci_save_state(hdev->pdev);
415418

419+
/* Block future CS/VM/JOB completion operations */
420+
rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
421+
if (rc) {
422+
dev_err(hdev->dev, "Can't suspend while in reset\n");
423+
return -EIO;
424+
}
425+
426+
/* This blocks all other stuff that is not blocked by in_reset */
427+
hdev->disabled = true;
428+
429+
/*
430+
* Flush anyone that is inside the critical section of enqueue
431+
* jobs to the H/W
432+
*/
433+
hdev->asic_funcs->hw_queues_lock(hdev);
434+
hdev->asic_funcs->hw_queues_unlock(hdev);
435+
436+
/* Flush processes that are sending message to CPU */
437+
mutex_lock(&hdev->send_cpu_message_lock);
438+
mutex_unlock(&hdev->send_cpu_message_lock);
439+
416440
rc = hdev->asic_funcs->suspend(hdev);
417441
if (rc)
418442
dev_err(hdev->dev,
@@ -440,31 +464,55 @@ int hl_device_resume(struct hl_device *hdev)
440464

441465
pci_set_power_state(hdev->pdev, PCI_D0);
442466
pci_restore_state(hdev->pdev);
443-
rc = pci_enable_device(hdev->pdev);
467+
rc = pci_enable_device_mem(hdev->pdev);
444468
if (rc) {
445469
dev_err(hdev->dev,
446470
"Failed to enable PCI device in resume\n");
447471
return rc;
448472
}
449473

474+
pci_set_master(hdev->pdev);
475+
450476
rc = hdev->asic_funcs->resume(hdev);
451477
if (rc) {
452-
dev_err(hdev->dev,
453-
"Failed to enable PCI access from device CPU\n");
454-
return rc;
478+
dev_err(hdev->dev, "Failed to resume device after suspend\n");
479+
goto disable_device;
480+
}
481+
482+
483+
hdev->disabled = false;
484+
atomic_set(&hdev->in_reset, 0);
485+
486+
rc = hl_device_reset(hdev, true, false);
487+
if (rc) {
488+
dev_err(hdev->dev, "Failed to reset device during resume\n");
489+
goto disable_device;
455490
}
456491

457492
return 0;
493+
494+
disable_device:
495+
pci_clear_master(hdev->pdev);
496+
pci_disable_device(hdev->pdev);
497+
498+
return rc;
458499
}
459500

460501
static void hl_device_hard_reset_pending(struct work_struct *work)
461502
{
462503
struct hl_device_reset_work *device_reset_work =
463504
container_of(work, struct hl_device_reset_work, reset_work);
464505
struct hl_device *hdev = device_reset_work->hdev;
465-
u16 pending_cnt = HL_PENDING_RESET_PER_SEC;
506+
u16 pending_total, pending_cnt;
466507
struct task_struct *task = NULL;
467508

509+
if (hdev->pldm)
510+
pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
511+
else
512+
pending_total = HL_PENDING_RESET_PER_SEC;
513+
514+
pending_cnt = pending_total;
515+
468516
/* Flush all processes that are inside hl_open */
469517
mutex_lock(&hdev->fd_open_cnt_lock);
470518

@@ -489,6 +537,19 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
489537
}
490538
}
491539

540+
pending_cnt = pending_total;
541+
542+
while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
543+
544+
pending_cnt--;
545+
546+
ssleep(1);
547+
}
548+
549+
if (atomic_read(&hdev->fd_open_cnt))
550+
dev_crit(hdev->dev,
551+
"Going to hard reset with open user contexts\n");
552+
492553
mutex_unlock(&hdev->fd_open_cnt_lock);
493554

494555
hl_device_reset(hdev, true, true);

drivers/misc/habanalabs/goya/goya.c

Lines changed: 2 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,15 +1201,6 @@ static int goya_stop_external_queues(struct hl_device *hdev)
12011201
return retval;
12021202
}
12031203

1204-
static void goya_resume_external_queues(struct hl_device *hdev)
1205-
{
1206-
WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
1207-
WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
1208-
WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
1209-
WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
1210-
WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
1211-
}
1212-
12131204
/*
12141205
* goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
12151206
*
@@ -2178,36 +2169,6 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
21782169
return retval;
21792170
}
21802171

2181-
static void goya_resume_internal_queues(struct hl_device *hdev)
2182-
{
2183-
WREG32(mmMME_QM_GLBL_CFG1, 0);
2184-
WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
2185-
2186-
WREG32(mmTPC0_QM_GLBL_CFG1, 0);
2187-
WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
2188-
2189-
WREG32(mmTPC1_QM_GLBL_CFG1, 0);
2190-
WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
2191-
2192-
WREG32(mmTPC2_QM_GLBL_CFG1, 0);
2193-
WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
2194-
2195-
WREG32(mmTPC3_QM_GLBL_CFG1, 0);
2196-
WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
2197-
2198-
WREG32(mmTPC4_QM_GLBL_CFG1, 0);
2199-
WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
2200-
2201-
WREG32(mmTPC5_QM_GLBL_CFG1, 0);
2202-
WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
2203-
2204-
WREG32(mmTPC6_QM_GLBL_CFG1, 0);
2205-
WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
2206-
2207-
WREG32(mmTPC7_QM_GLBL_CFG1, 0);
2208-
WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
2209-
}
2210-
22112172
static void goya_dma_stall(struct hl_device *hdev)
22122173
{
22132174
WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
@@ -2905,20 +2866,6 @@ int goya_suspend(struct hl_device *hdev)
29052866
{
29062867
int rc;
29072868

2908-
rc = goya_stop_internal_queues(hdev);
2909-
2910-
if (rc) {
2911-
dev_err(hdev->dev, "failed to stop internal queues\n");
2912-
return rc;
2913-
}
2914-
2915-
rc = goya_stop_external_queues(hdev);
2916-
2917-
if (rc) {
2918-
dev_err(hdev->dev, "failed to stop external queues\n");
2919-
return rc;
2920-
}
2921-
29222869
rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
29232870
if (rc)
29242871
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
@@ -2928,15 +2875,7 @@ int goya_suspend(struct hl_device *hdev)
29282875

29292876
int goya_resume(struct hl_device *hdev)
29302877
{
2931-
int rc;
2932-
2933-
goya_resume_external_queues(hdev);
2934-
goya_resume_internal_queues(hdev);
2935-
2936-
rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
2937-
if (rc)
2938-
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2939-
return rc;
2878+
return goya_init_iatu(hdev);
29402879
}
29412880

29422881
static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
@@ -3070,7 +3009,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
30703009

30713010
*dma_handle = hdev->asic_prop.sram_base_address;
30723011

3073-
base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
3012+
base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
30743013

30753014
switch (queue_id) {
30763015
case GOYA_QUEUE_ID_MME:

drivers/misc/habanalabs/habanalabs.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -793,11 +793,11 @@ struct hl_vm_hash_node {
793793
* struct hl_vm_phys_pg_pack - physical page pack.
794794
* @vm_type: describes the type of the virtual area descriptor.
795795
* @pages: the physical page array.
796+
* @npages: num physical pages in the pack.
797+
* @total_size: total size of all the pages in this list.
796798
* @mapping_cnt: number of shared mappings.
797799
* @asid: the context related to this list.
798-
* @npages: num physical pages in the pack.
799800
* @page_size: size of each page in the pack.
800-
* @total_size: total size of all the pages in this list.
801801
* @flags: HL_MEM_* flags related to this list.
802802
* @handle: the provided handle related to this list.
803803
* @offset: offset from the first page.
@@ -807,11 +807,11 @@ struct hl_vm_hash_node {
807807
struct hl_vm_phys_pg_pack {
808808
enum vm_type_t vm_type; /* must be first */
809809
u64 *pages;
810+
u64 npages;
811+
u64 total_size;
810812
atomic_t mapping_cnt;
811813
u32 asid;
812-
u32 npages;
813814
u32 page_size;
814-
u32 total_size;
815815
u32 flags;
816816
u32 handle;
817817
u32 offset;
@@ -1056,13 +1056,15 @@ struct hl_device_reset_work {
10561056
* @cb_pool_lock: protects the CB pool.
10571057
* @user_ctx: current user context executing.
10581058
* @dram_used_mem: current DRAM memory consumption.
1059-
* @in_reset: is device in reset flow.
1060-
* @curr_pll_profile: current PLL profile.
1061-
* @fd_open_cnt: number of open user processes.
10621059
* @timeout_jiffies: device CS timeout value.
10631060
* @max_power: the max power of the device, as configured by the sysadmin. This
10641061
* value is saved so in case of hard-reset, KMD will restore this
10651062
* value and update the F/W after the re-initialization
1063+
* @in_reset: is device in reset flow.
1064+
* @curr_pll_profile: current PLL profile.
1065+
* @fd_open_cnt: number of open user processes.
1066+
* @cs_active_cnt: number of active command submissions on this device (active
1067+
* means already in H/W queues)
10661068
* @major: habanalabs KMD major.
10671069
* @high_pll: high PLL profile frequency.
10681070
* @soft_reset_cnt: number of soft reset since KMD loading.
@@ -1128,11 +1130,12 @@ struct hl_device {
11281130
struct hl_ctx *user_ctx;
11291131

11301132
atomic64_t dram_used_mem;
1133+
u64 timeout_jiffies;
1134+
u64 max_power;
11311135
atomic_t in_reset;
11321136
atomic_t curr_pll_profile;
11331137
atomic_t fd_open_cnt;
1134-
u64 timeout_jiffies;
1135-
u64 max_power;
1138+
atomic_t cs_active_cnt;
11361139
u32 major;
11371140
u32 high_pll;
11381141
u32 soft_reset_cnt;

drivers/misc/habanalabs/hw_queue.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
370370
spin_unlock(&hdev->hw_queues_mirror_lock);
371371
}
372372

373-
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
373+
atomic_inc(&hdev->cs_active_cnt);
374+
375+
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
374376
if (job->ext_queue)
375377
ext_hw_queue_schedule_job(job);
376378
else
377379
int_hw_queue_schedule_job(job);
378-
}
379380

380381
cs->submitted = true;
381382

0 commit comments

Comments
 (0)