Skip to content

Commit 188b9f4

Browse files
sbradshawmicron authored and axboe committed
mtip32xx: Make SGL container per-command to eliminate high order dma allocation
The mtip32xx driver makes a high order dma memory allocation to store a command index table, some dedicated buffers, and a command header & SGL blob. This allocation can fail with a surprise insert under low & fragmented memory conditions. This patch breaks these regions up into separate low order allocations and increases the maximum number of segments a single command SGL can have. We wanted to allow at least 256 segments for 1 MB direct IO. Since the command header occupies the first 0x80 bytes of the SGL blob, that meant we needed two 4k pages to contain the header and SGL. The two pages allow up to 504 SGL segments. Signed-off-by: Sam Bradshaw <sbradshaw@micron.com> Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent e1803a7 commit 188b9f4

File tree

2 files changed

+149
-97
lines changed

2 files changed

+149
-97
lines changed

drivers/block/mtip32xx/mtip32xx.c

Lines changed: 144 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,31 @@
4141
#include "mtip32xx.h"
4242

4343
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
44-
#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
45-
#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
46-
#define HW_PORT_PRIV_DMA_SZ \
47-
(HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
44+
45+
/*
 * DMA region containing RX Fis, Identify, RLE10, and SMART buffers.
 * Every buffer is described by a size and a fixed byte offset into the
 * region; the offsets are spaced 0x400 bytes apart.
 */
46+
#define AHCI_RX_FIS_SZ 0x100
47+
#define AHCI_RX_FIS_OFFSET 0x0
48+
#define AHCI_IDFY_SZ ATA_SECT_SIZE
49+
#define AHCI_IDFY_OFFSET 0x400
50+
#define AHCI_SECTBUF_SZ ATA_SECT_SIZE
51+
#define AHCI_SECTBUF_OFFSET 0x800
52+
#define AHCI_SMARTBUF_SZ ATA_SECT_SIZE
53+
#define AHCI_SMARTBUF_OFFSET 0xC00
54+
/*
 * 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out
 * to a full 4096-byte allocation.
 */
55+
#define BLOCK_DMA_ALLOC_SZ 4096
56+
57+
/*
 * DMA region containing command table (should be 8192 bytes): one
 * struct mtip_cmd_hdr for each of the MTIP_MAX_COMMAND_SLOTS slots.
 */
58+
#define AHCI_CMD_SLOT_SZ sizeof(struct mtip_cmd_hdr)
59+
#define AHCI_CMD_TBL_SZ (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
60+
#define AHCI_CMD_TBL_OFFSET 0x0
61+
62+
/*
 * DMA region per command (contains header and SGL): a 0x80-byte header
 * at offset 0, followed immediately by MTIP_MAX_SG entries of
 * struct mtip_cmd_sg.
 */
63+
#define AHCI_CMD_TBL_HDR_SZ 0x80
64+
#define AHCI_CMD_TBL_HDR_OFFSET 0x0
65+
#define AHCI_CMD_TBL_SGL_SZ (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
66+
#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
67+
#define CMD_DMA_ALLOC_SZ (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
68+
4869

4970
#define HOST_CAP_NZDMA (1 << 19)
5071
#define HOST_HSORG 0xFC
@@ -3312,6 +3333,118 @@ static int mtip_service_thread(void *data)
33123333
return 0;
33133334
}
33143335

3336+
/*
3337+
* DMA region teardown
3338+
*
3339+
* @dd Pointer to driver_data structure
3340+
*
3341+
* return value
3342+
* None
3343+
*/
3344+
static void mtip_dma_free(struct driver_data *dd)
3345+
{
3346+
int i;
3347+
struct mtip_port *port = dd->port;
3348+
3349+
if (port->block1)
3350+
dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3351+
port->block1, port->block1_dma);
3352+
3353+
if (port->command_list) {
3354+
dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3355+
port->command_list, port->command_list_dma);
3356+
}
3357+
3358+
for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3359+
if (port->commands[i].command)
3360+
dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3361+
port->commands[i].command,
3362+
port->commands[i].command_dma);
3363+
}
3364+
}
3365+
3366+
/*
3367+
* DMA region setup
3368+
*
3369+
* @dd Pointer to driver_data structure
3370+
*
3371+
* return value
3372+
* -ENOMEM Not enough free DMA region space to initialize driver
3373+
*/
3374+
static int mtip_dma_alloc(struct driver_data *dd)
3375+
{
3376+
struct mtip_port *port = dd->port;
3377+
int i, rv = 0;
3378+
u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
3379+
3380+
/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
3381+
port->block1 =
3382+
dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3383+
&port->block1_dma, GFP_KERNEL);
3384+
if (!port->block1)
3385+
return -ENOMEM;
3386+
memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
3387+
3388+
/* Allocate dma memory for command list */
3389+
port->command_list =
3390+
dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3391+
&port->command_list_dma, GFP_KERNEL);
3392+
if (!port->command_list) {
3393+
dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3394+
port->block1, port->block1_dma);
3395+
port->block1 = NULL;
3396+
port->block1_dma = 0;
3397+
return -ENOMEM;
3398+
}
3399+
memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
3400+
3401+
/* Setup all pointers into first DMA region */
3402+
port->rxfis = port->block1 + AHCI_RX_FIS_OFFSET;
3403+
port->rxfis_dma = port->block1_dma + AHCI_RX_FIS_OFFSET;
3404+
port->identify = port->block1 + AHCI_IDFY_OFFSET;
3405+
port->identify_dma = port->block1_dma + AHCI_IDFY_OFFSET;
3406+
port->log_buf = port->block1 + AHCI_SECTBUF_OFFSET;
3407+
port->log_buf_dma = port->block1_dma + AHCI_SECTBUF_OFFSET;
3408+
port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET;
3409+
port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
3410+
3411+
/* Setup per command SGL DMA region */
3412+
3413+
/* Point the command headers at the command tables */
3414+
for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3415+
port->commands[i].command =
3416+
dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3417+
&port->commands[i].command_dma, GFP_KERNEL);
3418+
if (!port->commands[i].command) {
3419+
rv = -ENOMEM;
3420+
mtip_dma_free(dd);
3421+
return rv;
3422+
}
3423+
memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
3424+
3425+
port->commands[i].command_header = port->command_list +
3426+
(sizeof(struct mtip_cmd_hdr) * i);
3427+
port->commands[i].command_header_dma =
3428+
dd->port->command_list_dma +
3429+
(sizeof(struct mtip_cmd_hdr) * i);
3430+
3431+
if (host_cap_64)
3432+
port->commands[i].command_header->ctbau =
3433+
__force_bit2int cpu_to_le32(
3434+
(port->commands[i].command_dma >> 16) >> 16);
3435+
3436+
port->commands[i].command_header->ctba =
3437+
__force_bit2int cpu_to_le32(
3438+
port->commands[i].command_dma & 0xFFFFFFFF);
3439+
3440+
sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
3441+
3442+
/* Mark command as currently inactive */
3443+
atomic_set(&dd->port->commands[i].active, 0);
3444+
}
3445+
return 0;
3446+
}
3447+
33153448
/*
33163449
* Called once for each card.
33173450
*
@@ -3370,83 +3503,10 @@ static int mtip_hw_init(struct driver_data *dd)
33703503
dd->port->mmio = dd->mmio + PORT_OFFSET;
33713504
dd->port->dd = dd;
33723505

3373-
/* Allocate memory for the command list. */
3374-
dd->port->command_list =
3375-
dmam_alloc_coherent(&dd->pdev->dev,
3376-
HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3377-
&dd->port->command_list_dma,
3378-
GFP_KERNEL);
3379-
if (!dd->port->command_list) {
3380-
dev_err(&dd->pdev->dev,
3381-
"Memory allocation: command list\n");
3382-
rv = -ENOMEM;
3506+
/* DMA allocations */
3507+
rv = mtip_dma_alloc(dd);
3508+
if (rv < 0)
33833509
goto out1;
3384-
}
3385-
3386-
/* Clear the memory we have allocated. */
3387-
memset(dd->port->command_list,
3388-
0,
3389-
HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
3390-
3391-
/* Setup the addresse of the RX FIS. */
3392-
dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
3393-
dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
3394-
3395-
/* Setup the address of the command tables. */
3396-
dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
3397-
dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
3398-
3399-
/* Setup the address of the identify data. */
3400-
dd->port->identify = dd->port->command_table +
3401-
HW_CMD_TBL_AR_SZ;
3402-
dd->port->identify_dma = dd->port->command_tbl_dma +
3403-
HW_CMD_TBL_AR_SZ;
3404-
3405-
/* Setup the address of the sector buffer - for some non-ncq cmds */
3406-
dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
3407-
dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
3408-
3409-
/* Setup the address of the log buf - for read log command */
3410-
dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
3411-
dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
3412-
3413-
/* Setup the address of the smart buf - for smart read data command */
3414-
dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
3415-
dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3416-
3417-
3418-
/* Point the command headers at the command tables. */
3419-
for (i = 0; i < num_command_slots; i++) {
3420-
dd->port->commands[i].command_header =
3421-
dd->port->command_list +
3422-
(sizeof(struct mtip_cmd_hdr) * i);
3423-
dd->port->commands[i].command_header_dma =
3424-
dd->port->command_list_dma +
3425-
(sizeof(struct mtip_cmd_hdr) * i);
3426-
3427-
dd->port->commands[i].command =
3428-
dd->port->command_table + (HW_CMD_TBL_SZ * i);
3429-
dd->port->commands[i].command_dma =
3430-
dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
3431-
3432-
if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
3433-
dd->port->commands[i].command_header->ctbau =
3434-
__force_bit2int cpu_to_le32(
3435-
(dd->port->commands[i].command_dma >> 16) >> 16);
3436-
dd->port->commands[i].command_header->ctba =
3437-
__force_bit2int cpu_to_le32(
3438-
dd->port->commands[i].command_dma & 0xFFFFFFFF);
3439-
3440-
/*
3441-
* If this is not done, a bug is reported by the stock
3442-
* FC11 i386. Due to the fact that it has lots of kernel
3443-
* debugging enabled.
3444-
*/
3445-
sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
3446-
3447-
/* Mark all commands as currently inactive.*/
3448-
atomic_set(&dd->port->commands[i].active, 0);
3449-
}
34503510

34513511
/* Setup the pointers to the extended s_active and CI registers. */
34523512
for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3654,8 @@ static int mtip_hw_init(struct driver_data *dd)
35943654

35953655
out2:
35963656
mtip_deinit_port(dd->port);
3657+
mtip_dma_free(dd);
35973658

3598-
/* Free the command/command header memory. */
3599-
dmam_free_coherent(&dd->pdev->dev,
3600-
HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3601-
dd->port->command_list,
3602-
dd->port->command_list_dma);
36033659
out1:
36043660
/* Free the memory allocated for the for structure. */
36053661
kfree(dd->port);
@@ -3641,11 +3697,9 @@ static int mtip_hw_exit(struct driver_data *dd)
36413697
irq_set_affinity_hint(dd->pdev->irq, NULL);
36423698
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
36433699

3644-
/* Free the command/command header memory. */
3645-
dmam_free_coherent(&dd->pdev->dev,
3646-
HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3647-
dd->port->command_list,
3648-
dd->port->command_list_dma);
3700+
/* Free dma regions */
3701+
mtip_dma_free(dd);
3702+
36493703
/* Free the memory allocated for the for structure. */
36503704
kfree(dd->port);
36513705
dd->port = NULL;

drivers/block/mtip32xx/mtip32xx.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
* Maximum number of scatter gather entries
7070
* a single command may have.
7171
*/
72-
#define MTIP_MAX_SG 128
72+
#define MTIP_MAX_SG 504
7373

7474
/*
7575
* Maximum number of slot groups (Command Issue & s_active registers)
@@ -391,15 +391,13 @@ struct mtip_port {
391391
*/
392392
dma_addr_t rxfis_dma;
393393
/*
394-
* Pointer to the beginning of the command table memory as used
395-
* by the driver.
394+
* Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
396395
*/
397-
void *command_table;
396+
void *block1;
398397
/*
399-
* Pointer to the beginning of the command table memory as used
400-
* by the DMA.
398+
* DMA address of region for RX Fis, Identify, RLE10, and SMART
401399
*/
402-
dma_addr_t command_tbl_dma;
400+
dma_addr_t block1_dma;
403401
/*
404402
* Pointer to the beginning of the identify data memory as used
405403
* by the driver.

0 commit comments

Comments
 (0)