Skip to content

Commit a3db102

Browse files
KAGA-KOKO authored and axboe committed
skd: Reduce memory usage
Every single coherent DMA memory buffer occupies at least one page. Reduce memory usage by switching from coherent buffers to streaming DMA for I/O requests (struct skd_fitmsg_context) and S/G-lists (struct fit_sg_descriptor[]).

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d4d0f5f commit a3db102

File tree

1 file changed

+108
-37
lines changed

1 file changed

+108
-37
lines changed

drivers/block/skd_main.c

Lines changed: 108 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <linux/aer.h>
3333
#include <linux/wait.h>
3434
#include <linux/stringify.h>
35+
#include <linux/slab_def.h>
3536
#include <scsi/scsi.h>
3637
#include <scsi/sg.h>
3738
#include <linux/io.h>
@@ -256,6 +257,9 @@ struct skd_device {
256257

257258
u8 skcomp_cycle;
258259
u32 skcomp_ix;
260+
struct kmem_cache *msgbuf_cache;
261+
struct kmem_cache *sglist_cache;
262+
struct kmem_cache *databuf_cache;
259263
struct fit_completion_entry_v1 *skcomp_table;
260264
struct fit_comp_error_info *skerr_table;
261265
dma_addr_t cq_dma_address;
@@ -538,6 +542,11 @@ static void skd_process_request(struct request *req, bool last)
538542
return;
539543
}
540544

545+
dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
546+
skreq->n_sg *
547+
sizeof(struct fit_sg_descriptor),
548+
DMA_TO_DEVICE);
549+
541550
spin_lock_irqsave(&skdev->lock, flags);
542551
/* Either a FIT msg is in progress or we have to start one. */
543552
skmsg = skdev->skmsg;
@@ -1078,6 +1087,11 @@ static void skd_complete_internal(struct skd_device *skdev,
10781087

10791088
dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
10801089

1090+
dma_sync_single_for_cpu(&skdev->pdev->dev,
1091+
skspcl->db_dma_address,
1092+
skspcl->req.sksg_list[0].byte_count,
1093+
DMA_BIDIRECTIONAL);
1094+
10811095
skspcl->req.completion = *skcomp;
10821096
skspcl->req.state = SKD_REQ_STATE_IDLE;
10831097
skspcl->req.id += SKD_ID_INCR;
@@ -1263,6 +1277,9 @@ static void skd_send_fitmsg(struct skd_device *skdev,
12631277
*/
12641278
qcmd |= FIT_QCMD_MSGSIZE_64;
12651279

1280+
dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
1281+
skmsg->length, DMA_TO_DEVICE);
1282+
12661283
/* Make sure skd_msg_buf is written before the doorbell is triggered. */
12671284
smp_wmb();
12681285

@@ -1274,6 +1291,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
12741291
{
12751292
u64 qcmd;
12761293

1294+
WARN_ON_ONCE(skspcl->req.n_sg != 1);
1295+
12771296
if (unlikely(skdev->dbg_level > 1)) {
12781297
u8 *bp = (u8 *)skspcl->msg_buf;
12791298
int i;
@@ -1307,6 +1326,17 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
13071326
qcmd = skspcl->mb_dma_address;
13081327
qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
13091328

1329+
dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
1330+
SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
1331+
dma_sync_single_for_device(&skdev->pdev->dev,
1332+
skspcl->req.sksg_dma_address,
1333+
1 * sizeof(struct fit_sg_descriptor),
1334+
DMA_TO_DEVICE);
1335+
dma_sync_single_for_device(&skdev->pdev->dev,
1336+
skspcl->db_dma_address,
1337+
skspcl->req.sksg_list[0].byte_count,
1338+
DMA_BIDIRECTIONAL);
1339+
13101340
/* Make sure skd_msg_buf is written before the doorbell is triggered. */
13111341
smp_wmb();
13121342

@@ -2619,6 +2649,35 @@ static void skd_release_irq(struct skd_device *skdev)
26192649
*****************************************************************************
26202650
*/
26212651

2652+
static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
2653+
dma_addr_t *dma_handle, gfp_t gfp,
2654+
enum dma_data_direction dir)
2655+
{
2656+
struct device *dev = &skdev->pdev->dev;
2657+
void *buf;
2658+
2659+
buf = kmem_cache_alloc(s, gfp);
2660+
if (!buf)
2661+
return NULL;
2662+
*dma_handle = dma_map_single(dev, buf, s->size, dir);
2663+
if (dma_mapping_error(dev, *dma_handle)) {
2664+
kfree(buf);
2665+
buf = NULL;
2666+
}
2667+
return buf;
2668+
}
2669+
2670+
/*
 * Unmap and return to slab cache @s a buffer previously obtained from
 * skd_alloc_dma().  @dir must match the direction used when mapping.
 * Passing a NULL @vaddr is a no-op.
 */
static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
			 void *vaddr, dma_addr_t dma_handle,
			 enum dma_data_direction dir)
{
	struct device *dev = &skdev->pdev->dev;

	if (vaddr) {
		dma_unmap_single(dev, dma_handle, s->size, dir);
		kmem_cache_free(s, vaddr);
	}
}
2680+
26222681
static int skd_cons_skcomp(struct skd_device *skdev)
26232682
{
26242683
int rc = 0;
@@ -2695,18 +2754,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
26952754
dma_addr_t *ret_dma_addr)
26962755
{
26972756
struct fit_sg_descriptor *sg_list;
2698-
u32 nbytes;
26992757

2700-
nbytes = sizeof(*sg_list) * n_sg;
2701-
2702-
sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
2758+
sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
2759+
GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
27032760

27042761
if (sg_list != NULL) {
27052762
uint64_t dma_address = *ret_dma_addr;
27062763
u32 i;
27072764

2708-
memset(sg_list, 0, nbytes);
2709-
27102765
for (i = 0; i < n_sg - 1; i++) {
27112766
uint64_t ndp_off;
27122767
ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -2720,15 +2775,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
27202775
}
27212776

27222777
static void skd_free_sg_list(struct skd_device *skdev,
2723-
struct fit_sg_descriptor *sg_list, u32 n_sg,
2778+
struct fit_sg_descriptor *sg_list,
27242779
dma_addr_t dma_addr)
27252780
{
2726-
u32 nbytes = sizeof(*sg_list) * n_sg;
2727-
27282781
if (WARN_ON_ONCE(!sg_list))
27292782
return;
27302783

2731-
pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
2784+
skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
2785+
DMA_TO_DEVICE);
27322786
}
27332787

27342788
static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
@@ -2752,34 +2806,31 @@ static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
27522806
struct skd_device *skdev = set->driver_data;
27532807
struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
27542808

2755-
skd_free_sg_list(skdev, skreq->sksg_list,
2756-
skdev->sgs_per_request,
2757-
skreq->sksg_dma_address);
2809+
skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
27582810
}
27592811

27602812
static int skd_cons_sksb(struct skd_device *skdev)
27612813
{
27622814
int rc = 0;
27632815
struct skd_special_context *skspcl;
2764-
u32 nbytes;
27652816

27662817
skspcl = &skdev->internal_skspcl;
27672818

27682819
skspcl->req.id = 0 + SKD_ID_INTERNAL;
27692820
skspcl->req.state = SKD_REQ_STATE_IDLE;
27702821

2771-
nbytes = SKD_N_INTERNAL_BYTES;
2772-
2773-
skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
2774-
&skspcl->db_dma_address);
2822+
skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
2823+
&skspcl->db_dma_address,
2824+
GFP_DMA | __GFP_ZERO,
2825+
DMA_BIDIRECTIONAL);
27752826
if (skspcl->data_buf == NULL) {
27762827
rc = -ENOMEM;
27772828
goto err_out;
27782829
}
27792830

2780-
nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
2781-
skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
2782-
&skspcl->mb_dma_address);
2831+
skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
2832+
&skspcl->mb_dma_address,
2833+
GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
27832834
if (skspcl->msg_buf == NULL) {
27842835
rc = -ENOMEM;
27852836
goto err_out;
@@ -2886,6 +2937,7 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
28862937
{
28872938
struct skd_device *skdev;
28882939
int blk_major = skd_major;
2940+
size_t size;
28892941
int rc;
28902942

28912943
skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
@@ -2914,6 +2966,31 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
29142966
INIT_WORK(&skdev->start_queue, skd_start_queue);
29152967
INIT_WORK(&skdev->completion_worker, skd_completion_worker);
29162968

2969+
size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
2970+
skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
2971+
SLAB_HWCACHE_ALIGN, NULL);
2972+
if (!skdev->msgbuf_cache)
2973+
goto err_out;
2974+
WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
2975+
"skd-msgbuf: %d < %zd\n",
2976+
kmem_cache_size(skdev->msgbuf_cache), size);
2977+
size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
2978+
skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
2979+
SLAB_HWCACHE_ALIGN, NULL);
2980+
if (!skdev->sglist_cache)
2981+
goto err_out;
2982+
WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
2983+
"skd-sglist: %d < %zd\n",
2984+
kmem_cache_size(skdev->sglist_cache), size);
2985+
size = SKD_N_INTERNAL_BYTES;
2986+
skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
2987+
SLAB_HWCACHE_ALIGN, NULL);
2988+
if (!skdev->databuf_cache)
2989+
goto err_out;
2990+
WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
2991+
"skd-databuf: %d < %zd\n",
2992+
kmem_cache_size(skdev->databuf_cache), size);
2993+
29172994
dev_dbg(&skdev->pdev->dev, "skcomp\n");
29182995
rc = skd_cons_skcomp(skdev);
29192996
if (rc < 0)
@@ -2986,31 +3063,21 @@ static void skd_free_skmsg(struct skd_device *skdev)
29863063

29873064
static void skd_free_sksb(struct skd_device *skdev)
29883065
{
2989-
struct skd_special_context *skspcl;
2990-
u32 nbytes;
2991-
2992-
skspcl = &skdev->internal_skspcl;
2993-
2994-
if (skspcl->data_buf != NULL) {
2995-
nbytes = SKD_N_INTERNAL_BYTES;
3066+
struct skd_special_context *skspcl = &skdev->internal_skspcl;
29963067

2997-
pci_free_consistent(skdev->pdev, nbytes,
2998-
skspcl->data_buf, skspcl->db_dma_address);
2999-
}
3068+
skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
3069+
skspcl->db_dma_address, DMA_BIDIRECTIONAL);
30003070

30013071
skspcl->data_buf = NULL;
30023072
skspcl->db_dma_address = 0;
30033073

3004-
if (skspcl->msg_buf != NULL) {
3005-
nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
3006-
pci_free_consistent(skdev->pdev, nbytes,
3007-
skspcl->msg_buf, skspcl->mb_dma_address);
3008-
}
3074+
skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
3075+
skspcl->mb_dma_address, DMA_TO_DEVICE);
30093076

30103077
skspcl->msg_buf = NULL;
30113078
skspcl->mb_dma_address = 0;
30123079

3013-
skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
3080+
skd_free_sg_list(skdev, skspcl->req.sksg_list,
30143081
skspcl->req.sksg_dma_address);
30153082

30163083
skspcl->req.sksg_list = NULL;
@@ -3056,6 +3123,10 @@ static void skd_destruct(struct skd_device *skdev)
30563123
dev_dbg(&skdev->pdev->dev, "skcomp\n");
30573124
skd_free_skcomp(skdev);
30583125

3126+
kmem_cache_destroy(skdev->databuf_cache);
3127+
kmem_cache_destroy(skdev->sglist_cache);
3128+
kmem_cache_destroy(skdev->msgbuf_cache);
3129+
30593130
dev_dbg(&skdev->pdev->dev, "skdev\n");
30603131
kfree(skdev);
30613132
}

0 commit comments

Comments
 (0)