@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
 
 #include <linux/mlx4/cmd.h>
 
@@ -50,36 +51,87 @@ enum {
         MLX4_TABLE_CHUNK_SIZE   = 1 << 18
 };
 
-void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
+static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
 {
-        struct mlx4_icm_chunk *chunk, *tmp;
         int i;
 
-        list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
-                if (chunk->nsg > 0)
-                        pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
-                                     PCI_DMA_BIDIRECTIONAL);
+        if (chunk->nsg > 0)
+                pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+                             PCI_DMA_BIDIRECTIONAL);
+
+        for (i = 0; i < chunk->npages; ++i)
+                __free_pages(chunk->mem[i].page,
+                             get_order(chunk->mem[i].length));
+}
 
-                for (i = 0; i < chunk->npages; ++i)
-                        __free_pages(chunk->mem[i].page,
-                                     get_order(chunk->mem[i].length));
+static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
+{
+        int i;
+
+        for (i = 0; i < chunk->npages; ++i)
+                dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+                                  lowmem_page_address(chunk->mem[i].page),
+                                  sg_dma_address(&chunk->mem[i]));
+}
+
+void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
+{
+        struct mlx4_icm_chunk *chunk, *tmp;
+
+        if (!icm)
+                return;
+
+        list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
+                if (coherent)
+                        mlx4_free_icm_coherent(dev, chunk);
+                else
+                        mlx4_free_icm_pages(dev, chunk);
 
                 kfree(chunk);
         }
 
         kfree(icm);
 }
 
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+        mem->page = alloc_pages(gfp_mask, order);
+        if (!mem->page)
+                return -ENOMEM;
+
+        mem->length = PAGE_SIZE << order;
+        mem->offset = 0;
+        return 0;
+}
+
+static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+                                   int order, gfp_t gfp_mask)
+{
+        void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order,
+                                       &sg_dma_address(mem), gfp_mask);
+        if (!buf)
+                return -ENOMEM;
+
+        sg_set_buf(mem, buf, PAGE_SIZE << order);
+        BUG_ON(mem->offset);
+        sg_dma_len(mem) = PAGE_SIZE << order;
+        return 0;
+}
+
 struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
-                                gfp_t gfp_mask)
+                                gfp_t gfp_mask, int coherent)
 {
         struct mlx4_icm *icm;
         struct mlx4_icm_chunk *chunk = NULL;
         int cur_order;
+        int ret;
+
+        /* We use sg_set_buf for coherent allocs, which assumes low memory */
+        BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
 
         icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
         if (!icm)
-                return icm;
+                return NULL;
 
         icm->refcount = 0;
         INIT_LIST_HEAD(&icm->chunk_list);
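
[ Note: the hunk above splits ICM memory into two paths: streaming chunks come
  from alloc_pages() and are mapped with pci_map_sg(), while coherent chunks
  come from dma_alloc_coherent() and are born DMA-mapped.  A caller-side
  sketch, assuming only the signatures visible in this patch; the 16-page
  size, GFP flags, and the demo function name are illustrative, not part of
  the patch:

        /* Hypothetical usage sketch: allocate a small ICM area in coherent
         * memory and release it.  The coherent flag given to mlx4_free_icm()
         * must match the one used at allocation time, since the two paths
         * unwind differently (dma_free_coherent() vs. pci_unmap_sg() plus
         * __free_pages()). */
        static int icm_coherent_demo(struct mlx4_dev *dev)
        {
                struct mlx4_icm *icm;

                /* Coherent allocations must not use highmem: mlx4_alloc_icm()
                 * BUG_ONs on __GFP_HIGHMEM when coherent is set. */
                icm = mlx4_alloc_icm(dev, 16, GFP_KERNEL | __GFP_NOWARN, 1);
                if (!icm)
                        return -ENOMEM;

                /* ... hand it to the HCA with mlx4_MAP_ICM() and use it ... */

                mlx4_free_icm(dev, icm, 1);
                return 0;
        }
]
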
@@ -101,12 +153,20 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
                 while (1 << cur_order > npages)
                         --cur_order;
 
-                chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
-                if (chunk->mem[chunk->npages].page) {
-                        chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
-                        chunk->mem[chunk->npages].offset = 0;
+                if (coherent)
+                        ret = mlx4_alloc_icm_coherent(&dev->pdev->dev,
+                                                      &chunk->mem[chunk->npages],
+                                                      cur_order, gfp_mask);
+                else
+                        ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
+                                                   cur_order, gfp_mask);
+
+                if (!ret) {
+                        ++chunk->npages;
 
-                        if (++chunk->npages == MLX4_ICM_CHUNK_LEN) {
+                        if (coherent)
+                                ++chunk->nsg;
+                        else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
                                 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
                                                         chunk->npages,
                                                         PCI_DMA_BIDIRECTIONAL);
@@ -125,7 +185,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
                 }
         }
 
-        if (chunk) {
+        if (!coherent && chunk) {
                 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
                                         chunk->npages,
                                         PCI_DMA_BIDIRECTIONAL);
@@ -137,7 +197,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
         return icm;
 
 fail:
-        mlx4_free_icm(dev, icm);
+        mlx4_free_icm(dev, icm, coherent);
         return NULL;
 }
 
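
[ Note: the allocation loop above is the crux of the change.  On the coherent
  path each scatterlist entry gets its bus address straight from
  dma_alloc_coherent(), so nsg is bumped once per entry and the per-chunk
  pci_map_sg() pass is skipped entirely (hence the new !coherent guard).  A
  minimal fragment of what that buys a consumer, with chunk assumed to be a
  populated struct mlx4_icm_chunk in scope:

        /* Illustrative only: a coherent chunk's entries already carry valid
         * DMA addresses and lengths, so they can be programmed into the HCA
         * without any separate mapping step. */
        dma_addr_t addr = sg_dma_address(&chunk->mem[0]);
        unsigned int len = sg_dma_len(&chunk->mem[0]);
]
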
@@ -202,15 +262,15 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
 
         table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
                                        (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-                                       __GFP_NOWARN);
+                                       __GFP_NOWARN, table->coherent);
         if (!table->icm[i]) {
                 ret = -ENOMEM;
                 goto out;
         }
 
         if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
                          (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
-                mlx4_free_icm(dev, table->icm[i]);
+                mlx4_free_icm(dev, table->icm[i], table->coherent);
                 table->icm[i] = NULL;
                 ret = -ENOMEM;
                 goto out;
@@ -234,7 +294,7 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
         if (--table->icm[i]->refcount == 0) {
                 mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
                                MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-                mlx4_free_icm(dev, table->icm[i]);
+                mlx4_free_icm(dev, table->icm[i], table->coherent);
                 table->icm[i] = NULL;
         }
 
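
[ Note: the mlx4_table_get()/mlx4_table_put() hunks only thread the stored
  table->coherent flag through to mlx4_free_icm(); the chunk reference
  counting itself is unchanged.  For orientation, a hypothetical caller pairs
  them like this (obj and the surrounding error handling are illustrative):

        /* Sketch: pin the ICM chunk backing object obj, use the object,
         * then drop the reference; the chunk is unmapped and freed only
         * when its refcount reaches zero. */
        int err = mlx4_table_get(dev, table, obj);
        if (err)
                return err;

        /* ... access the ICM-backed object ... */

        mlx4_table_put(dev, table, obj);
]
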
@@ -309,7 +369,7 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 
 int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                         u64 virt, int obj_size, int nobj, int reserved,
-                        int use_lowmem)
+                        int use_lowmem, int use_coherent)
 {
         int obj_per_chunk;
         int num_icm;
@@ -327,6 +387,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
         table->num_obj  = nobj;
         table->obj_size = obj_size;
         table->lowmem   = use_lowmem;
+        table->coherent = use_coherent;
         mutex_init(&table->mutex);
 
         for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
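
[ Note: recording use_coherent in table->coherent at init time means every
  later free picks the matching path automatically.  A sketch of the extended
  signature in use; everything here except the trailing use_coherent argument
  is hypothetical:

        err = mlx4_init_icm_table(dev, &demo_table, icm_virt,
                                  obj_size, nobj, reserved,
                                  1 /* use_lowmem */, 1 /* use_coherent */);
        if (err)
                return err;
]
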
@@ -336,11 +397,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 
                 table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
                                                (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-                                               __GFP_NOWARN);
+                                               __GFP_NOWARN, use_coherent);
                 if (!table->icm[i])
                         goto err;
                 if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
-                        mlx4_free_icm(dev, table->icm[i]);
+                        mlx4_free_icm(dev, table->icm[i], use_coherent);
                         table->icm[i] = NULL;
                         goto err;
                 }
@@ -359,7 +420,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                 if (table->icm[i]) {
                         mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
                                        MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-                        mlx4_free_icm(dev, table->icm[i]);
+                        mlx4_free_icm(dev, table->icm[i], use_coherent);
                 }
 
         return -ENOMEM;
@@ -373,7 +434,7 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
                 if (table->icm[i]) {
                         mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
                                        MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-                        mlx4_free_icm(dev, table->icm[i]);
+                        mlx4_free_icm(dev, table->icm[i], table->coherent);
                 }
 
         kfree(table->icm);