/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
 * does asynchronous data movement and checksumming operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/prefetch.h>
#include <linux/i7300_idle.h>
#include "dma.h"
#include "dma_v2.h"
#include "registers.h"
#include "hw.h"

int ioat_ring_alloc_order = 8;
module_param(ioat_ring_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_alloc_order,
		 "ioat2+: allocate 2^n descriptors per channel"
		 " (default: 8 max: 16)");
static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
module_param(ioat_ring_max_alloc_order, int, 0644);
MODULE_PARM_DESC(ioat_ring_max_alloc_order,
		 "ioat2+: upper limit for ring size (default: 16)");

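/**
 * __ioat2_issue_pending - update the hardware DMA count register
 * @ioat: ioat2+ channel
 *
 * Write the number of submitted-but-unissued descriptors to the DMACOUNT
 * register so the engine starts processing them.  Called with prep_lock held.
 */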
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;

	ioat->dmacount += ioat2_ring_pending(ioat);
	ioat->issued = ioat->head;
	writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
	dev_dbg(to_dev(chan),
		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
}

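/**
 * ioat2_issue_pending - flush any pending descriptors to hardware
 * @c: channel to be flushed
 *
 * Grabs prep_lock and issues whatever has been submitted but not yet
 * written to the DMACOUNT register.
 */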
void ioat2_issue_pending(struct dma_chan *c)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);

	if (ioat2_ring_pending(ioat)) {
		spin_lock_bh(&ioat->prep_lock);
		__ioat2_issue_pending(ioat);
		spin_unlock_bh(&ioat->prep_lock);
	}
}

/**
 * ioat2_update_pending - log pending descriptors
 * @ioat: ioat2+ channel
 *
 * Check if the number of unsubmitted descriptors has exceeded the
 * watermark.  Called with prep_lock held
 */
static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
{
	if (ioat2_ring_pending(ioat) > ioat_pending_level)
		__ioat2_issue_pending(ioat);
}

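/**
 * __ioat2_start_null_desc - kick off the channel with a NULL descriptor
 * @ioat: ioat2+ channel
 *
 * Program a NULL descriptor at the current head, point the chain address
 * register at it, and issue it.  Called with prep_lock held.
 */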
static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
{
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat2_ring_space(ioat) < 1) {
		dev_err(to_dev(&ioat->base),
			"Unable to start null desc - ring full\n");
		return;
	}

	dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);
	desc = ioat2_get_ring_ent(ioat, ioat->head);

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.compl_write = 1;
	/* set size to non-zero value (channel returns error when size is 0) */
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;
	async_tx_ack(&desc->txd);
	ioat2_set_chainaddr(ioat, desc->txd.phys);
	dump_desc_dbg(ioat, desc);
	wmb();
	ioat->head += 1;
	__ioat2_issue_pending(ioat);
}

static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
{
	spin_lock_bh(&ioat->prep_lock);
	__ioat2_start_null_desc(ioat);
	spin_unlock_bh(&ioat->prep_lock);
}

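/**
 * __cleanup - reclaim descriptors that the hardware has completed
 * @ioat: ioat2+ channel
 * @phys_complete: last descriptor address reported by the completion writeback
 *
 * Walk the ring from tail towards head, running callbacks and unmapping
 * buffers, until the descriptor matching @phys_complete is found.  Called
 * with cleanup_lock held.
 */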
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct dma_async_tx_descriptor *tx;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	u16 active;
	int idx = ioat->tail, i;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		smp_read_barrier_depends();
		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
		desc = ioat2_get_ring_ent(ioat, idx + i);
		tx = &desc->txd;
		dump_desc_dbg(ioat, desc);
		if (tx->cookie) {
			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
			chan->completed_cookie = tx->cookie;
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}

		if (tx->phys == phys_complete)
			seen_current = true;
	}
	smp_mb(); /* finish all descriptor reads before incrementing tail */
	ioat->tail = idx + i;
	BUG_ON(active && !seen_current); /* no active descs have written a completion? */

	chan->last_completion = phys_complete;
	if (active - i == 0) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}

/**
 * ioat2_cleanup - clean finished descriptors (advance tail pointer)
 * @ioat: ioat channel to be cleaned up
 */
static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	spin_lock_bh(&chan->cleanup_lock);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);
	spin_unlock_bh(&chan->cleanup_lock);
}

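/**
 * ioat2_cleanup_event - cleanup tasklet body
 * @data: channel pointer passed to the tasklet (cast from unsigned long)
 *
 * Reaps completed descriptors and then writes IOAT_CHANCTRL_RUN back to
 * the channel control register.
 */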
void ioat2_cleanup_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);

	ioat2_cleanup(ioat);
	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}

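/**
 * __ioat2_restart_chan - restart the engine from the current tail
 * @ioat: ioat2+ channel
 *
 * Re-point the chain address at the tail descriptor and re-issue anything
 * still pending; if the ring is empty, start a NULL descriptor instead.
 * Called with prep_lock held.
 */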
void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;

	/* set the tail to be re-issued */
	ioat->issued = ioat->tail;
	ioat->dmacount = 0;
	set_bit(IOAT_COMPLETION_PENDING, &chan->state);
	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);

	dev_dbg(to_dev(chan),
		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);

	if (ioat2_ring_pending(ioat)) {
		struct ioat_ring_ent *desc;

		desc = ioat2_get_ring_ent(ioat, ioat->tail);
		ioat2_set_chainaddr(ioat, desc->txd.phys);
		__ioat2_issue_pending(ioat);
	} else
		__ioat2_start_null_desc(ioat);
}

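/**
 * ioat2_quiesce - suspend the channel and wait for it to go quiescent
 * @chan: channel to quiesce
 * @tmo: timeout in jiffies (0 waits forever)
 *
 * Returns 0 on success or -ETIMEDOUT if the channel is still active or
 * idle when the timeout expires.
 */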
int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo)
{
	unsigned long end = jiffies + tmo;
	int err = 0;
	u32 status;

	status = ioat_chansts(chan);
	if (is_ioat_active(status) || is_ioat_idle(status))
		ioat_suspend(chan);
	while (is_ioat_active(status) || is_ioat_idle(status)) {
		if (tmo && time_after(jiffies, end)) {
			err = -ETIMEDOUT;
			break;
		}
		status = ioat_chansts(chan);
		cpu_relax();
	}

	return err;
}

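/**
 * ioat2_reset_sync - issue a channel reset and wait for it to complete
 * @chan: channel to reset
 * @tmo: timeout in jiffies
 *
 * Returns 0 on success or -ETIMEDOUT if the reset is still pending when
 * the timeout expires.
 */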
int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo)
{
	unsigned long end = jiffies + tmo;
	int err = 0;

	ioat_reset(chan);
	while (ioat_reset_pending(chan)) {
		if (end && time_after(jiffies, end)) {
			err = -ETIMEDOUT;
			break;
		}
		cpu_relax();
	}

	return err;
}

static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	ioat2_quiesce(chan, 0);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);

	__ioat2_restart_chan(ioat);
}

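/**
 * ioat2_timer_event - watchdog for stalled channels and oversized rings
 * @data: channel pointer passed to the timer (cast from unsigned long)
 *
 * If a completion is pending, check for channel errors, reclaim finished
 * descriptors, and restart the channel when no progress has been made.
 * Otherwise, shrink an idle ring back toward the default allocation order.
 */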
void ioat2_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
	struct ioat_chan_common *chan = &ioat->base;

	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		status = ioat_chansts(chan);

		/* when halted due to errors, check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
				__func__, chanerr);
			if (test_bit(IOAT_RUN, &chan->state))
				BUG_ON(is_ioat_bug(chanerr));
			else /* we never got off the ground */
				return;
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		spin_lock_bh(&chan->cleanup_lock);
		if (ioat_cleanup_preamble(chan, &phys_complete)) {
			__cleanup(ioat, phys_complete);
		} else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
			spin_lock_bh(&ioat->prep_lock);
			ioat2_restart_channel(ioat);
			spin_unlock_bh(&ioat->prep_lock);
		} else {
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&chan->cleanup_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized,
		 * try to step down the size
		 */
		spin_lock_bh(&chan->cleanup_lock);
		spin_lock_bh(&ioat->prep_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order-1);
		spin_unlock_bh(&ioat->prep_lock);
		spin_unlock_bh(&chan->cleanup_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}

static int ioat2_reset_hw(struct ioat_chan_common *chan)
{
	/* throw away whatever the channel was doing and get it initialized */
	u32 chanerr;

	ioat2_quiesce(chan, msecs_to_jiffies(100));

	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);

	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
}

/**
 * ioat2_enumerate_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 */
int ioat2_enumerate_channels(struct ioatdma_device *device)
{
	struct ioat2_dma_chan *ioat;
	struct device *dev = &device->pdev->dev;
	struct dma_device *dma = &device->common;
	u8 xfercap_log;
	int i;

	INIT_LIST_HEAD(&dma->channels);
	dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
	dma->chancnt &= 0x1f; /* bits [4:0] valid */
	if (dma->chancnt > ARRAY_SIZE(device->idx)) {
		dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
			 dma->chancnt, ARRAY_SIZE(device->idx));
		dma->chancnt = ARRAY_SIZE(device->idx);
	}
	xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
	xfercap_log &= 0x1f; /* bits [4:0] valid */
	if (xfercap_log == 0)
		return 0;
	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);

	/* FIXME which i/oat version is i7300? */
#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
	if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
		dma->chancnt--;
#endif
	for (i = 0; i < dma->chancnt; i++) {
		ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
		if (!ioat)
			break;

		ioat_init_channel(device, &ioat->base, i);
		ioat->xfercap_log = xfercap_log;
		spin_lock_init(&ioat->prep_lock);
		if (device->reset_hw(&ioat->base)) {
			i = 0;
			break;
		}
	}
	dma->chancnt = i;
	return i;
}

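/**
 * ioat2_tx_submit_unlock - assign a cookie and publish the descriptor(s)
 * @tx: descriptor being submitted
 *
 * Completes the two-part prep/submit sequence: assigns the next cookie,
 * advances the ring head past the descriptors reserved by
 * ioat2_check_space_lock(), and drops the prep_lock taken there.
 */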
static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
{
	struct dma_chan *c = tx->chan;
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	dma_cookie_t cookie = c->cookie;

	cookie++;
	if (cookie < 0)
		cookie = 1;
	tx->cookie = cookie;
	c->cookie = cookie;
	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);

	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);

	/* make descriptor updates visible before advancing ioat->head,
	 * this is purposefully not smp_wmb() since we are also
	 * publishing the descriptor updates to a dma device
	 */
	wmb();

	ioat->head += ioat->produce;

	ioat2_update_pending(ioat);
	spin_unlock_bh(&ioat->prep_lock);

	return cookie;
}

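/**
 * ioat2_alloc_ring_ent - allocate one software/hardware descriptor pair
 * @chan: channel the descriptor will belong to
 * @flags: gfp flags for the allocation
 *
 * Carves a hardware descriptor out of the PCI pool and wraps it in a
 * software ring entry; returns NULL on allocation failure.
 */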
static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
{
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	struct ioatdma_device *dma;
	dma_addr_t phys;

	dma = to_ioatdma_device(chan->device);
	hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
	if (!hw)
		return NULL;
	memset(hw, 0, sizeof(*hw));

	desc = kmem_cache_alloc(ioat2_cache, flags);
	if (!desc) {
		pci_pool_free(dma->dma_pool, hw, phys);
		return NULL;
	}
	memset(desc, 0, sizeof(*desc));

	dma_async_tx_descriptor_init(&desc->txd, chan);
	desc->txd.tx_submit = ioat2_tx_submit_unlock;
	desc->hw = hw;
	desc->txd.phys = phys;
	return desc;
}

static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
{
	struct ioatdma_device *dma;

	dma = to_ioatdma_device(chan->device);
	pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
	kmem_cache_free(ioat2_cache, desc);
}

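/**
 * ioat2_alloc_ring - allocate a software ring of 2^order descriptors
 * @c: channel the ring belongs to
 * @order: log2 of the number of descriptors
 * @flags: gfp flags for the allocations
 *
 * Allocates every ring entry and links the hardware descriptors into a
 * circular chain; returns NULL on failure or if @order exceeds the
 * configured maximum.
 */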
static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
{
	struct ioat_ring_ent **ring;
	int descs = 1 << order;
	int i;

	if (order > ioat_get_max_alloc_order())
		return NULL;

	/* allocate the array to hold the software ring */
	ring = kcalloc(descs, sizeof(*ring), flags);
	if (!ring)
		return NULL;
	for (i = 0; i < descs; i++) {
		ring[i] = ioat2_alloc_ring_ent(c, flags);
		if (!ring[i]) {
			while (i--)
				ioat2_free_ring_ent(ring[i], c);
			kfree(ring);
			return NULL;
		}
		set_desc_id(ring[i], i);
	}

	/* link descs */
	for (i = 0; i < descs-1; i++) {
		struct ioat_ring_ent *next = ring[i+1];
		struct ioat_dma_descriptor *hw = ring[i]->hw;

		hw->next = next->txd.phys;
	}
	ring[i]->hw->next = ring[0]->txd.phys;

	return ring;
}

void ioat2_free_chan_resources(struct dma_chan *c);

/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
 * @c: channel to be initialized
 */
int ioat2_alloc_chan_resources(struct dma_chan *c)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent **ring;
	u64 status;
	int order;
	int i = 0;

	/* have we already been set up? */
	if (ioat->ring)
		return 1 << ioat->alloc_order;

	/* Setup register to interrupt and write completion status on error */
	writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);

	/* allocate a completion writeback area */
	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
	chan->completion = pci_pool_alloc(chan->device->completion_pool,
					  GFP_KERNEL, &chan->completion_dma);
	if (!chan->completion)
		return -ENOMEM;

	memset(chan->completion, 0, sizeof(*chan->completion));
	writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
	       chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
	writel(((u64) chan->completion_dma) >> 32,
	       chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

	order = ioat_get_alloc_order();
	ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;

	spin_lock_bh(&chan->cleanup_lock);
	spin_lock_bh(&ioat->prep_lock);
	ioat->ring = ring;
	ioat->head = 0;
	ioat->issued = 0;
	ioat->tail = 0;
	ioat->alloc_order = order;
	spin_unlock_bh(&ioat->prep_lock);
	spin_unlock_bh(&chan->cleanup_lock);

	tasklet_enable(&chan->cleanup_task);
	ioat2_start_null_desc(ioat);

	/* check that we got off the ground */
	do {
		udelay(1);
		status = ioat_chansts(chan);
	} while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));

	if (is_ioat_active(status) || is_ioat_idle(status)) {
		set_bit(IOAT_RUN, &chan->state);
		return 1 << ioat->alloc_order;
	} else {
		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);

		dev_WARN(to_dev(chan),
			"failed to start channel chanerr: %#x\n", chanerr);
		ioat2_free_chan_resources(c);
		return -EFAULT;
	}
}

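/**
 * reshape_ring - grow or shrink the descriptor ring to 2^order entries
 * @ioat: ioat2+ channel (ring) to resize
 * @order: new log2 ring size
 *
 * Allocates a new software ring and adjusts the hardware descriptor chain
 * to match; returns false if the new size cannot hold the active
 * descriptors or an allocation fails.  Called with cleanup_lock and
 * prep_lock held.
 */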
bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
{
	/* reshape differs from normal ring allocation in that we want
	 * to allocate a new software ring while only
	 * extending/truncating the hardware ring
	 */
	struct ioat_chan_common *chan = &ioat->base;
	struct dma_chan *c = &chan->common;
	const u16 curr_size = ioat2_ring_size(ioat);
	const u16 active = ioat2_ring_active(ioat);
	const u16 new_size = 1 << order;
	struct ioat_ring_ent **ring;
	u16 i;

	if (order > ioat_get_max_alloc_order())
		return false;

	/* double check that we have at least 1 free descriptor */
	if (active == curr_size)
		return false;

	/* when shrinking, verify that we can hold the current active
	 * set in the new ring
	 */
	if (active >= new_size)
		return false;

	/* allocate the array to hold the software ring */
	ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
	if (!ring)
		return false;

	/* allocate/trim descriptors as needed */
	if (new_size > curr_size) {
		/* copy current descriptors to the new ring */
		for (i = 0; i < curr_size; i++) {
			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
			u16 new_idx = (ioat->tail+i) & (new_size-1);

			ring[new_idx] = ioat->ring[curr_idx];
			set_desc_id(ring[new_idx], new_idx);
		}

		/* add new descriptors to the ring */
		for (i = curr_size; i < new_size; i++) {
			u16 new_idx = (ioat->tail+i) & (new_size-1);

			ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
			if (!ring[new_idx]) {
				while (i--) {
					u16 new_idx = (ioat->tail+i) & (new_size-1);

					ioat2_free_ring_ent(ring[new_idx], c);
				}
				kfree(ring);
				return false;
			}
			set_desc_id(ring[new_idx], new_idx);
		}

		/* hw link new descriptors */
		for (i = curr_size-1; i < new_size; i++) {
			u16 new_idx = (ioat->tail+i) & (new_size-1);
			struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
			struct ioat_dma_descriptor *hw = ring[new_idx]->hw;

			hw->next = next->txd.phys;
		}
	} else {
		struct ioat_dma_descriptor *hw;
		struct ioat_ring_ent *next;

		/* copy current descriptors to the new ring, dropping the
		 * removed descriptors
		 */
		for (i = 0; i < new_size; i++) {
			u16 curr_idx = (ioat->tail+i) & (curr_size-1);
			u16 new_idx = (ioat->tail+i) & (new_size-1);

			ring[new_idx] = ioat->ring[curr_idx];
			set_desc_id(ring[new_idx], new_idx);
		}

		/* free deleted descriptors */
		for (i = new_size; i < curr_size; i++) {
			struct ioat_ring_ent *ent;

			ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
			ioat2_free_ring_ent(ent, c);
		}

		/* fix up hardware ring */
		hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
		next = ring[(ioat->tail+new_size) & (new_size-1)];
		hw->next = next->txd.phys;
	}

	dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
		__func__, new_size);

	kfree(ioat->ring);
	ioat->ring = ring;
	ioat->alloc_order = order;

	return true;
}

/**
 * ioat2_check_space_lock - verify space and grab ring producer lock
 * @ioat: ioat2,3 channel (ring) to operate on
 * @num_descs: allocation length
 */
int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs)
{
	struct ioat_chan_common *chan = &ioat->base;
	bool retry;

 retry:
	spin_lock_bh(&ioat->prep_lock);
	/* never allow the last descriptor to be consumed; we need at
	 * least one free at all times to allow for on-the-fly ring
	 * resizing.
	 */
	if (likely(ioat2_ring_space(ioat) > num_descs)) {
		dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
		ioat->produce = num_descs;
		return 0;  /* with ioat->prep_lock held */
	}
	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state);
	spin_unlock_bh(&ioat->prep_lock);

	/* is another cpu already trying to expand the ring? */
	if (retry)
		goto retry;

	spin_lock_bh(&chan->cleanup_lock);
	spin_lock_bh(&ioat->prep_lock);
	retry = reshape_ring(ioat, ioat->alloc_order + 1);
	clear_bit(IOAT_RESHAPE_PENDING, &chan->state);
	spin_unlock_bh(&ioat->prep_lock);
	spin_unlock_bh(&chan->cleanup_lock);

	/* if we were able to expand the ring retry the allocation */
	if (retry)
		goto retry;

	if (printk_ratelimit())
		dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n",
			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);

	/* progress reclaim in the allocation failure case: we may be
	 * called under bh_disabled, so we need to trigger the timer
	 * event directly
	 */
	if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) {
		struct ioatdma_device *device = chan->device;

		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		device->timer_fn((unsigned long) &chan->common);
	}

	return -ENOMEM;
}

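/**
 * ioat2_dma_prep_memcpy_lock - prepare a memcpy operation
 * @c: channel to use
 * @dma_dest: destination DMA address
 * @dma_src: source DMA address
 * @len: total transfer length
 * @flags: dmaengine DMA_PREP_* flags
 *
 * Splits the copy into xfercap-sized descriptors and returns the last
 * descriptor of the chain with the prep_lock still held; the lock is
 * released in ioat2_tx_submit_unlock().
 */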
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			   dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat, desc);
	/* we leave the channel locked to ensure in-order submission */

	return &desc->txd;
}

/**
 * ioat2_free_chan_resources - release all the descriptors
 * @c: the channel to be cleaned
 */
void ioat2_free_chan_resources(struct dma_chan *c)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioatdma_device *device = chan->device;
	struct ioat_ring_ent *desc;
	const u16 total_descs = 1 << ioat->alloc_order;
	int descs;
	int i;

	/* Before freeing channel resources, first check whether they
	 * have been previously allocated for this channel.
	 */
	if (!ioat->ring)
		return;

	tasklet_disable(&chan->cleanup_task);
	del_timer_sync(&chan->timer);
	device->cleanup_fn((unsigned long) c);
	device->reset_hw(chan);
	clear_bit(IOAT_RUN, &chan->state);

	spin_lock_bh(&chan->cleanup_lock);
	spin_lock_bh(&ioat->prep_lock);
	descs = ioat2_ring_space(ioat);
	dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
	for (i = 0; i < descs; i++) {
		desc = ioat2_get_ring_ent(ioat, ioat->head + i);
		ioat2_free_ring_ent(desc, c);
	}

	if (descs < total_descs)
		dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
			total_descs - descs);

	for (i = 0; i < total_descs - descs; i++) {
		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
		dump_desc_dbg(ioat, desc);
		ioat2_free_ring_ent(desc, c);
	}

	kfree(ioat->ring);
	ioat->ring = NULL;
	ioat->alloc_order = 0;
	pci_pool_free(device->completion_pool, chan->completion,
		      chan->completion_dma);
	spin_unlock_bh(&ioat->prep_lock);
	spin_unlock_bh(&chan->cleanup_lock);

	chan->last_completion = 0;
	chan->completion_dma = 0;
	ioat->dmacount = 0;
}

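/* sysfs attributes exposing the current ring size and occupancy */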
static ssize_t ring_size_show(struct dma_chan *c, char *page)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);

	return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
}
static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);

static ssize_t ring_active_show(struct dma_chan *c, char *page)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);

	/* ...taken outside the lock, no need to be precise */
	return sprintf(page, "%d\n", ioat2_ring_active(ioat));
}
static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);

static struct attribute *ioat2_attrs[] = {
	&ring_size_attr.attr,
	&ring_active_attr.attr,
	&ioat_cap_attr.attr,
	&ioat_version_attr.attr,
	NULL,
};

struct kobj_type ioat2_ktype = {
	.sysfs_ops = &ioat_sysfs_ops,
	.default_attrs = ioat2_attrs,
};

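/**
 * ioat2_dma_probe - set up an ioat2 device and register it with dmaengine
 * @device: ioat device found on the PCI bus
 * @dca: whether to also initialize DCA support
 *
 * Wires up the ioat2-specific operations, probes and registers the dma
 * device, and exposes the per-channel sysfs attributes.
 */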
int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
{
	struct pci_dev *pdev = device->pdev;
	struct dma_device *dma;
	struct dma_chan *c;
	struct ioat_chan_common *chan;
	int err;

	device->enumerate_channels = ioat2_enumerate_channels;
	device->reset_hw = ioat2_reset_hw;
	device->cleanup_fn = ioat2_cleanup_event;
	device->timer_fn = ioat2_timer_event;
	device->self_test = ioat_dma_self_test;
	dma = &device->common;
	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
	dma->device_issue_pending = ioat2_issue_pending;
	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
	dma->device_free_chan_resources = ioat2_free_chan_resources;
	dma->device_tx_status = ioat_dma_tx_status;

	err = ioat_probe(device);
	if (err)
		return err;
	ioat_set_tcp_copy_break(2048);

	list_for_each_entry(c, &dma->channels, device_node) {
		chan = to_chan_common(c);
		writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
		       chan->reg_base + IOAT_DCACTRL_OFFSET);
	}

	err = ioat_register(device);
	if (err)
		return err;

	ioat_kobject_add(device, &ioat2_ktype);

	if (dca)
		device->dca = ioat2_dca_init(pdev, device->reg_base);

	return err;
}