Gentoo Archives: gentoo-sparc

From: Leif Sawyer <lsawyer@×××.com>
To: gentoo-sparc@l.g.o
Subject: RE: [gentoo-sparc] [Fwd: Re: [Repost] Ultra2 SMP freezes with heavy disk I/O]
Date: Tue, 01 May 2007 17:45:19
Message-Id: 38D04BF3A4B7B2499D19EB1DB54285EA049C002D@FNB1EX01.gci.com
In Reply to: [gentoo-sparc] [Fwd: Re: [Repost] Ultra2 SMP freezes with heavy disk I/O] by Ferris McCormick
Just saw that 2.6.21-gentoo was released, so I'm walking
through the build on my U2.

I'll post a report after some more testing.



> -----Original Message-----
> From: Ferris McCormick [mailto:fmccor@g.o]
> Sent: Thursday, April 12, 2007 4:41 AM
> To: Gentoo Sparc
> Subject: [gentoo-sparc] [Fwd: Re: [Repost] Ultra2 SMP freezes with heavy disk I/O]
>
> In case you missed it.
>
> -------- Forwarded Message --------
> From: David Miller <davem@×××××××××.net>
> To: joel.bertrand@××××××××.fr
> Cc: mt1@××××××××.fr, sparclinux@×××××××××××.org
> Subject: Re: [Repost] Ultra2 SMP freezes with heavy disk I/O
> Date: Wed, 11 Apr 2007 21:55:11 -0700 (PDT)
>
> From: David Miller <davem@×××××××××.net>
> Date: Tue, 10 Apr 2007 14:04:58 -0700 (PDT)
>
> > From: BERTRAND Joël <joel.bertrand@××××××××.fr>
> > Date: Tue, 20 Mar 2007 21:53:26 +0100
> >
> > > I have tried the last 2.6.21-rc4. Same bug.
> >
> > The good news is that I can reproduce this problem now on my Ultra2,
> > I'll try to figure out what's wrong.
> >
> > Running "dbench 32" a few time is enough to trigger it.
>
> Ok, I think I might have killed at least the bug I was able to hit.
>
> I make no promises that this will fix the issues everyone else was
> seeing, but let's cross our fingers :-)))
>
> Please give the following patch some testing if you can. And please
> be careful, messing with the DMA mapping code can do things like
> eat your disk :-)
>
> If all goes well I'll push this bug fix around, and I have a secret
> treat for your poor sparc64 SBUS users once we get past this.
>
> Thanks!
>
> commit af822e612593c1feef5052e685bca0f8d087d120
> Author: David S. Miller <davem@××××××××××××××××.net>
> Date: Wed Apr 11 21:38:45 2007 -0700
>
>     [SPARC64]: Fix SBUS IOMMU allocation code.
>
>     There are several IOMMU allocator bugs. Instead of trying to fix this
>     overly complicated code, just mirror the PCI IOMMU arena allocator
>     which is very stable and well stress tested.
>
>     I tried to make the code as identical as possible so we can switch
>     sun4u PCI and SBUS over to a common piece of IOMMU code. All that
>     will be need are two callbacks, one to do a full IOMMU flush and one
>     to do a streaming buffer flush.
>
>     This patch gets rid of a lot of hangs and mysterious crashes on SBUS
>     sparc64 systems, at least for me.
>
>     Signed-off-by: David S. Miller <davem@×××××××××.net>
>
> diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c
> index 01d6d86..14f78fb 100644
> --- a/arch/sparc64/kernel/sbus.c
> +++ b/arch/sparc64/kernel/sbus.c
> @@ -24,48 +24,25 @@
>
> #include "iommu_common.h"
>
> -/* These should be allocated on an SMP_CACHE_BYTES
> - * aligned boundary for optimal performance.
> - *
> - * On SYSIO, using an 8K page size we have 1GB of SBUS
> - * DMA space mapped. We divide this space into equally
> - * sized clusters. We allocate a DMA mapping from the
> - * cluster that matches the order of the allocation, or
> - * if the order is greater than the number of clusters,
> - * we try to allocate from the last cluster.
> - */
> -
> -#define NCLUSTERS 8UL
> -#define ONE_GIG (1UL * 1024UL * 1024UL * 1024UL)
> -#define CLUSTER_SIZE (ONE_GIG / NCLUSTERS)
> -#define CLUSTER_MASK (CLUSTER_SIZE - 1)
> -#define CLUSTER_NPAGES (CLUSTER_SIZE >> IO_PAGE_SHIFT)
> #define MAP_BASE ((u32)0xc0000000)
>
> +struct sbus_iommu_arena {
> + unsigned long *map;
> + unsigned int hint;
> + unsigned int limit;
> +};
> +
> struct sbus_iommu {
> -/*0x00*/spinlock_t lock;
> + spinlock_t lock;
>
> -/*0x08*/iopte_t *page_table;
> -/*0x10*/unsigned long strbuf_regs;
> -/*0x18*/unsigned long iommu_regs;
> -/*0x20*/unsigned long sbus_control_reg;
> + struct sbus_iommu_arena arena;
>
> -/*0x28*/volatile unsigned long strbuf_flushflag;
> + iopte_t *page_table;
> + unsigned long strbuf_regs;
> + unsigned long iommu_regs;
> + unsigned long sbus_control_reg;
>
> - /* If NCLUSTERS is ever decresed to 4 or lower,
> - * you must increase the size of the type of
> - * these counters. You have been duly warned. -DaveM
> - */
> -/*0x30*/struct {
> - u16 next;
> - u16 flush;
> - } alloc_info[NCLUSTERS];
> -
> - /* The lowest used consistent mapping entry. Since
> - * we allocate consistent maps out of cluster 0 this
> - * is relative to the beginning of closter 0.
> - */
> -/*0x50*/u32 lowest_consistent_map;
> + volatile unsigned long strbuf_flushflag;
> };
>
> /* Offsets from iommu_regs */
> @@ -91,19 +68,6 @@ static void __iommu_flushall(struct sbus_iommu *iommu)
> tag += 8UL;
> }
> upa_readq(iommu->sbus_control_reg);
> -
> - for (entry = 0; entry < NCLUSTERS; entry++) {
> - iommu->alloc_info[entry].flush =
> - iommu->alloc_info[entry].next;
> - }
> -}
> -
> -static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
> -{
> - while (npages--)
> - upa_writeq(base + (npages << IO_PAGE_SHIFT),
> - iommu->iommu_regs + IOMMU_FLUSH);
> - upa_readq(iommu->sbus_control_reg);
> }
>
> /* Offsets from strbuf_regs */
> @@ -156,178 +120,115 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long
> base, npages);
> }
>
> -static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages)
> +/* Based largely upon the ppc64 iommu allocator. */
> +static long sbus_arena_alloc(struct sbus_iommu *iommu, unsigned long npages)
> {
> - iopte_t *iopte, *limit, *first, *cluster;
> - unsigned long cnum, ent, nent, flush_point, found;
> -
> - cnum = 0;
> - nent = 1;
> - while ((1UL << cnum) < npages)
> - cnum++;
> - if(cnum >= NCLUSTERS) {
> - nent = 1UL << (cnum - NCLUSTERS);
> - cnum = NCLUSTERS - 1;
> - }
> - iopte = iommu->page_table + (cnum * CLUSTER_NPAGES);
> -
> - if (cnum == 0)
> - limit = (iommu->page_table +
> - iommu->lowest_consistent_map);
> - else
> - limit = (iopte + CLUSTER_NPAGES);
> -
> - iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
> - flush_point = iommu->alloc_info[cnum].flush;
> -
> - first = iopte;
> - cluster = NULL;
> - found = 0;
> - for (;;) {
> - if (iopte_val(*iopte) == 0UL) {
> - found++;
> - if (!cluster)
> - cluster = iopte;
> + struct sbus_iommu_arena *arena = &iommu->arena;
> + unsigned long n, i, start, end, limit;
> + int pass;
> +
> + limit = arena->limit;
> + start = arena->hint;
> + pass = 0;
> +
> +again:
> + n = find_next_zero_bit(arena->map, limit, start);
> + end = n + npages;
> + if (unlikely(end >= limit)) {
> + if (likely(pass < 1)) {
> + limit = start;
> + start = 0;
> + __iommu_flushall(iommu);
> + pass++;
> + goto again;
> } else {
> - /* Used cluster in the way */
> - cluster = NULL;
> - found = 0;
> + /* Scanned the whole thing, give up. */
> + return -1;
> }
> + }
>
> - if (found == nent)
> - break;
> -
> - iopte += (1 << cnum);
> - ent++;
> - if (iopte >= limit) {
> - iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES));
> - ent = 0;
> -
> - /* Multiple cluster allocations must not wrap */
> - cluster = NULL;
> - found = 0;
> + for (i = n; i < end; i++) {
> + if (test_bit(i, arena->map)) {
> + start = i + 1;
> + goto again;
> }
> - if (ent == flush_point)
> - __iommu_flushall(iommu);
> - if (iopte == first)
> - goto bad;
> }
>
> - /* ent/iopte points to the last cluster entry we're going to use,
> - * so save our place for the next allocation.
> - */
> - if ((iopte + (1 << cnum)) >= limit)
> - ent = 0;
> - else
> - ent = ent + 1;
> - iommu->alloc_info[cnum].next = ent;
> - if (ent == flush_point)
> - __iommu_flushall(iommu);
> -
> - /* I've got your streaming cluster right here buddy boy... */
> - return cluster;
> -
> -bad:
> - printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n",
> - npages);
> - return NULL;
> + for (i = n; i < end; i++)
> + __set_bit(i, arena->map);
> +
> + arena->hint = end;
> +
> + return n;
> }
>
> -static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
> +static void sbus_arena_free(struct sbus_iommu_arena *arena, unsigned long base, unsigned long npages)
> {
> - unsigned long cnum, ent, nent;
> - iopte_t *iopte;
> + unsigned long i;
>
> - cnum = 0;
> - nent = 1;
> - while ((1UL << cnum) < npages)
> - cnum++;
> - if(cnum >= NCLUSTERS) {
> - nent = 1UL << (cnum - NCLUSTERS);
> - cnum = NCLUSTERS - 1;
> - }
> - ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum);
> - iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
> - do {
> - iopte_val(*iopte) = 0UL;
> - iopte += 1 << cnum;
> - } while(--nent);
> -
> - /* If the global flush might not have caught this entry,
> - * adjust the flush point such that we will flush before
> - * ever trying to reuse it.
> - */
> -#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y)))
> - if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
> - iommu->alloc_info[cnum].flush = ent;
> -#undef between
> + for (i = base; i < (base + npages); i++)
> + __clear_bit(i, arena->map);
> }
>
> -/* We allocate consistent mappings from the end of cluster zero. */
> -static iopte_t *alloc_consistent_cluster(struct sbus_iommu *iommu, unsigned long npages)
> +static void sbus_iommu_table_init(struct sbus_iommu *iommu, unsigned int tsbsize)
> {
> - iopte_t *iopte;
> + unsigned long tsbbase, order, sz, num_tsb_entries;
>
> - iopte = iommu->page_table + (1 * CLUSTER_NPAGES);
> - while (iopte > iommu->page_table) {
> - iopte--;
> - if (!(iopte_val(*iopte) & IOPTE_VALID)) {
> - unsigned long tmp = npages;
> + num_tsb_entries = tsbsize / sizeof(iopte_t);
>
> - while (--tmp) {
> - iopte--;
> - if (iopte_val(*iopte) & IOPTE_VALID)
> - break;
> - }
> - if (tmp == 0) {
> - u32 entry = (iopte - iommu->page_table);
> + /* Setup initial software IOMMU state. */
> + spin_lock_init(&iommu->lock);
>
> - if (entry < iommu->lowest_consistent_map)
> - iommu->lowest_consistent_map = entry;
> - return iopte;
> - }
> - }
> + /* Allocate and initialize the free area map. */
> + sz = num_tsb_entries / 8;
> + sz = (sz + 7UL) & ~7UL;
> + iommu->arena.map = kzalloc(sz, GFP_KERNEL);
> + if (!iommu->arena.map) {
> + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
> + prom_halt();
> + }
> + iommu->arena.limit = num_tsb_entries;
> +
> + /* Now allocate and setup the IOMMU page table itself. */
> + order = get_order(tsbsize);
> + tsbbase = __get_free_pages(GFP_KERNEL, order);
> + if (!tsbbase) {
> + prom_printf("IOMMU: Error, gfp(tsb) failed.\n");
> + prom_halt();
> }
> - return NULL;
> + iommu->page_table = (iopte_t *)tsbbase;
> + memset(iommu->page_table, 0, tsbsize);
> }
>
> -static void free_consistent_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages)
> +static inline iopte_t *alloc_npages(struct sbus_iommu *iommu, unsigned long npages)
> {
> - iopte_t *iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT);
> + long entry;
>
> - if ((iopte - iommu->page_table) == iommu->lowest_consistent_map) {
> - iopte_t *walk = iopte + npages;
> - iopte_t *limit;
> + entry = sbus_arena_alloc(iommu, npages);
> + if (unlikely(entry < 0))
> + return NULL;
>
> - limit = iommu->page_table + CLUSTER_NPAGES;
> - while (walk < limit) {
> - if (iopte_val(*walk) != 0UL)
> - break;
> - walk++;
> - }
> - iommu->lowest_consistent_map =
> - (walk - iommu->page_table);
> - }
> + return iommu->page_table + entry;
> +}
>
> - while (npages--)
> - *iopte++ = __iopte(0UL);
> +static inline void free_npages(struct sbus_iommu *iommu, dma_addr_t base, unsigned long npages)
> +{
> + sbus_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
> }
>
> void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr)
> {
> - unsigned long order, first_page, flags;
> struct sbus_iommu *iommu;
> iopte_t *iopte;
> + unsigned long flags, order, first_page;
> void *ret;
> int npages;
>
> - if (size <= 0 || sdev == NULL || dvma_addr == NULL)
> - return NULL;
> -
> size = IO_PAGE_ALIGN(size);
> order = get_order(size);
> if (order >= 10)
> return NULL;
> +
> first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order);
> if (first_page == 0UL)
> return NULL;
> @@ -336,108 +237,121 @@ void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma
> iommu = sdev->bus->iommu;
>
> spin_lock_irqsave(&iommu->lock, flags);
> - iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT);
> - if (iopte == NULL) {
> - spin_unlock_irqrestore(&iommu->lock, flags);
> + iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
> + spin_unlock_irqrestore(&iommu->lock, flags);
> +
> + if (unlikely(iopte == NULL)) {
> free_pages(first_page, order);
> return NULL;
> }
>
> - /* Ok, we're committed at this point. */
> - *dvma_addr = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
> + *dvma_addr = (MAP_BASE +
> + ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
> ret = (void *) first_page;
> npages = size >> IO_PAGE_SHIFT;
> + first_page = __pa(first_page);
> while (npages--) {
> - *iopte++ = __iopte(IOPTE_VALID | IOPTE_CACHE | IOPTE_WRITE |
> - (__pa(first_page) & IOPTE_PAGE));
> + iopte_val(*iopte) = (IOPTE_VALID | IOPTE_CACHE |
> + IOPTE_WRITE |
> + (first_page & IOPTE_PAGE));
> + iopte++;
> first_page += IO_PAGE_SIZE;
> }
> - iommu_flush(iommu, *dvma_addr, size >> IO_PAGE_SHIFT);
> - spin_unlock_irqrestore(&iommu->lock, flags);
>
> return ret;
> }
>
> void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma)
> {
> - unsigned long order, npages;
> struct sbus_iommu *iommu;
> -
> - if (size <= 0 || sdev == NULL || cpu == NULL)
> - return;
> + iopte_t *iopte;
> + unsigned long flags, order, npages;
>
> npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
> iommu = sdev->bus->iommu;
> + iopte = iommu->page_table +
> + ((dvma - MAP_BASE) >> IO_PAGE_SHIFT);
> +
> + spin_lock_irqsave(&iommu->lock, flags);
> +
> + free_npages(iommu, dvma - MAP_BASE, npages);
>
> - spin_lock_irq(&iommu->lock);
> - free_consistent_cluster(iommu, dvma, npages);
> - iommu_flush(iommu, dvma, npages);
> - spin_unlock_irq(&iommu->lock);
> + spin_unlock_irqrestore(&iommu->lock, flags);
>
> order = get_order(size);
> if (order < 10)
> free_pages((unsigned long)cpu, order);
> }
>
> -dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t size, int dir)
> +dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t sz, int direction)
> {
> - struct sbus_iommu *iommu = sdev->bus->iommu;
> - unsigned long npages, pbase, flags;
> - iopte_t *iopte;
> - u32 dma_base, offset;
> - unsigned long iopte_bits;
> + struct sbus_iommu *iommu;
> + iopte_t *base;
> + unsigned long flags, npages, oaddr;
> + unsigned long i, base_paddr;
> + u32 bus_addr, ret;
> + unsigned long iopte_protection;
> +
> + iommu = sdev->bus->iommu;
>
> - if (dir == SBUS_DMA_NONE)
> + if (unlikely(direction == SBUS_DMA_NONE))
> BUG();
>
> - pbase = (unsigned long) ptr;
> - offset = (u32) (pbase & ~IO_PAGE_MASK);
> - size = (IO_PAGE_ALIGN(pbase + size) - (pbase & IO_PAGE_MASK));
> - pbase = (unsigned long) __pa(pbase & IO_PAGE_MASK);
> + oaddr = (unsigned long)ptr;
> + npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
> + npages >>= IO_PAGE_SHIFT;
>
> spin_lock_irqsave(&iommu->lock, flags);
> - npages = size >> IO_PAGE_SHIFT;
> - iopte = alloc_streaming_cluster(iommu, npages);
> - if (iopte == NULL)
> - goto bad;
> - dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
> - npages = size >> IO_PAGE_SHIFT;
> - iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
> - if (dir != SBUS_DMA_TODEVICE)
> - iopte_bits |= IOPTE_WRITE;
> - while (npages--) {
> - *iopte++ = __iopte(iopte_bits | (pbase & IOPTE_PAGE));
> - pbase += IO_PAGE_SIZE;
> - }
> - npages = size >> IO_PAGE_SHIFT;
> + base = alloc_npages(iommu, npages);
> spin_unlock_irqrestore(&iommu->lock, flags);
>
> - return (dma_base | offset);
> + if (unlikely(!base))
> + BUG();
>
> -bad:
> - spin_unlock_irqrestore(&iommu->lock, flags);
> - BUG();
> - return 0;
> + bus_addr = (MAP_BASE +
> + ((base - iommu->page_table) << IO_PAGE_SHIFT));
> + ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
> + base_paddr = __pa(oaddr & IO_PAGE_MASK);
> +
> + iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
> + if (direction != SBUS_DMA_TODEVICE)
> + iopte_protection |= IOPTE_WRITE;
> +
> + for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
> + iopte_val(*base) = iopte_protection | base_paddr;
> +
> + return ret;
> }
>
> -void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, int direction)
> +void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
> {
> struct sbus_iommu *iommu = sdev->bus->iommu;
> - u32 dma_base = dma_addr & IO_PAGE_MASK;
> - unsigned long flags;
> + iopte_t *base;
> + unsigned long flags, npages, i;
> +
> + if (unlikely(direction == SBUS_DMA_NONE))
> + BUG();
> +
> + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
> + npages >>= IO_PAGE_SHIFT;
> + base = iommu->page_table +
> + ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
>
> - size = (IO_PAGE_ALIGN(dma_addr + size) - dma_base);
> + bus_addr &= IO_PAGE_MASK;
>
> spin_lock_irqsave(&iommu->lock, flags);
> - free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
> - sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
> + sbus_strbuf_flush(iommu, bus_addr, npages, direction);
> + for (i = 0; i < npages; i++)
> + iopte_val(base[i]) = 0UL;
> + free_npages(iommu, bus_addr - MAP_BASE, npages);
> spin_unlock_irqrestore(&iommu->lock, flags);
> }
>
> #define SG_ENT_PHYS_ADDRESS(SG) \
> (__pa(page_address((SG)->page)) + (SG)->offset)
>
> -static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits)
> +static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
> + int nused, int nelems, unsigned long iopte_protection)
> {
> struct scatterlist *dma_sg = sg;
> struct scatterlist *sg_end = sg + nelems;
> @@ -462,7 +376,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
> for (;;) {
> unsigned long tmp;
>
> - tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg);
> + tmp = SG_ENT_PHYS_ADDRESS(sg);
> len = sg->length;
> if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
> pteval = tmp & IO_PAGE_MASK;
> @@ -478,7 +392,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
> sg++;
> }
>
> - pteval = ((pteval & IOPTE_PAGE) | iopte_bits);
> + pteval = iopte_protection | (pteval & IOPTE_PAGE);
> while (len > 0) {
> *iopte++ = __iopte(pteval);
> pteval += IO_PAGE_SIZE;
> @@ -509,103 +423,111 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in
> }
> }
>
> -int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int dir)
> +int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
> {
> - struct sbus_iommu *iommu = sdev->bus->iommu;
> - unsigned long flags, npages;
> - iopte_t *iopte;
> + struct sbus_iommu *iommu;
> + unsigned long flags, npages, iopte_protection;
> + iopte_t *base;
> u32 dma_base;
> struct scatterlist *sgtmp;
> int used;
> - unsigned long iopte_bits;
> -
> - if (dir == SBUS_DMA_NONE)
> - BUG();
>
> /* Fast path single entry scatterlists. */
> - if (nents == 1) {
> - sg->dma_address =
> + if (nelems == 1) {
> + sglist->dma_address =
> sbus_map_single(sdev,
> - (page_address(sg->page) + sg->offset),
> - sg->length, dir);
> - sg->dma_length = sg->length;
> + (page_address(sglist->page) + sglist->offset),
> + sglist->length, direction);
> + sglist->dma_length = sglist->length;
> return 1;
> }
>
> - npages = prepare_sg(sg, nents);
> + iommu = sdev->bus->iommu;
> +
> + if (unlikely(direction == SBUS_DMA_NONE))
> + BUG();
> +
> + npages = prepare_sg(sglist, nelems);
>
> spin_lock_irqsave(&iommu->lock, flags);
> - iopte = alloc_streaming_cluster(iommu, npages);
> - if (iopte == NULL)
> - goto bad;
> - dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT);
> + base = alloc_npages(iommu, npages);
> + spin_unlock_irqrestore(&iommu->lock, flags);
> +
> + if (unlikely(base == NULL))
> + BUG();
> +
> + dma_base = MAP_BASE +
> + ((base - iommu->page_table) << IO_PAGE_SHIFT);
>
> /* Normalize DVMA addresses. */
> - sgtmp = sg;
> - used = nents;
> + used = nelems;
>
> + sgtmp = sglist;
> while (used && sgtmp->dma_length) {
> sgtmp->dma_address += dma_base;
> sgtmp++;
> used--;
> }
> - used = nents - used;
>
> - iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
> - if (dir != SBUS_DMA_TODEVICE)
> - iopte_bits |= IOPTE_WRITE;
> + used = nelems - used;
>
> + iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE;
> + if (direction != SBUS_DMA_TODEVICE)
> + iopte_protection |= IOPTE_WRITE;
> +
> + fill_sg(base, sglist, used, nelems, iopte_protection);
>
> - fill_sg(iopte, sg, used, nents, iopte_bits);
> #ifdef VERIFY_SG
> - verify_sglist(sg, nents, iopte, npages);
> + verify_sglist(sglist, nelems, base, npages);
> #endif
> - spin_unlock_irqrestore(&iommu->lock, flags);
>
> return used;
> -
> -bad:
> - spin_unlock_irqrestore(&iommu->lock, flags);
> - BUG();
> - return 0;
> }
>
> -void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
> +void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
> {
> - unsigned long size, flags;
> struct sbus_iommu *iommu;
> - u32 dvma_base;
> - int i;
> + iopte_t *base;
> + unsigned long flags, i, npages;
> + u32 bus_addr;
>
> - /* Fast path single entry scatterlists. */
> - if (nents == 1) {
> - sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction);
> - return;
> - }
> + if (unlikely(direction == SBUS_DMA_NONE))
> + BUG();
> +
> + iommu = sdev->bus->iommu;
> +
> + bus_addr = sglist->dma_address & IO_PAGE_MASK;
>
> - dvma_base = sg[0].dma_address & IO_PAGE_MASK;
> - for (i = 0; i < nents; i++) {
> - if (sg[i].dma_length == 0)
> + for (i = 1; i < nelems; i++)
> + if (sglist[i].dma_length == 0)
> break;
> - }
> i--;
> - size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base;
> + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
> + bus_addr) >> IO_PAGE_SHIFT;
> +
> + base = iommu->page_table +
> + ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT);
>
> - iommu = sdev->bus->iommu;
> spin_lock_irqsave(&iommu->lock, flags);
> - free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
> - sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
> + sbus_strbuf_flush(iommu, bus_addr, npages, direction);
> + for (i = 0; i < npages; i++)
> + iopte_val(base[i]) = 0UL;
> + free_npages(iommu, bus_addr - MAP_BASE, npages);
> spin_unlock_irqrestore(&iommu->lock, flags);
> }
>
> -void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
> +void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction)
> {
> - struct sbus_iommu *iommu = sdev->bus->iommu;
> - unsigned long flags;
> + struct sbus_iommu *iommu;
> + unsigned long flags, npages;
> +
> + iommu = sdev->bus->iommu;
>
> - size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));
> + npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
> + npages >>= IO_PAGE_SHIFT;
> + bus_addr &= IO_PAGE_MASK;
>
> spin_lock_irqsave(&iommu->lock, flags);
> - sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
> + sbus_strbuf_flush(iommu, bus_addr, npages, direction);
> spin_unlock_irqrestore(&iommu->lock, flags);
> }
>
> @@ -613,23 +535,25 @@ void sbus_dma_sync_single_for_device(struct sbus_dev *sdev, dma_addr_t base, siz
> {
> }
>
> -void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
> +void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction)
> {
> - struct sbus_iommu *iommu = sdev->bus->iommu;
> - unsigned long flags, size;
> - u32 base;
> - int i;
> + struct sbus_iommu *iommu;
> + unsigned long flags, npages, i;
> + u32 bus_addr;
> +
> + iommu = sdev->bus->iommu;
>
> - base = sg[0].dma_address & IO_PAGE_MASK;
> - for (i = 0; i < nents; i++) {
> - if (sg[i].dma_length == 0)
> + bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
> + for (i = 0; i < nelems; i++) {
> + if (!sglist[i].dma_length)
> break;
> }
> i--;
> - size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;
> + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
> + - bus_addr) >> IO_PAGE_SHIFT;
>
> spin_lock_irqsave(&iommu->lock, flags);
> - sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
> + sbus_strbuf_flush(iommu, bus_addr, npages, direction);
> spin_unlock_irqrestore(&iommu->lock, flags);
> }
>
> @@ -1104,7 +1028,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
> struct linux_prom64_registers *pr;
> struct device_node *dp;
> struct sbus_iommu *iommu;
> - unsigned long regs, tsb_base;
> + unsigned long regs;
> u64 control;
> int i;
>
> @@ -1132,14 +1056,6 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
>
> memset(iommu, 0, sizeof(*iommu));
>
> - /* We start with no consistent mappings. */
> - iommu->lowest_consistent_map = CLUSTER_NPAGES;
> -
> - for (i = 0; i < NCLUSTERS; i++) {
> - iommu->alloc_info[i].flush = 0;
> - iommu->alloc_info[i].next = 0;
> - }
> -
> /* Setup spinlock. */
> spin_lock_init(&iommu->lock);
>
> @@ -1159,25 +1075,13 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
> sbus->portid, regs);
>
> /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
> + sbus_iommu_table_init(iommu, IO_TSB_SIZE);
> +
> control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL);
> control = ((7UL << 16UL) |
> (0UL << 2UL) |
> (1UL << 1UL) |
> (1UL << 0UL));
> -
> - /* Using the above configuration we need 1MB iommu page
> - * table (128K ioptes * 8 bytes per iopte). This is
> - * page order 7 on UltraSparc.
> - */
> - tsb_base = __get_free_pages(GFP_ATOMIC, get_order(IO_TSB_SIZE));
> - if (tsb_base == 0UL) {
> - prom_printf("sbus_iommu_init: Fatal error, cannot alloc TSB table.\n");
> - prom_halt();
> - }
> -
> - iommu->page_table = (iopte_t *) tsb_base;
> - memset(iommu->page_table, 0, IO_TSB_SIZE);
> -
> upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL);
>
> /* Clean out any cruft in the IOMMU using
> @@ -1195,7 +1099,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus)
> upa_readq(iommu->sbus_control_reg);
>
> /* Give the TSB to SYSIO. */
> - upa_writeq(__pa(tsb_base), iommu->iommu_regs + IOMMU_TSBBASE);
> + upa_writeq(__pa(iommu->page_table), iommu->iommu_regs + IOMMU_TSBBASE);
>
> /* Setup streaming buffer, DE=1 SB_EN=1 */
> control = (1UL << 1UL) | (1UL << 0UL);
> -
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@×××××××××××.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> Regards,
> --
> Ferris McCormick (P44646, MI) <fmccor@g.o>
> Developer, Gentoo Linux (Devrel, Sparc)
>
>
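For anyone reading the patch above rather than applying it: the commit message's point is that the old per-cluster allocator is replaced by a flat bitmap "arena" that is scanned for a run of free entries, with at most one wrap-around pass that does a full IOMMU flush before old entries are reused. Below is a minimal user-space sketch of just that allocation pattern, for illustration only; the names (arena_alloc, arena_flushall, ARENA_ENTRIES), the byte-per-entry map, and the no-op flush hook are my own simplifications, not the kernel's code or API (the real version uses a proper bitmap with find_next_zero_bit() and flushes the IOMMU hardware under the iommu spinlock).

/* Toy model of the bitmap-arena allocation pattern used in the patch
 * above (illustrative names; this is not kernel code).
 * One byte per IOMMU page: 0 = free, 1 = in use.  Allocation scans for
 * a run of free entries starting at a moving hint, and wraps around at
 * most once, after a "flush", before giving up.
 */
#include <stdio.h>
#include <string.h>

#define ARENA_ENTRIES 64

struct arena {
    unsigned char map[ARENA_ENTRIES];
    unsigned int hint;
    unsigned int limit;
};

/* Stand-in for the full IOMMU flush done before wrapping around. */
static void arena_flushall(struct arena *a) { (void)a; }

static long arena_alloc(struct arena *a, unsigned int npages)
{
    unsigned int start = a->hint, limit = a->limit, n, i;
    int pass = 0;

again:
    for (n = start; n + npages <= limit; n++) {
        for (i = 0; i < npages; i++)
            if (a->map[n + i])
                break;
        if (i == npages) {              /* found a free run of npages */
            memset(&a->map[n], 1, npages);
            a->hint = n + npages;
            return n;
        }
        n += i;                         /* skip past the used entry */
    }
    if (pass++ < 1) {                   /* wrap once, flushing first */
        limit = start;
        start = 0;
        arena_flushall(a);
        goto again;
    }
    return -1;                          /* arena exhausted */
}

static void arena_free(struct arena *a, unsigned int base, unsigned int npages)
{
    memset(&a->map[base], 0, npages);
}

int main(void)
{
    struct arena a = { .hint = 0, .limit = ARENA_ENTRIES };

    printf("alloc 4 -> %ld\n", arena_alloc(&a, 4));   /* 0 */
    printf("alloc 8 -> %ld\n", arena_alloc(&a, 8));   /* 4 */
    arena_free(&a, 0, 4);
    printf("alloc 2 -> %ld\n", arena_alloc(&a, 2));   /* 12: search resumes at the hint */
    return 0;
}

The design point, as the patch itself shows, is that the new code no longer tracks a per-cluster flush point on every free; it only flushes when the search wraps, which is the same scheme the already well-tested PCI IOMMU path uses.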
--
gentoo-sparc@g.o mailing list
