At Thu, 21 Aug 2008 08:55:12 -0500,
James Bottomley wrote:
>
> On Thu, 2008-08-21 at 12:19 +0200, Takashi Iwai wrote:
> > At Wed, 20 Aug 2008 12:58:08 -0500,
> > James Bottomley wrote:
> > >
> > > On Wed, 2008-08-20 at 18:53 +0200, Takashi Iwai wrote:
> > > > > I'm afraid there are several problems. The first is that it doesn't
> > > > > do
> > > > > what you want. You can't map a coherent page to userspace (which is
> > > > > at
> > > > > a non congruent address on parisc) and still expect it to be
> > > > > coherent ... there's going to have to be fiddling with the page table
> > > > > caches to make sure coherency isn't destroyed by aliasing effects
> > > >
> > > > Hmm... how bad would be the coherency with such a simple mmap method?
> > > > In most cases, we don't need the "perfect" coherency. Usually one
> > > > process mmaps the whole buffer and keep reading/writing. There is
> > > > another use case (sharing the mmapped buffer by multiple processes),
> > > > but this can be disabled if we know it's not feasible beforehand.
> > >
> > > Unfortunately, the incoherency is between the user and the kernel.
> > > That's where the aliasing effects occur, so realistically, even though
> > > you've mapped coherent memory to the user, the coherency of that memory
> > > is only device <-> kernel. When the any single user space process
> > > writes to it, the device won't see the write unless the user issues a
> > > flush.
> >
> > I see. In the case of ALSA mmap mode, a user issues an ioctl to
> > notify after the read/write access, so it'd be relatively easy to add
> > a sync operation.
> >
> > Does the call of dma_sync_*_for_device() suffice for that purpose?
>
> No ... dma_sync_* sync's from the kernel to the device ... you don't
> need that if the memory is already coherent.
>
> The problem is that the data you want the device to see is held in a
> cache above the user space mapping ... it's that cache that has to be
> flushed (i.e. you need to flush through the user mappings, not through
> the kernel ones).
>
> > (BTW, how does the fb driver work on this?)
>
> It sets the shared page to uncached on all its mappings. Turning off
> caching (assuming the platform can do it ... not all can) is a good way
> to eliminate aliasing issues.
Thanks for clarification.
How about the revised patch below (for PARISC)?
Takashi
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ccd61b9..680b075 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -545,6 +545,20 @@ static void pa11_dma_sync_sg_for_device(struct device
*dev, struct scatterlist *
flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
}
+static int pa11_dma_mmap_coherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle,
+ size_t size)
+{
+ struct page *pg;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ cpu_addr = __va(handle);
+ pg = virt_to_page(cpu_addr);
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(pg) + vma->vm_pgoff,
+ size, vma->vm_page_prot);
+}
+
struct hppa_dma_ops pcxl_dma_ops = {
.dma_supported = pa11_dma_supported,
.alloc_consistent = pa11_dma_alloc_consistent,
@@ -558,6 +572,7 @@ struct hppa_dma_ops pcxl_dma_ops = {
.dma_sync_single_for_device = pa11_dma_sync_single_for_device,
.dma_sync_sg_for_cpu = pa11_dma_sync_sg_for_cpu,
.dma_sync_sg_for_device = pa11_dma_sync_sg_for_device,
+ .mmap_coherent = pa11_dma_mmap_coherent,
};
static void *fail_alloc_consistent(struct device *dev, size_t size,
@@ -598,4 +613,5 @@ struct hppa_dma_ops pcx_dma_ops = {
.dma_sync_single_for_device = pa11_dma_sync_single_for_device,
.dma_sync_sg_for_cpu = pa11_dma_sync_sg_for_cpu,
.dma_sync_sg_for_device = pa11_dma_sync_sg_for_device,
+ .mmap_coherent = pa11_dma_mmap_coherent,
};
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b30e38f..dd2ab2c 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1014,6 +1014,19 @@ ccio_unmap_sg(struct device *dev, struct scatterlist
*sglist, int nents,
DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents);
}
+static int ccio_dma_mmap_coherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle,
+ size_t size)
+{
+ struct page *pg;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pg = virt_to_page(cpu_addr);
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(pg) + vma->vm_pgoff,
+ size, vma->vm_page_prot);
+}
+
static struct hppa_dma_ops ccio_ops = {
.dma_supported = ccio_dma_supported,
.alloc_consistent = ccio_alloc_consistent,
@@ -1027,6 +1040,7 @@ static struct hppa_dma_ops ccio_ops = {
.dma_sync_single_for_device = NULL, /* NOP for U2/Uturn */
.dma_sync_sg_for_cpu = NULL, /* ditto */
.dma_sync_sg_for_device = NULL, /* ditto */
+ .mmap_coherent = ccio_dma_mmap_coherent,
};
#ifdef CONFIG_PROC_FS
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index bc73b96..403d66d 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1057,6 +1057,19 @@ sba_unmap_sg(struct device *dev, struct scatterlist
*sglist, int nents,
}
+static int sba_dma_mmap_coherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle,
+ size_t size)
+{
+ struct page *pg;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pg = virt_to_page(cpu_addr);
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(pg) + vma->vm_pgoff,
+ size, vma->vm_page_prot);
+}
+
static struct hppa_dma_ops sba_ops = {
.dma_supported = sba_dma_supported,
.alloc_consistent = sba_alloc_consistent,
@@ -1070,6 +1083,7 @@ static struct hppa_dma_ops sba_ops = {
.dma_sync_single_for_device = NULL,
.dma_sync_sg_for_cpu = NULL,
.dma_sync_sg_for_device = NULL,
+ .mmap_coherent = sba_dma_mmap_coherent,
};
diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h
index 53af696..5b357b3 100644
--- a/include/asm-parisc/dma-mapping.h
+++ b/include/asm-parisc/dma-mapping.h
@@ -19,6 +19,9 @@ struct hppa_dma_ops {
void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova,
unsigned long offset, size_t size, enum dma_data_direction direction);
void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction direction);
void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist
*sg, int nelems, enum dma_data_direction direction);
+ int (*mmap_coherent)(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size);
+
};
/*
@@ -204,6 +207,15 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t
size,
flush_kernel_dcache_range((unsigned long)vaddr, size);
}
+static inline int
+dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size)
+{
+ if (!hppa_dma_ops->mmap_coherent)
+ return -ENXIO;
+ return hppa_dma_ops->mmap_coherent(dev, vma, cpu_addr, handle, size);
+}
+
static inline void *
parisc_walk_tree(struct device *dev)
{
|