kernel-2.6.18-128.29.1.el5.src.rpm

From: Scott Moser <smoser@redhat.com>
Subject: [RHEL5.1 PATCH] bz228081 1/7 [PPC] Cell SPE and Performance
Date: Mon, 7 May 2007 11:03:23 -0400 (EDT)
Bugzilla: 228128
Message-Id: <200705071503.l47F3NDu015038@mail.boston.redhat.com>
Changelog: [PPC64] Cell SPE and Performance


RHBZ#: 228128 [FEATURE] Cell SPE Exploitation and Performance
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228128

Description:
------------
This is prerequisite #1 for the update to 2.6.21 level of spufs.

It adds the vm_insert_pfn() function, which was introduced in the core mm
during the 2.6.21 development cycle.  This is just a new helper function used
by spufs; the patch should have no side effects.

This is needed as part of the requested feature.
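
For context, here is a minimal sketch (not part of this patch; the mydrv_*
names are hypothetical) of the contract the new helper expects from its
caller:

/* Hypothetical driver (assumes <linux/fs.h> and <linux/mm.h>): prepare a
 * vma so that vm_insert_pfn() may later be used on it from the fault path.
 */
static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	/*
	 * vm_insert_pfn() BUG()s unless the vma is a raw pfn mapping
	 * (VM_PFNMAP) and is not a copy-on-write mapping.
	 */
	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_ops = &mydrv_vm_ops;	/* hypothetical ops, supply ->nopfn */
	return 0;
}

At fault time the handler then calls vm_insert_pfn(vma, address, pfn), which
returns 0 on success, -EBUSY if a pte is already installed at that address,
or -ENOMEM if the page table could not be allocated.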

RHEL Version Found:
-------------------
requested as a feature for 5.1

Upstream Status:
----------------
Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e0dc0d8f4a327d033bfb63d43f113d5f31d11b3c
Commit:     e0dc0d8f4a327d033bfb63d43f113d5f31d11b3c
Parent:     2ca48ed5cc5935cbd2a6f5d14fecd4ddbbdb4315
Author:     Nick Piggin <npiggin@suse.de>
AuthorDate: Mon Feb 12 00:51:36 2007 -0800
Committer:  Linus Torvalds <torvalds@woody.linux-foundation.org>
CommitDate: Mon Feb 12 09:48:27 2007 -0800

    [PATCH] add vm_insert_pfn()
    
    Add a vm_insert_pfn helper, so that ->fault handlers can have nopfn
    functionality by installing their own pte and returning NULL.
    
    Signed-off-by: Nick Piggin <npiggin@suse.de>
    Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
    Cc: Arnd Bergmann <arnd.bergmann@de.ibm.com>
    Cc: Hugh Dickins <hugh@veritas.com>
    Cc: Christoph Hellwig <hch@lst.de>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>


Test Status:
------------
A brew scratch build of kernel-2.6.18-17.el5 with this patch applied
is available at http://brewweb.devel.redhat.com/brew/taskinfo?taskID=773808 .

These changes are part of the upstream Cell SDK, and will be tested by IBM
with the Cell SPUFS test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

-- 
 include/linux/mm.h |    2 ++
 mm/memory.c        |   45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Index: b/include/linux/mm.h
===================================================================
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1080,6 +1080,8 @@ unsigned long vmalloc_to_pfn(void *addr)
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn);
 
 struct page *follow_page(struct vm_area_struct *, unsigned long address,
 			unsigned int foll_flags);
Index: b/mm/memory.c
===================================================================
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1277,6 +1277,51 @@ int vm_insert_page(struct vm_area_struct
 }
 EXPORT_SYMBOL(vm_insert_page);
 
+/**
+ * vm_insert_pfn - insert single pfn into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ *
+ * Similar to vm_inert_page, this allows drivers to insert individual pages
+ * they've allocated into a user vma. Same comments apply.
+ *
+ * This function should only be called from a vm_ops->fault handler, and
+ * in that case the handler should return NULL.
+ */
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int retval;
+	pte_t *pte, entry;
+	spinlock_t *ptl;
+
+	BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+	BUG_ON(is_cow_mapping(vma->vm_flags));
+
+	retval = -ENOMEM;
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto out;
+	retval = -EBUSY;
+	if (!pte_none(*pte))
+		goto out_unlock;
+
+	/* Ok, finally just insert the thing.. */
+	entry = pfn_pte(pfn, vma->vm_page_prot);
+	set_pte_at(mm, addr, pte, entry);
+	update_mmu_cache(vma, addr, entry);
+
+	retval = 0;
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results

RHBZ#: 228128 [FEATURE]
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228128

Description:
------------
This is prerequisite #2 for the update to 2.6.21 level of spufs.

It adds support for the vm_ops->nopfn() handler returning a special result
code, NOPFN_REFAULT, which triggers a replay of the faulting instruction.

This patch should have no adverse side effects and does not remove or modify
any existing ABI or data structure.
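
For illustration, a hedged sketch of how a vm_ops->nopfn() handler uses the
new return code together with vm_insert_pfn() from patch 1/7.  This mirrors
the spufs fault handlers added in patch 4/7; the mydrv_* names and the
MYDRV_AREA_SIZE / mydrv_area_phys values are hypothetical:

static unsigned long mydrv_nopfn(struct vm_area_struct *vma,
				 unsigned long address)
{
	unsigned long offset = address - vma->vm_start;
	unsigned long pfn;

	offset += vma->vm_pgoff << PAGE_SHIFT;
	if (offset >= MYDRV_AREA_SIZE)
		return NOPFN_SIGBUS;

	/* install the pte ourselves instead of returning a struct page */
	pfn = (mydrv_area_phys + offset) >> PAGE_SHIFT;
	vm_insert_pfn(vma, address, pfn);

	/* ask do_no_pfn() to simply re-execute the faulting instruction */
	return NOPFN_REFAULT;
}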

RHEL Version Found:
-------------------
requested as a feature for 5.1

Upstream Status:
----------------
Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=22cd25ed31bbf849acaa06ab220dc4f526153f13
Commit:     22cd25ed31bbf849acaa06ab220dc4f526153f13
Parent:     e0dc0d8f4a327d033bfb63d43f113d5f31d11b3c
Author:     Benjamin Herrenschmidt <benh@kernel.crashing.org>
AuthorDate: Mon Feb 12 00:51:38 2007 -0800
Committer:  Linus Torvalds <torvalds@woody.linux-foundation.org>
CommitDate: Mon Feb 12 09:48:27 2007 -0800

    [PATCH] Add NOPFN_REFAULT result from vm_ops->nopfn()
    
    Add a NOPFN_REFAULT return code for vm_ops->nopfn() equivalent to
    NOPAGE_REFAULT for vmops->nopage() indicating that the handler requests a
    re-execution of the faulting instruction
    
    Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
    Cc: Arnd Bergmann <arnd.bergmann@de.ibm.com>
    Cc: Hugh Dickins <hugh@veritas.com>
    Cc: Christoph Hellwig <hch@lst.de>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Test Status:
------------
A brew scratch build of kernel-2.6.18-17.el5 with this patch applied
is available at http://brewweb.devel.redhat.com/brew/taskinfo?taskID=773808 .

These changes are part of the upstream Cell SDK, and will be tested by IBM
with the Cell SPUFS test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

-- 
 include/linux/mm.h |    1 +
 mm/memory.c        |    6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

Index: b/include/linux/mm.h
===================================================================
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -635,6 +635,7 @@ static inline int page_mapped(struct pag
  */
 #define NOPFN_SIGBUS	((unsigned long) -1)
 #define NOPFN_OOM	((unsigned long) -2)
+#define NOPFN_REFAULT	((unsigned long) -3)
 
 /*
  * Different kinds of faults, as returned by handle_mm_fault().
Index: b/mm/memory.c
===================================================================
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2435,10 +2435,12 @@ static noinline int do_no_pfn(struct mm_
 	BUG_ON(is_cow_mapping(vma->vm_flags));
 
 	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-	if (pfn == NOPFN_OOM)
+	if (unlikely(pfn == NOPFN_OOM))
 		return VM_FAULT_OOM;
-	if (pfn == NOPFN_SIGBUS)
+	else if (unlikely(pfn == NOPFN_SIGBUS))
 		return VM_FAULT_SIGBUS;
+	else if (unlikely(pfn == NOPFN_REFAULT))
+		return VM_FAULT_MINOR;
 
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 

RHBZ#: 228128 [FEATURE]
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228128

Description:
------------
Prereq. #3 - Add support for extra hook in core dumps

This is prerequisite #3 for the update to 2.6.21 level of spufs.

It adds hooks to the ELF core dump code that the platform can use to write
additional notes to the resulting core file.  spufs uses these hooks to dump
the SPU context state into the core dump of applications that use it.
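
For illustration, an architecture opts in to the new hooks roughly like this
(a sketch modeled on the powerpc wiring that appears later in this series,
not a verbatim copy of that code):

/* in the architecture's asm/elf.h */
#define ARCH_HAVE_EXTRA_ELF_NOTES

extern int arch_notes_size(void);
extern void arch_write_notes(struct file *file);

#define ELF_CORE_EXTRA_NOTES_SIZE	arch_notes_size()
#define ELF_CORE_WRITE_EXTRA_NOTES	arch_write_notes(file)

fs/binfmt_elf.c then adds ELF_CORE_EXTRA_NOTES_SIZE to the size recorded in
the notes program header, and invokes ELF_CORE_WRITE_EXTRA_NOTES after the
regular notes have been written, as the diff below shows.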

RHEL Version Found:
-------------------
requested as a feature for 5.1

Upstream Status:
----------------
Portions of git commit bf1ab978be2318c5a564de9aa0f1a217b44170d4
http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=bf1ab978be2318c5a564de9aa0f1a217b44170d4

Author: Dwayne Grant McConnell <decimal@us.ibm.com>

    [POWERPC] coredump: Add SPU elf notes to coredump.

    ...<snip>...

    A new macro, ARCH_HAVE_EXTRA_NOTES, was created for architectures to
    specify they have extra elf core notes.
    
    A new macro, ELF_CORE_EXTRA_NOTES_SIZE, was created so the size of the
    additional notes could be calculated and added to the notes phdr entry.

    ...<snip>...

    Cc: <linux-arch@vger.kernel.org>
    Signed-off-by: Dwayne Grant McConnell <decimal@us.ibm.com>
    Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>

Test Status:
------------
A brew scratch build of kernel-2.6.18-17.el5 with this patch applied
is available at http://brewweb.devel.redhat.com/brew/taskinfo?taskID=773808 .

These changes are part of the upstream Cell SDK, and will be tested by IBM
with the Cell SPUFS test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

-- 
 fs/binfmt_elf.c     |    8 ++++++++
 include/linux/elf.h |    8 +++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

Index: b/fs/binfmt_elf.c
===================================================================
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1648,6 +1648,10 @@ static int elf_core_dump(long signr, str
 		
 		sz += thread_status_size;
 
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+		sz += ELF_CORE_EXTRA_NOTES_SIZE;
+#endif
+
 		fill_elf_note_phdr(&phdr, sz, offset);
 		offset += sz;
 		DUMP_WRITE(&phdr, sizeof(phdr));
@@ -1689,6 +1693,10 @@ static int elf_core_dump(long signr, str
 		if (!writenote(notes + i, file))
 			goto end_coredump;
 
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+	ELF_CORE_WRITE_EXTRA_NOTES;
+#endif
+
 	/* write out the thread status notes section */
 	list_for_each(t, &thread_list) {
 		struct elf_thread_status *tmp =
Index: b/include/linux/elf.h
===================================================================
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -354,8 +354,14 @@ typedef struct elf64_shdr {
 #define NT_AUXV		6
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 
-
 #ifndef __ASSEMBLY__
+#ifndef ARCH_HAVE_EXTRA_ELF_NOTES
+static inline int arch_notes_size(void) { return 0; }
+static inline void arch_write_notes(struct file *file) { }
+
+#define ELF_CORE_EXTRA_NOTES_SIZE arch_notes_size()
+#define ELF_CORE_WRITE_EXTRA_NOTES arch_write_notes(file)
+#endif /* ARCH_HAVE_EXTRA_ELF_NOTES */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {

RHBZ#: 228128 [FEATURE]
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228128

Description:
------------
This backports spufs from kernel 2.6.21 (final)

Note that the version of spufs in 2.6.21 is -not- the latest version we
have in the Cell SDK2.1.  The latter has additional fixes along with support
for affinity.  A separate patch, provided subsequently, applies on -top- of
the 2.6.21 backport and brings it up to the SDK2.1 level of functionality,
so that it can be audited separately.

RHEL Version Found:
-------------------
requested as a feature for 5.1

Upstream Status:
----------------
Portions of several upstream git commits are contained in the backport,
including:
  6263203ed6e9ff107129a1ebe613290b342a4465
    [POWERPC] spufs: Add infrastructure needed for gang scheduling
  bf1ab978be2318c5a564de9aa0f1a217b44170d4
    [POWERPC] coredump: Add SPU elf notes to coredump.
  5737edd1ddbde5ab7f63bb3cb36015edbdb7c295
    [POWERPC] spufs: add support for nonschedulable contexts
  eb758ce5b0d84e13cb643b6cc7cb429f6fa28258
    [POWERPC] spufs: "stautus" isnt a word.
  b9e3bd774bb1a90fee9b90f461a51e4ba295fe6d
    [POWERPC] spufs: Add /lslr, /dma_info and /proxydma files
  e28b003136b5b2f10c25b49c32df9b7742550c23
    [POWERPC] cell: abstract spu management routines


Test Status:
------------
A brew scratch build of kernel-2.6.18-17.el5 with this patch applied
is available at http://brewweb.devel.redhat.com/brew/taskinfo?taskID=773808 .

These changes are part of the upstream Cell SDK, and will be tested by IBM
with the Cell SPUFS test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

-- 
 arch/powerpc/mm/hash_utils_64.c                 |   10 
 arch/powerpc/mm/hugetlbpage.c                   |    4 
 arch/powerpc/platforms/cell/Makefile            |    7 
 arch/powerpc/platforms/cell/setup.c             |    3 
 arch/powerpc/platforms/cell/spu_base.c          |  424 ++++-----
 arch/powerpc/platforms/cell/spu_coredump.c      |   81 +
 arch/powerpc/platforms/cell/spu_manage.c        |  366 ++++++++
 arch/powerpc/platforms/cell/spu_priv1_mmio.c    |   19 
 arch/powerpc/platforms/cell/spu_priv1_mmio.h    |   26 
 arch/powerpc/platforms/cell/spufs/Makefile      |    4 
 arch/powerpc/platforms/cell/spufs/backing_ops.c |   31 
 arch/powerpc/platforms/cell/spufs/context.c     |  144 ++-
 arch/powerpc/platforms/cell/spufs/coredump.c    |  238 +++++
 arch/powerpc/platforms/cell/spufs/file.c        | 1035 ++++++++++++++++++------
 arch/powerpc/platforms/cell/spufs/gang.c        |   81 +
 arch/powerpc/platforms/cell/spufs/hw_ops.c      |   55 -
 arch/powerpc/platforms/cell/spufs/inode.c       |  282 +++++-
 arch/powerpc/platforms/cell/spufs/run.c         |  205 ++++
 arch/powerpc/platforms/cell/spufs/sched.c       |  611 +++++++-------
 arch/powerpc/platforms/cell/spufs/spufs.h       |   96 +-
 arch/powerpc/platforms/cell/spufs/switch.c      |  134 +--
 arch/powerpc/platforms/cell/spufs/syscalls.c    |    9 
 include/asm-powerpc/elf.h                       |   13 
 include/asm-powerpc/spu.h                       |   89 +-
 include/asm-powerpc/spu_csa.h                   |    5 
 include/asm-powerpc/spu_info.h                  |   54 +
 include/asm-powerpc/spu_priv1.h                 |   48 -
 27 files changed, 3047 insertions(+), 1027 deletions(-)

Index: b/arch/powerpc/platforms/cell/spufs/Makefile
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,8 +1,8 @@
 obj-y += switch.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
-spufs-y += inode.o file.o context.o syscalls.o
-spufs-y += sched.o backing_ops.o hw_ops.o run.o
+spufs-y += inode.o file.o context.o syscalls.o coredump.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
 
 # Rules to build switch.o with the help of SPU tool chain
 SPU_CROSS	:= spu-
Index: b/arch/powerpc/platforms/cell/spufs/backing_ops.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -36,6 +36,7 @@
 #include <asm/io.h>
 #include <asm/spu.h>
 #include <asm/spu_csa.h>
+#include <asm/spu_info.h>
 #include <asm/mmu_context.h>
 #include "spufs.h"
 
@@ -267,6 +268,11 @@ static char *spu_backing_get_ls(struct s
 	return ctx->csa.lscsa->ls;
 }
 
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_runcntl_RW;
+}
+
 static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
 {
 	spin_lock(&ctx->csa.register_lock);
@@ -279,9 +285,26 @@ static void spu_backing_runcntl_write(st
 	spin_unlock(&ctx->csa.register_lock);
 }
 
-static void spu_backing_runcntl_stop(struct spu_context *ctx)
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
+}
+
+static void spu_backing_master_stop(struct spu_context *ctx)
 {
-	spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
 }
 
 static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
@@ -345,8 +368,10 @@ struct spu_context_ops spu_backing_ops =
 	.npc_write = spu_backing_npc_write,
 	.status_read = spu_backing_status_read,
 	.get_ls = spu_backing_get_ls,
+	.runcntl_read = spu_backing_runcntl_read,
 	.runcntl_write = spu_backing_runcntl_write,
-	.runcntl_stop = spu_backing_runcntl_stop,
+	.master_start = spu_backing_master_start,
+	.master_stop = spu_backing_master_stop,
 	.set_mfc_query = spu_backing_set_mfc_query,
 	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
Index: b/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -27,7 +27,7 @@
 #include <asm/spu_csa.h>
 #include "spufs.h"
 
-struct spu_context *alloc_spu_context(void)
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
 {
 	struct spu_context *ctx;
 	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
@@ -42,7 +42,7 @@ struct spu_context *alloc_spu_context(vo
 	}
 	spin_lock_init(&ctx->mmio_lock);
 	kref_init(&ctx->kref);
-	init_rwsem(&ctx->state_sema);
+	mutex_init(&ctx->state_mutex);
 	init_MUTEX(&ctx->run_sema);
 	init_waitqueue_head(&ctx->ibox_wq);
 	init_waitqueue_head(&ctx->wbox_wq);
@@ -51,6 +51,12 @@ struct spu_context *alloc_spu_context(vo
 	ctx->state = SPU_STATE_SAVED;
 	ctx->ops = &spu_backing_ops;
 	ctx->owner = get_task_mm(current);
+	if (gang)
+		spu_gang_add_ctx(gang, ctx);
+	ctx->rt_priority = current->rt_priority;
+	ctx->policy = current->policy;
+	ctx->prio = current->prio;
+	INIT_WORK(&ctx->sched_work, spu_sched_tick, ctx);
 	goto out;
 out_free:
 	kfree(ctx);
@@ -63,10 +69,12 @@ void destroy_spu_context(struct kref *kr
 {
 	struct spu_context *ctx;
 	ctx = container_of(kref, struct spu_context, kref);
-	down_write(&ctx->state_sema);
+	mutex_lock(&ctx->state_mutex);
 	spu_deactivate(ctx);
-	up_write(&ctx->state_sema);
+	mutex_unlock(&ctx->state_mutex);
 	spu_fini_csa(&ctx->csa);
+	if (ctx->gang)
+		spu_gang_remove_ctx(ctx->gang, ctx);
 	kfree(ctx);
 }
 
@@ -92,80 +100,102 @@ void spu_forget(struct spu_context *ctx)
 	spu_release(ctx);
 }
 
-void spu_acquire(struct spu_context *ctx)
-{
-	down_read(&ctx->state_sema);
-}
-
-void spu_release(struct spu_context *ctx)
-{
-	up_read(&ctx->state_sema);
-}
-
 void spu_unmap_mappings(struct spu_context *ctx)
 {
 	if (ctx->local_store)
 		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
 	if (ctx->mfc)
-		unmap_mapping_range(ctx->mfc, 0, 0x4000, 1);
+		unmap_mapping_range(ctx->mfc, 0, 0x1000, 1);
 	if (ctx->cntl)
-		unmap_mapping_range(ctx->cntl, 0, 0x4000, 1);
+		unmap_mapping_range(ctx->cntl, 0, 0x1000, 1);
 	if (ctx->signal1)
-		unmap_mapping_range(ctx->signal1, 0, 0x4000, 1);
+		unmap_mapping_range(ctx->signal1, 0, PAGE_SIZE, 1);
 	if (ctx->signal2)
-		unmap_mapping_range(ctx->signal2, 0, 0x4000, 1);
-}
-
-int spu_acquire_runnable(struct spu_context *ctx)
-{
-	int ret = 0;
-
-	down_read(&ctx->state_sema);
-	if (ctx->state == SPU_STATE_RUNNABLE) {
-		ctx->spu->prio = current->prio;
-		return 0;
-	}
-	up_read(&ctx->state_sema);
-
-	down_write(&ctx->state_sema);
-	/* ctx is about to be freed, can't acquire any more */
-	if (!ctx->owner) {
-		ret = -EINVAL;
-		goto out;
-	}
+		unmap_mapping_range(ctx->signal2, 0, PAGE_SIZE, 1);
+	if (ctx->mss)
+		unmap_mapping_range(ctx->mss, 0, 0x1000, 1);
+	if (ctx->psmap)
+		unmap_mapping_range(ctx->psmap, 0, 0x20000, 1);
+}
+
+/**
+ * spu_acquire_exclusive - lock spu contex and protect against userspace access
+ * @ctx:	spu contex to lock
+ *
+ * Note:
+ *	Returns 0 and with the context locked on success
+ *	Returns negative error and with the context _unlocked_ on failure.
+ */
+int spu_acquire_exclusive(struct spu_context *ctx)
+{
+	int ret = -EINVAL;
+
+	spu_acquire(ctx);
+	/*
+	 * Context is about to be freed, so we can't acquire it anymore.
+	 */
+	if (!ctx->owner)
+		goto out_unlock;
 
 	if (ctx->state == SPU_STATE_SAVED) {
 		ret = spu_activate(ctx, 0);
 		if (ret)
-			goto out;
-		ctx->state = SPU_STATE_RUNNABLE;
+			goto out_unlock;
+	} else {
+		/*
+		 * We need to exclude userspace access to the context.
+		 *
+		 * To protect against memory access we invalidate all ptes
+		 * and make sure the pagefault handlers block on the mutex.
+		 */
+		spu_unmap_mappings(ctx);
 	}
 
-	downgrade_write(&ctx->state_sema);
-	/* On success, we return holding the lock */
-
-	return ret;
-out:
-	/* Release here, to simplify calling code. */
-	up_write(&ctx->state_sema);
+	return 0;
 
+ out_unlock:
+	spu_release(ctx);
 	return ret;
 }
 
-void spu_acquire_saved(struct spu_context *ctx)
+/**
+ * spu_acquire_runnable - lock spu contex and make sure it is in runnable state
+ * @ctx:	spu contex to lock
+ *
+ * Note:
+ *	Returns 0 and with the context locked on success
+ *	Returns negative error and with the context _unlocked_ on failure.
+ */
+int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
 {
-	down_read(&ctx->state_sema);
+	int ret = -EINVAL;
 
-	if (ctx->state == SPU_STATE_SAVED)
-		return;
+	spu_acquire(ctx);
+	if (ctx->state == SPU_STATE_SAVED) {
+		/*
+		 * Context is about to be freed, so we can't acquire it anymore.
+		 */
+		if (!ctx->owner)
+			goto out_unlock;
+		ret = spu_activate(ctx, flags);
+		if (ret)
+			goto out_unlock;
+	}
 
-	up_read(&ctx->state_sema);
-	down_write(&ctx->state_sema);
+	return 0;
 
-	if (ctx->state == SPU_STATE_RUNNABLE) {
-		spu_deactivate(ctx);
-		ctx->state = SPU_STATE_SAVED;
-	}
+ out_unlock:
+	spu_release(ctx);
+	return ret;
+}
 
-	downgrade_write(&ctx->state_sema);
+/**
+ * spu_acquire_saved - lock spu contex and make sure it is in saved state
+ * @ctx:	spu contex to lock
+ */
+void spu_acquire_saved(struct spu_context *ctx)
+{
+	spu_acquire(ctx);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
 }
Index: b/arch/powerpc/platforms/cell/spufs/coredump.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,238 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+struct spufs_ctx_info {
+	struct list_head list;
+	int dfd;
+	int memsize; /* in bytes */
+	struct spu_context *ctx;
+};
+
+static LIST_HEAD(ctx_info_list);
+
+static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *buffer,
+				size_t size, loff_t *off)
+{
+	u64 data;
+	int ret;
+
+	if (spufs_coredump_read[num].read)
+		return spufs_coredump_read[num].read(ctx, buffer, size, off);
+
+	data = spufs_coredump_read[num].get(ctx);
+	ret = copy_to_user(buffer, &data, 8);
+	return ret ? -EFAULT : 8;
+}
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * functions to write out all the necessary info.
+ */
+static int spufs_dump_write(struct file *file, const void *addr, int nr)
+{
+	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+
+static int spufs_dump_seek(struct file *file, loff_t off)
+{
+	if (file->f_op->llseek) {
+		if (file->f_op->llseek(file, off, 0) != off)
+			return 0;
+	} else
+		file->f_pos = off;
+	return 1;
+}
+
+static void spufs_fill_memsize(struct spufs_ctx_info *ctx_info)
+{
+	struct spu_context *ctx;
+	unsigned long long lslr;
+
+	ctx = ctx_info->ctx;
+	lslr = ctx->csa.priv2.spu_lslr_RW;
+	ctx_info->memsize = lslr + 1;
+}
+
+static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info)
+{
+	int dfd, memsize, i, sz, total = 0;
+	char *name;
+	char fullname[80];
+
+	dfd = ctx_info->dfd;
+	memsize = ctx_info->memsize;
+
+	for (i = 0; spufs_coredump_read[i].name; i++) {
+		name = spufs_coredump_read[i].name;
+		sz = spufs_coredump_read[i].size;
+
+		sprintf(fullname, "SPU/%d/%s", dfd, name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		if (!strcmp(name, "mem"))
+			total += roundup(memsize, 4);
+		else
+			total += roundup(sz, 4);
+	}
+
+	return total;
+}
+
+static int spufs_add_one_context(struct file *file, int dfd)
+{
+	struct spu_context *ctx;
+	struct spufs_ctx_info *ctx_info;
+	int size;
+
+	ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		return 0;
+
+	ctx_info = kzalloc(sizeof(*ctx_info), GFP_KERNEL);
+	if (unlikely(!ctx_info))
+		return -ENOMEM;
+
+	ctx_info->dfd = dfd;
+	ctx_info->ctx = ctx;
+
+	spufs_fill_memsize(ctx_info);
+
+	size = spufs_ctx_note_size(ctx_info);
+	list_add(&ctx_info->list, &ctx_info_list);
+	return size;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors and sees
+ * if they are a directory in spufs.  In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ */
+static int spufs_arch_notes_size(void)
+{
+	struct fdtable *fdt = files_fdtable(current->files);
+	int size = 0, fd;
+
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		if (FD_ISSET(fd, fdt->open_fds)) {
+			struct file *file = fcheck(fd);
+
+			if (file && file->f_op == &spufs_context_fops) {
+				int rval = spufs_add_one_context(file, fd);
+				if (rval < 0)
+					break;
+				size += rval;
+			}
+		}
+	}
+
+	return size;
+}
+
+static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i,
+				struct file *file)
+{
+	struct spu_context *ctx;
+	loff_t pos = 0;
+	int sz, dfd, rc, total = 0;
+	const int bufsz = 4096;
+	char *name;
+	char fullname[80], *buf;
+	struct elf_note en;
+
+	buf = kmalloc(bufsz, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	dfd = ctx_info->dfd;
+	name = spufs_coredump_read[i].name;
+
+	if (!strcmp(name, "mem"))
+		sz = ctx_info->memsize;
+	else
+		sz = spufs_coredump_read[i].size;
+
+	ctx = ctx_info->ctx;
+	if (!ctx) {
+		return;
+	}
+
+	sprintf(fullname, "SPU/%d/%s", dfd, name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	if (!spufs_dump_write(file, &en, sizeof(en)))
+		return;
+	if (!spufs_dump_write(file, fullname, en.n_namesz))
+		return;
+	if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4)))
+		return;
+
+	do {
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!spufs_dump_write(file, buf, rc))
+				return;
+			total += rc;
+		}
+	} while (rc == bufsz && total < sz);
+
+	spufs_dump_seek(file, roundup((unsigned long)file->f_pos
+						- total + sz, 4));
+}
+
+static void spufs_arch_write_notes(struct file *file)
+{
+	int j;
+	struct spufs_ctx_info *ctx_info, *next;
+
+	list_for_each_entry_safe(ctx_info, next, &ctx_info_list, list) {
+		spu_acquire_saved(ctx_info->ctx);
+		for (j = 0; j < spufs_coredump_num_notes; j++)
+			spufs_arch_write_note(ctx_info, j, file);
+		spu_release(ctx_info->ctx);
+		list_del(&ctx_info->list);
+		kfree(ctx_info);
+	}
+}
+
+struct spu_coredump_calls spufs_coredump_calls = {
+	.arch_notes_size = spufs_arch_notes_size,
+	.arch_write_notes = spufs_arch_write_notes,
+	.owner = THIS_MODULE,
+};
Index: b/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -32,10 +32,12 @@
 #include <asm/io.h>
 #include <asm/semaphore.h>
 #include <asm/spu.h>
+#include <asm/spu_info.h>
 #include <asm/uaccess.h>
 
 #include "spufs.h"
 
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
 
 static int
 spufs_mem_open(struct inode *inode, struct file *file)
@@ -43,81 +45,92 @@ spufs_mem_open(struct inode *inode, stru
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 	file->private_data = ctx;
-	file->f_mapping = inode->i_mapping;
 	ctx->local_store = inode->i_mapping;
+	smp_wmb();
 	return 0;
 }
 
 static ssize_t
+__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	char *local_store = ctx->ops->get_ls(ctx);
+	return simple_read_from_buffer(buffer, size, pos, local_store,
+					LS_SIZE);
+}
+
+static ssize_t
 spufs_mem_read(struct file *file, char __user *buffer,
 				size_t size, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
-	char *local_store;
-	int ret;
+	ssize_t ret;
 
 	spu_acquire(ctx);
-
-	local_store = ctx->ops->get_ls(ctx);
-	ret = simple_read_from_buffer(buffer, size, pos, local_store, LS_SIZE);
-
+	ret = __spufs_mem_read(ctx, buffer, size, pos);
 	spu_release(ctx);
 	return ret;
 }
 
 static ssize_t
 spufs_mem_write(struct file *file, const char __user *buffer,
-					size_t size, loff_t *pos)
+					size_t size, loff_t *ppos)
 {
 	struct spu_context *ctx = file->private_data;
 	char *local_store;
+	loff_t pos = *ppos;
 	int ret;
 
-	size = min_t(ssize_t, LS_SIZE - *pos, size);
-	if (size <= 0)
+	if (pos < 0)
+		return -EINVAL;
+	if (pos > LS_SIZE)
 		return -EFBIG;
-	*pos += size;
+	if (size > LS_SIZE - pos)
+		size = LS_SIZE - pos;
 
 	spu_acquire(ctx);
-
 	local_store = ctx->ops->get_ls(ctx);
-	ret = copy_from_user(local_store + *pos - size,
-			     buffer, size) ? -EFAULT : size;
-
+	ret = copy_from_user(local_store + pos, buffer, size);
 	spu_release(ctx);
-	return ret;
+
+	if (ret)
+		return -EFAULT;
+	*ppos = pos + size;
+	return size;
 }
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *
-spufs_mem_mmap_nopage(struct vm_area_struct *vma,
-		      unsigned long address, int *type)
+static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
+					  unsigned long address)
 {
-	struct page *page = NOPAGE_SIGBUS;
-
 	struct spu_context *ctx = vma->vm_file->private_data;
-	unsigned long offset = address - vma->vm_start;
+	unsigned long pfn, offset = address - vma->vm_start;
+
 	offset += vma->vm_pgoff << PAGE_SHIFT;
 
+	if (offset >= LS_SIZE)
+		return NOPFN_SIGBUS;
+
 	spu_acquire(ctx);
 
-	if (ctx->state == SPU_STATE_SAVED)
-		page = vmalloc_to_page(ctx->csa.lscsa->ls + offset);
-	else
-		page = pfn_to_page((ctx->spu->local_store_phys + offset)
-				   >> PAGE_SHIFT);
+	if (ctx->state == SPU_STATE_SAVED) {
+		vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+							& ~_PAGE_NO_CACHE);
+		pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+	} else {
+		vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+					     | _PAGE_NO_CACHE);
+		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+	}
+	vm_insert_pfn(vma, address, pfn);
 
 	spu_release(ctx);
 
-	if (type)
-		*type = VM_FAULT_MINOR;
-
-	page_cache_get(page);
-	return page;
+	return NOPFN_REFAULT;
 }
 
+
 static struct vm_operations_struct spufs_mem_mmap_vmops = {
-	.nopage = spufs_mem_mmap_nopage,
+	.nopfn = spufs_mem_mmap_nopfn,
 };
 
 static int
@@ -126,125 +139,118 @@ spufs_mem_mmap(struct file *file, struct
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	/* FIXME: */
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE);
 
 	vma->vm_ops = &spufs_mem_mmap_vmops;
 	return 0;
 }
-#endif
 
-static struct file_operations spufs_mem_fops = {
+static const struct file_operations spufs_mem_fops = {
 	.open	 = spufs_mem_open,
 	.read    = spufs_mem_read,
 	.write   = spufs_mem_write,
 	.llseek  = generic_file_llseek,
-#ifdef CONFIG_SPUFS_MMAP
 	.mmap    = spufs_mem_mmap,
-#endif
 };
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *spufs_ps_nopage(struct vm_area_struct *vma,
+static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 				    unsigned long address,
-				    int *type, unsigned long ps_offs)
+				    unsigned long ps_offs,
+				    unsigned long ps_size)
 {
-	struct page *page = NOPAGE_SIGBUS;
-	int fault_type = VM_FAULT_SIGBUS;
 	struct spu_context *ctx = vma->vm_file->private_data;
-	unsigned long offset = address - vma->vm_start;
-	unsigned long area;
+	unsigned long area, offset = address - vma->vm_start;
 	int ret;
 
 	offset += vma->vm_pgoff << PAGE_SHIFT;
-	if (offset >= 0x4000)
-		goto out;
+	if (offset >= ps_size)
+		return NOPFN_SIGBUS;
 
-	ret = spu_acquire_runnable(ctx);
+	/* error here usually means a signal.. we might want to test
+	 * the error code more precisely though
+	 */
+	ret = spu_acquire_runnable(ctx, 0);
 	if (ret)
-		goto out;
+		return NOPFN_REFAULT;
 
 	area = ctx->spu->problem_phys + ps_offs;
-	page = pfn_to_page((area + offset) >> PAGE_SHIFT);
-	fault_type = VM_FAULT_MINOR;
-	page_cache_get(page);
-
+	vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
 	spu_release(ctx);
 
-      out:
-	if (type)
-		*type = fault_type;
-
-	return page;
+	return NOPFN_REFAULT;
 }
 
-static struct page *spufs_cntl_mmap_nopage(struct vm_area_struct *vma,
-					   unsigned long address, int *type)
+#if SPUFS_MMAP_4K
+static unsigned long spufs_cntl_mmap_nopfn(struct vm_area_struct *vma,
+					   unsigned long address)
 {
-	return spufs_ps_nopage(vma, address, type, 0x4000);
+	return spufs_ps_nopfn(vma, address, 0x4000, 0x1000);
 }
 
 static struct vm_operations_struct spufs_cntl_mmap_vmops = {
-	.nopage = spufs_cntl_mmap_nopage,
+	.nopfn = spufs_cntl_mmap_nopfn,
 };
 
 /*
  * mmap support for problem state control area [0x4000 - 0x4fff].
- * Mapping this area requires that the application have CAP_SYS_RAWIO,
- * as these registers require special care when read/writing.
  */
 static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
 
 	vma->vm_ops = &spufs_cntl_mmap_vmops;
 	return 0;
 }
-#endif
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
 
-static int spufs_cntl_open(struct inode *inode, struct file *file)
+static u64 spufs_cntl_get(void *data)
 {
-	struct spufs_inode_info *i = SPUFS_I(inode);
-	struct spu_context *ctx = i->i_ctx;
+	struct spu_context *ctx = data;
+	u64 val;
 
-	file->private_data = ctx;
-	file->f_mapping = inode->i_mapping;
-	ctx->cntl = inode->i_mapping;
-	return 0;
+	spu_acquire(ctx);
+	val = ctx->ops->status_read(ctx);
+	spu_release(ctx);
+
+	return val;
 }
 
-static ssize_t
-spufs_cntl_read(struct file *file, char __user *buffer,
-		size_t size, loff_t *pos)
+static void spufs_cntl_set(void *data, u64 val)
 {
-	/* FIXME: read from spu status */
-	return -EINVAL;
+	struct spu_context *ctx = data;
+
+	spu_acquire(ctx);
+	ctx->ops->runcntl_write(ctx, val);
+	spu_release(ctx);
 }
 
-static ssize_t
-spufs_cntl_write(struct file *file, const char __user *buffer,
-		 size_t size, loff_t *pos)
+static int spufs_cntl_open(struct inode *inode, struct file *file)
 {
-	/* FIXME: write to runctl bit */
-	return -EINVAL;
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = ctx;
+	ctx->cntl = inode->i_mapping;
+	smp_wmb();
+	return simple_attr_open(inode, file, spufs_cntl_get,
+					spufs_cntl_set, "0x%08lx");
 }
 
-static struct file_operations spufs_cntl_fops = {
+static const struct file_operations spufs_cntl_fops = {
 	.open = spufs_cntl_open,
-	.read = spufs_cntl_read,
-	.write = spufs_cntl_write,
-#ifdef CONFIG_SPUFS_MMAP
+	.release = simple_attr_close,
+	.read = simple_attr_read,
+	.write = simple_attr_write,
 	.mmap = spufs_cntl_mmap,
-#endif
 };
 
 static int
@@ -256,18 +262,23 @@ spufs_regs_open(struct inode *inode, str
 }
 
 static ssize_t
+__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      lscsa->gprs, sizeof lscsa->gprs);
+}
+
+static ssize_t
 spufs_regs_read(struct file *file, char __user *buffer,
 		size_t size, loff_t *pos)
 {
-	struct spu_context *ctx = file->private_data;
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
 	int ret;
+	struct spu_context *ctx = file->private_data;
 
 	spu_acquire_saved(ctx);
-
-	ret = simple_read_from_buffer(buffer, size, pos,
-				      lscsa->gprs, sizeof lscsa->gprs);
-
+	ret = __spufs_regs_read(ctx, buffer, size, pos);
 	spu_release(ctx);
 	return ret;
 }
@@ -294,7 +305,7 @@ spufs_regs_write(struct file *file, cons
 	return ret;
 }
 
-static struct file_operations spufs_regs_fops = {
+static const struct file_operations spufs_regs_fops = {
 	.open	 = spufs_regs_open,
 	.read    = spufs_regs_read,
 	.write   = spufs_regs_write,
@@ -302,18 +313,23 @@ static struct file_operations spufs_regs
 };
 
 static ssize_t
+__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
+			size_t size, loff_t * pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+static ssize_t
 spufs_fpcr_read(struct file *file, char __user * buffer,
 		size_t size, loff_t * pos)
 {
-	struct spu_context *ctx = file->private_data;
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
 	int ret;
+	struct spu_context *ctx = file->private_data;
 
 	spu_acquire_saved(ctx);
-
-	ret = simple_read_from_buffer(buffer, size, pos,
-				      &lscsa->fpcr, sizeof(lscsa->fpcr));
-
+	ret = __spufs_fpcr_read(ctx, buffer, size, pos);
 	spu_release(ctx);
 	return ret;
 }
@@ -340,7 +356,7 @@ spufs_fpcr_write(struct file *file, cons
 	return ret;
 }
 
-static struct file_operations spufs_fpcr_fops = {
+static const struct file_operations spufs_fpcr_fops = {
 	.open = spufs_regs_open,
 	.read = spufs_fpcr_read,
 	.write = spufs_fpcr_write,
@@ -356,30 +372,57 @@ static int spufs_pipe_open(struct inode 
 	return nonseekable_open(inode, file);
 }
 
+/*
+ * Read as many bytes from the mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ */
 static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
 			size_t len, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
-	u32 mbox_data;
-	int ret;
+	u32 mbox_data, __user *udata;
+	ssize_t count;
 
 	if (len < 4)
 		return -EINVAL;
 
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
 	spu_acquire(ctx);
-	ret = ctx->ops->mbox_read(ctx, &mbox_data);
+	for (count = 0; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->mbox_read(ctx, &mbox_data);
+		if (ret == 0)
+			break;
+
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(mbox_data, udata);
+		if (ret) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+	}
 	spu_release(ctx);
 
-	if (!ret)
-		return -EAGAIN;
+	if (!count)
+		count = -EAGAIN;
 
-	if (copy_to_user(buf, &mbox_data, sizeof mbox_data))
-		return -EFAULT;
-
-	return 4;
+	return count;
 }
 
-static struct file_operations spufs_mbox_fops = {
+static const struct file_operations spufs_mbox_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_mbox_read,
 };
@@ -405,7 +448,7 @@ static ssize_t spufs_mbox_stat_read(stru
 	return 4;
 }
 
-static struct file_operations spufs_mbox_stat_fops = {
+static const struct file_operations spufs_mbox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_mbox_stat_read,
 };
@@ -432,36 +475,70 @@ void spufs_ibox_callback(struct spu *spu
 	kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN);
 }
 
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ */
 static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
 			size_t len, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
-	u32 ibox_data;
-	ssize_t ret;
+	u32 ibox_data, __user *udata;
+	ssize_t count;
 
 	if (len < 4)
 		return -EINVAL;
 
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
 	spu_acquire(ctx);
 
-	ret = 0;
+	/* wait only for the first element */
+	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
 		if (!spu_ibox_read(ctx, &ibox_data))
-			ret = -EAGAIN;
+			count = -EAGAIN;
 	} else {
-		ret = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
 	}
+	if (count)
+		goto out;
 
-	spu_release(ctx);
+	/* if we can't write at all, return -EFAULT */
+	count = __put_user(ibox_data, udata);
+	if (count)
+		goto out;
 
-	if (ret)
-		return ret;
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->ibox_read(ctx, &ibox_data);
+		if (ret == 0)
+			break;
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(ibox_data, udata);
+		if (ret)
+			break;
+	}
 
-	ret = 4;
-	if (copy_to_user(buf, &ibox_data, sizeof ibox_data))
-		ret = -EFAULT;
+out:
+	spu_release(ctx);
 
-	return ret;
+	return count;
 }
 
 static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait)
@@ -478,7 +555,7 @@ static unsigned int spufs_ibox_poll(stru
 	return mask;
 }
 
-static struct file_operations spufs_ibox_fops = {
+static const struct file_operations spufs_ibox_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_ibox_read,
 	.poll	= spufs_ibox_poll,
@@ -504,7 +581,7 @@ static ssize_t spufs_ibox_stat_read(stru
 	return 4;
 }
 
-static struct file_operations spufs_ibox_stat_fops = {
+static const struct file_operations spufs_ibox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_ibox_stat_read,
 };
@@ -534,32 +611,67 @@ void spufs_wbox_callback(struct spu *spu
 	kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT);
 }
 
+/*
+ * Write as many bytes to the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is availabyl, but return when we have been able to
+ * write something.
+ */
 static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
 			size_t len, loff_t *pos)
 {
 	struct spu_context *ctx = file->private_data;
-	u32 wbox_data;
-	int ret;
+	u32 wbox_data, __user *udata;
+	ssize_t count;
 
 	if (len < 4)
 		return -EINVAL;
 
-	if (copy_from_user(&wbox_data, buf, sizeof wbox_data))
+	udata = (void __user *)buf;
+	if (!access_ok(VERIFY_READ, buf, len))
+		return -EFAULT;
+
+	if (__get_user(wbox_data, udata))
 		return -EFAULT;
 
 	spu_acquire(ctx);
 
-	ret = 0;
+	/*
+	 * make sure we can at least write one element, by waiting
+	 * in case of !O_NONBLOCK
+	 */
+	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
 		if (!spu_wbox_write(ctx, wbox_data))
-			ret = -EAGAIN;
+			count = -EAGAIN;
 	} else {
-		ret = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
 	}
 
-	spu_release(ctx);
+	if (count)
+		goto out;
 
-	return ret ? ret : sizeof wbox_data;
+	/* write as much as possible */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = __get_user(wbox_data, udata);
+		if (ret)
+			break;
+
+		ret = spu_wbox_write(ctx, wbox_data);
+		if (ret == 0)
+			break;
+	}
+
+out:
+	spu_release(ctx);
+	return count;
 }
 
 static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait)
@@ -576,7 +688,7 @@ static unsigned int spufs_wbox_poll(stru
 	return mask;
 }
 
-static struct file_operations spufs_wbox_fops = {
+static const struct file_operations spufs_wbox_fops = {
 	.open	= spufs_pipe_open,
 	.write	= spufs_wbox_write,
 	.poll	= spufs_wbox_poll,
@@ -602,7 +714,7 @@ static ssize_t spufs_wbox_stat_read(stru
 	return 4;
 }
 
-static struct file_operations spufs_wbox_stat_fops = {
+static const struct file_operations spufs_wbox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_wbox_stat_read,
 };
@@ -612,28 +724,46 @@ static int spufs_signal1_open(struct ino
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 	file->private_data = ctx;
-	file->f_mapping = inode->i_mapping;
 	ctx->signal1 = inode->i_mapping;
+	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
-static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
-	struct spu_context *ctx = file->private_data;
+	int ret = 0;
 	u32 data;
 
 	if (len < 4)
 		return -EINVAL;
 
-	spu_acquire(ctx);
-	data = ctx->ops->signal1_read(ctx);
-	spu_release(ctx);
+	if (ctx->csa.spu_chnlcnt_RW[3]) {
+		data = ctx->csa.spu_chnldata_RW[3];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
 
 	if (copy_to_user(buf, &data, 4))
 		return -EFAULT;
 
-	return 4;
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	spu_acquire_saved(ctx);
+	ret = __spufs_signal1_read(ctx, buf, len, pos);
+	spu_release(ctx);
+
+	return ret;
 }
 
 static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
@@ -657,15 +787,23 @@ static ssize_t spufs_signal1_write(struc
 	return 4;
 }
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *spufs_signal1_mmap_nopage(struct vm_area_struct *vma,
-					      unsigned long address, int *type)
+static unsigned long spufs_signal1_mmap_nopfn(struct vm_area_struct *vma,
+					      unsigned long address)
 {
-	return spufs_ps_nopage(vma, address, type, 0x14000);
+#if PAGE_SIZE == 0x1000
+	return spufs_ps_nopfn(vma, address, 0x14000, 0x1000);
+#elif PAGE_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_nopfn(vma, address, 0x10000, 0x10000);
+#else
+#error unsupported page size
+#endif
 }
 
 static struct vm_operations_struct spufs_signal1_mmap_vmops = {
-	.nopage = spufs_signal1_mmap_nopage,
+	.nopfn = spufs_signal1_mmap_nopfn,
 };
 
 static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
@@ -673,22 +811,19 @@ static int spufs_signal1_mmap(struct fil
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
 
 	vma->vm_ops = &spufs_signal1_mmap_vmops;
 	return 0;
 }
-#endif
 
-static struct file_operations spufs_signal1_fops = {
+static const struct file_operations spufs_signal1_fops = {
 	.open = spufs_signal1_open,
 	.read = spufs_signal1_read,
 	.write = spufs_signal1_write,
-#ifdef CONFIG_SPUFS_MMAP
 	.mmap = spufs_signal1_mmap,
-#endif
 };
 
 static int spufs_signal2_open(struct inode *inode, struct file *file)
@@ -696,30 +831,46 @@ static int spufs_signal2_open(struct ino
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 	file->private_data = ctx;
-	file->f_mapping = inode->i_mapping;
 	ctx->signal2 = inode->i_mapping;
+	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
-static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
-	struct spu_context *ctx;
+	int ret = 0;
 	u32 data;
 
-	ctx = file->private_data;
-
 	if (len < 4)
 		return -EINVAL;
 
-	spu_acquire(ctx);
-	data = ctx->ops->signal2_read(ctx);
-	spu_release(ctx);
+	if (ctx->csa.spu_chnlcnt_RW[4]) {
+		data =  ctx->csa.spu_chnldata_RW[4];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
 
 	if (copy_to_user(buf, &data, 4))
 		return -EFAULT;
 
-	return 4;
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	spu_acquire_saved(ctx);
+	ret = __spufs_signal2_read(ctx, buf, len, pos);
+	spu_release(ctx);
+
+	return ret;
 }
 
 static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
@@ -743,15 +894,24 @@ static ssize_t spufs_signal2_write(struc
 	return 4;
 }
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *spufs_signal2_mmap_nopage(struct vm_area_struct *vma,
-					      unsigned long address, int *type)
-{
-	return spufs_ps_nopage(vma, address, type, 0x1c000);
+#if SPUFS_MMAP_4K
+static unsigned long spufs_signal2_mmap_nopfn(struct vm_area_struct *vma,
+					      unsigned long address)
+{
+#if PAGE_SIZE == 0x1000
+	return spufs_ps_nopfn(vma, address, 0x1c000, 0x1000);
+#elif PAGE_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_nopfn(vma, address, 0x10000, 0x10000);
+#else
+#error unsupported page size
+#endif
 }
 
 static struct vm_operations_struct spufs_signal2_mmap_vmops = {
-	.nopage = spufs_signal2_mmap_nopage,
+	.nopfn = spufs_signal2_mmap_nopfn,
 };
 
 static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
@@ -759,23 +919,22 @@ static int spufs_signal2_mmap(struct fil
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	/* FIXME: */
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
 
 	vma->vm_ops = &spufs_signal2_mmap_vmops;
 	return 0;
 }
-#endif
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
 
-static struct file_operations spufs_signal2_fops = {
+static const struct file_operations spufs_signal2_fops = {
 	.open = spufs_signal2_open,
 	.read = spufs_signal2_read,
 	.write = spufs_signal2_write,
-#ifdef CONFIG_SPUFS_MMAP
 	.mmap = spufs_signal2_mmap,
-#endif
 };
 
 static void spufs_signal1_type_set(void *data, u64 val)
@@ -787,13 +946,19 @@ static void spufs_signal1_type_set(void 
 	spu_release(ctx);
 }
 
+static u64 __spufs_signal1_type_get(void *data)
+{
+	struct spu_context *ctx = data;
+	return ctx->ops->signal1_type_get(ctx);
+}
+
 static u64 spufs_signal1_type_get(void *data)
 {
 	struct spu_context *ctx = data;
 	u64 ret;
 
 	spu_acquire(ctx);
-	ret = ctx->ops->signal1_type_get(ctx);
+	ret = __spufs_signal1_type_get(data);
 	spu_release(ctx);
 
 	return ret;
@@ -810,13 +975,19 @@ static void spufs_signal2_type_set(void 
 	spu_release(ctx);
 }
 
+static u64 __spufs_signal2_type_get(void *data)
+{
+	struct spu_context *ctx = data;
+	return ctx->ops->signal2_type_get(ctx);
+}
+
 static u64 spufs_signal2_type_get(void *data)
 {
 	struct spu_context *ctx = data;
 	u64 ret;
 
 	spu_acquire(ctx);
-	ret = ctx->ops->signal2_type_get(ctx);
+	ret = __spufs_signal2_type_get(data);
 	spu_release(ctx);
 
 	return ret;
@@ -824,87 +995,124 @@ static u64 spufs_signal2_type_get(void *
 DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
 					spufs_signal2_type_set, "%llu");
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *spufs_mss_mmap_nopage(struct vm_area_struct *vma,
-					   unsigned long address, int *type)
+#if SPUFS_MMAP_4K
+static unsigned long spufs_mss_mmap_nopfn(struct vm_area_struct *vma,
+					  unsigned long address)
 {
-	return spufs_ps_nopage(vma, address, type, 0x0000);
+	return spufs_ps_nopfn(vma, address, 0x0000, 0x1000);
 }
 
 static struct vm_operations_struct spufs_mss_mmap_vmops = {
-	.nopage = spufs_mss_mmap_nopage,
+	.nopfn = spufs_mss_mmap_nopfn,
 };
 
 /*
  * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
- * Mapping this area requires that the application have CAP_SYS_RAWIO,
- * as these registers require special care when read/writing.
  */
 static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
 
 	vma->vm_ops = &spufs_mss_mmap_vmops;
 	return 0;
 }
-#endif
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
 
 static int spufs_mss_open(struct inode *inode, struct file *file)
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
 
 	file->private_data = i->i_ctx;
+	ctx->mss = inode->i_mapping;
+	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
-static struct file_operations spufs_mss_fops = {
+static const struct file_operations spufs_mss_fops = {
 	.open	 = spufs_mss_open,
-#ifdef CONFIG_SPUFS_MMAP
 	.mmap	 = spufs_mss_mmap,
-#endif
 };
 
+static unsigned long spufs_psmap_mmap_nopfn(struct vm_area_struct *vma,
+					    unsigned long address)
+{
+	return spufs_ps_nopfn(vma, address, 0x0000, 0x20000);
+}
 
-#ifdef CONFIG_SPUFS_MMAP
-static struct page *spufs_mfc_mmap_nopage(struct vm_area_struct *vma,
-					   unsigned long address, int *type)
+static struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.nopfn = spufs_psmap_mmap_nopfn,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	return spufs_ps_nopage(vma, address, type, 0x3000);
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
+				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
+
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+	return 0;
+}
+
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = i->i_ctx;
+	ctx->psmap = inode->i_mapping;
+	smp_wmb();
+	return nonseekable_open(inode, file);
+}
+
+static const struct file_operations spufs_psmap_fops = {
+	.open	 = spufs_psmap_open,
+	.mmap	 = spufs_psmap_mmap,
+};
+
+
+#if SPUFS_MMAP_4K
+static unsigned long spufs_mfc_mmap_nopfn(struct vm_area_struct *vma,
+					  unsigned long address)
+{
+	return spufs_ps_nopfn(vma, address, 0x3000, 0x1000);
 }
 
 static struct vm_operations_struct spufs_mfc_mmap_vmops = {
-	.nopage = spufs_mfc_mmap_nopage,
+	.nopfn = spufs_mfc_mmap_nopfn,
 };
 
 /*
  * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
- * Mapping this area requires that the application have CAP_SYS_RAWIO,
- * as these registers require special care when read/writing.
  */
 static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
 	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot)
 				     | _PAGE_NO_CACHE | _PAGE_GUARDED);
 
 	vma->vm_ops = &spufs_mfc_mmap_vmops;
 	return 0;
 }
-#endif
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
 
 static int spufs_mfc_open(struct inode *inode, struct file *file)
 {
@@ -919,6 +1127,8 @@ static int spufs_mfc_open(struct inode *
 		return -EBUSY;
 
 	file->private_data = ctx;
+	ctx->mfc = inode->i_mapping;
+	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
@@ -1103,7 +1313,7 @@ static ssize_t spufs_mfc_write(struct fi
 	if (ret)
 		goto out;
 
-	spu_acquire_runnable(ctx);
+	spu_acquire_runnable(ctx, 0);
 	if (file->f_flags & O_NONBLOCK) {
 		ret = ctx->ops->send_mfc_command(ctx, &cmd);
 	} else {
@@ -1119,6 +1329,7 @@ static ssize_t spufs_mfc_write(struct fi
 		goto out;
 
 	ctx->tagwait |= 1 << cmd.tag;
+	ret = size;
 
 out:
 	return ret;
@@ -1186,7 +1397,7 @@ static int spufs_mfc_fasync(int fd, stru
 	return fasync_helper(fd, file, on, &ctx->mfc_fasync);
 }
 
-static struct file_operations spufs_mfc_fops = {
+static const struct file_operations spufs_mfc_fops = {
 	.open	 = spufs_mfc_open,
 	.read	 = spufs_mfc_read,
 	.write	 = spufs_mfc_write,
@@ -1194,9 +1405,7 @@ static struct file_operations spufs_mfc_
 	.flush	 = spufs_mfc_flush,
 	.fsync	 = spufs_mfc_fsync,
 	.fasync	 = spufs_mfc_fasync,
-#ifdef CONFIG_SPUFS_MMAP
 	.mmap	 = spufs_mfc_mmap,
-#endif
 };
 
 static void spufs_npc_set(void *data, u64 val)
@@ -1216,7 +1425,8 @@ static u64 spufs_npc_get(void *data)
 	spu_release(ctx);
 	return ret;
 }
-DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "%llx\n")
+DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+			"0x%llx\n")
 
 static void spufs_decr_set(void *data, u64 val)
 {
@@ -1227,18 +1437,24 @@ static void spufs_decr_set(void *data, u
 	spu_release(ctx);
 }
 
-static u64 spufs_decr_get(void *data)
+static u64 __spufs_decr_get(void *data)
 {
 	struct spu_context *ctx = data;
 	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->decr.slot[0];
+}
+
+static u64 spufs_decr_get(void *data)
+{
+	struct spu_context *ctx = data;
 	u64 ret;
 	spu_acquire_saved(ctx);
-	ret = lscsa->decr.slot[0];
+	ret = __spufs_decr_get(data);
 	spu_release(ctx);
 	return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
-			"%llx\n")
+			"0x%llx\n")
 
 static void spufs_decr_status_set(void *data, u64 val)
 {
@@ -1249,62 +1465,76 @@ static void spufs_decr_status_set(void *
 	spu_release(ctx);
 }
 
-static u64 spufs_decr_status_get(void *data)
+static u64 __spufs_decr_status_get(void *data)
 {
 	struct spu_context *ctx = data;
 	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->decr_status.slot[0];
+}
+
+static u64 spufs_decr_status_get(void *data)
+{
+	struct spu_context *ctx = data;
 	u64 ret;
 	spu_acquire_saved(ctx);
-	ret = lscsa->decr_status.slot[0];
+	ret = __spufs_decr_status_get(data);
 	spu_release(ctx);
 	return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
-			spufs_decr_status_set, "%llx\n")
+			spufs_decr_status_set, "0x%llx\n")
 
-static void spufs_spu_tag_mask_set(void *data, u64 val)
+static void spufs_event_mask_set(void *data, u64 val)
 {
 	struct spu_context *ctx = data;
 	struct spu_lscsa *lscsa = ctx->csa.lscsa;
 	spu_acquire_saved(ctx);
-	lscsa->tag_mask.slot[0] = (u32) val;
+	lscsa->event_mask.slot[0] = (u32) val;
 	spu_release(ctx);
 }
 
-static u64 spufs_spu_tag_mask_get(void *data)
+static u64 __spufs_event_mask_get(void *data)
 {
 	struct spu_context *ctx = data;
 	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->event_mask.slot[0];
+}
+
+static u64 spufs_event_mask_get(void *data)
+{
+	struct spu_context *ctx = data;
 	u64 ret;
 	spu_acquire_saved(ctx);
-	ret = lscsa->tag_mask.slot[0];
+	ret = __spufs_event_mask_get(data);
 	spu_release(ctx);
 	return ret;
 }
-DEFINE_SIMPLE_ATTRIBUTE(spufs_spu_tag_mask_ops, spufs_spu_tag_mask_get,
-			spufs_spu_tag_mask_set, "%llx\n")
+DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+			spufs_event_mask_set, "0x%llx\n")
 
-static void spufs_event_mask_set(void *data, u64 val)
+static u64 __spufs_event_status_get(void *data)
 {
 	struct spu_context *ctx = data;
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
-	spu_acquire_saved(ctx);
-	lscsa->event_mask.slot[0] = (u32) val;
-	spu_release(ctx);
+	struct spu_state *state = &ctx->csa;
+	u64 stat;
+	stat = state->spu_chnlcnt_RW[0];
+	if (stat)
+		return state->spu_chnldata_RW[0];
+	return 0;
 }
 
-static u64 spufs_event_mask_get(void *data)
+static u64 spufs_event_status_get(void *data)
 {
 	struct spu_context *ctx = data;
-	struct spu_lscsa *lscsa = ctx->csa.lscsa;
-	u64 ret;
+	u64 ret = 0;
+
 	spu_acquire_saved(ctx);
-	ret = lscsa->event_mask.slot[0];
+	ret = __spufs_event_status_get(data);
 	spu_release(ctx);
 	return ret;
 }
-DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
-			spufs_event_mask_set, "%llx\n")
+DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+			NULL, "0x%llx\n")
 
 static void spufs_srr0_set(void *data, u64 val)
 {
@@ -1326,7 +1556,7 @@ static u64 spufs_srr0_get(void *data)
 	return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
-			"%llx\n")
+			"0x%llx\n")
 
 static u64 spufs_id_get(void *data)
 {
@@ -1342,7 +1572,272 @@ static u64 spufs_id_get(void *data)
 
 	return num;
 }
-DEFINE_SIMPLE_ATTRIBUTE(spufs_id_ops, spufs_id_get, 0, "0x%llx\n")
+DEFINE_SIMPLE_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n")
+
+static u64 __spufs_object_id_get(void *data)
+{
+	struct spu_context *ctx = data;
+	return ctx->object_id;
+}
+
+static u64 spufs_object_id_get(void *data)
+{
+	/* FIXME: Should there really be no locking here? */
+	return __spufs_object_id_get(data);
+}
+
+static void spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *ctx = data;
+	ctx->object_id = id;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		spufs_object_id_set, "0x%llx\n");
+
+static u64 __spufs_lslr_get(void *data)
+{
+	struct spu_context *ctx = data;
+	return ctx->csa.priv2.spu_lslr_RW;
+}
+
+static u64 spufs_lslr_get(void *data)
+{
+	struct spu_context *ctx = data;
+	u64 ret;
+
+	spu_acquire_saved(ctx);
+	ret = __spufs_lslr_get(data);
+	spu_release(ctx);
+
+	return ret;
+}
+DEFINE_SIMPLE_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n")
+
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	return 0;
+}
+
+static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 mbox_stat;
+	u32 data;
+
+	mbox_stat = ctx->csa.prob.mb_stat_R;
+	if (mbox_stat & 0x0000ff) {
+		data = ctx->csa.prob.pu_mb_R;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spu_acquire_saved(ctx);
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_mbox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_mbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_mbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
+				char __user *buf, size_t len, loff_t *pos)
+{
+	u32 ibox_stat;
+	u32 data;
+
+	ibox_stat = ctx->csa.prob.mb_stat_R;
+	if (ibox_stat & 0xff0000) {
+		data = ctx->csa.priv2.puint_mb_R;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spu_acquire_saved(ctx);
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_ibox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_ibox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_ibox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	int i, cnt;
+	u32 data[4];
+	u32 wbox_stat;
+
+	wbox_stat = ctx->csa.prob.mb_stat_R;
+	cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
+	for (i = 0; i < cnt; i++) {
+		data[i] = ctx->csa.spu_mailbox_data[i];
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &data,
+				cnt * sizeof(u32));
+}
+
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spu_acquire_saved(ctx);
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_wbox_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_wbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_wbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_dma_info info;
+	struct mfc_cq_sr *qp, *spuqp;
+	int i;
+
+	info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+	info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+	info.dma_info_status = ctx->csa.spu_chnldata_RW[24];
+	info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+	info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+	for (i = 0; i < 16; i++) {
+		qp = &info.dma_info_command_data[i];
+		spuqp = &ctx->csa.priv2.spuq[i];
+
+		qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spu_acquire_saved(ctx);
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_dma_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_dma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_dma_info_read,
+};
+
+static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_proxydma_info info;
+	struct mfc_cq_sr *qp, *puqp;
+	int ret = sizeof info;
+	int i;
+
+	if (len < ret)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+	info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+	info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
+	for (i = 0; i < 8; i++) {
+		qp = &info.proxydma_info_command_data[i];
+		puqp = &ctx->csa.priv2.puq[i];
+
+		qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	spu_acquire_saved(ctx);
+	spin_lock(&ctx->csa.register_lock);
+	ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static const struct file_operations spufs_proxydma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_proxydma_info_read,
+};
 
 struct tree_descr spufs_dir_contents[] = {
 	{ "mem",  &spufs_mem_fops,  0666, },
@@ -1357,16 +1852,70 @@ struct tree_descr spufs_dir_contents[] =
 	{ "signal2", &spufs_signal2_fops, 0666, },
 	{ "signal1_type", &spufs_signal1_type, 0666, },
 	{ "signal2_type", &spufs_signal2_type, 0666, },
-	{ "mss", &spufs_mss_fops, 0666, },
-	{ "mfc", &spufs_mfc_fops, 0666, },
 	{ "cntl", &spufs_cntl_fops,  0666, },
-	{ "npc", &spufs_npc_ops, 0666, },
 	{ "fpcr", &spufs_fpcr_fops, 0666, },
+	{ "lslr", &spufs_lslr_ops, 0444, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "srr0", &spufs_srr0_ops, 0666, },
 	{ "decr", &spufs_decr_ops, 0666, },
 	{ "decr_status", &spufs_decr_status_ops, 0666, },
-	{ "spu_tag_mask", &spufs_spu_tag_mask_ops, 0666, },
 	{ "event_mask", &spufs_event_mask_ops, 0666, },
-	{ "srr0", &spufs_srr0_ops, 0666, },
+	{ "event_status", &spufs_event_status_ops, 0444, },
+	{ "psmap", &spufs_psmap_fops, 0666, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "mbox_info", &spufs_mbox_info_fops, 0444, },
+	{ "ibox_info", &spufs_ibox_info_fops, 0444, },
+	{ "wbox_info", &spufs_wbox_info_fops, 0444, },
+	{ "dma_info", &spufs_dma_info_fops, 0444, },
+	{ "proxydma_info", &spufs_proxydma_info_fops, 0444, },
+	{},
+};
+
+struct tree_descr spufs_dir_nosched_contents[] = {
+	{ "mem",  &spufs_mem_fops,  0666, },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, },
+	{ "signal1", &spufs_signal1_fops, 0666, },
+	{ "signal2", &spufs_signal2_fops, 0666, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "psmap", &spufs_psmap_fops, 0666, },
 	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
 	{},
 };
+
+struct spufs_coredump_reader spufs_coredump_read[] = {
+	{ "regs", __spufs_regs_read, NULL, 128 * 16 },
+	{ "fpcr", __spufs_fpcr_read, NULL, 16 },
+	{ "lslr", NULL, __spufs_lslr_get, 11 },
+	{ "decr", NULL, __spufs_decr_get, 11 },
+	{ "decr_status", NULL, __spufs_decr_status_get, 11 },
+	{ "mem", __spufs_mem_read, NULL, 256 * 1024, },
+	{ "signal1", __spufs_signal1_read, NULL, 4 },
+	{ "signal1_type", NULL, __spufs_signal1_type_get, 2 },
+	{ "signal2", __spufs_signal2_read, NULL, 4 },
+	{ "signal2_type", NULL, __spufs_signal2_type_get, 2 },
+	{ "event_mask", NULL, __spufs_event_mask_get, 8 },
+	{ "event_status", NULL, __spufs_event_status_get, 8 },
+	{ "mbox_info", __spufs_mbox_info_read, NULL, 4 },
+	{ "ibox_info", __spufs_ibox_info_read, NULL, 4 },
+	{ "wbox_info", __spufs_wbox_info_read, NULL, 16 },
+	{ "dma_info", __spufs_dma_info_read, NULL, 69 * 8 },
+	{ "proxydma_info", __spufs_proxydma_info_read, NULL, 35 * 8 },
+	{ "object-id", NULL, __spufs_object_id_get, 19 },
+	{ },
+};
+int spufs_coredump_num_notes = ARRAY_SIZE(spufs_coredump_read) - 1;
+
Index: b/arch/powerpc/platforms/cell/spufs/gang.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,81 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang;
+
+	gang = kzalloc(sizeof *gang, GFP_KERNEL);
+	if (!gang)
+		goto out;
+
+	kref_init(&gang->kref);
+	mutex_init(&gang->mutex);
+	INIT_LIST_HEAD(&gang->list);
+
+out:
+	return gang;
+}
+
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang;
+	gang = container_of(kref, struct spu_gang, kref);
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+	kref_get(&gang->kref);
+	return gang;
+}
+
+int put_spu_gang(struct spu_gang *gang)
+{
+	return kref_put(&gang->kref, &destroy_spu_gang);
+}
+
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	ctx->gang = get_spu_gang(gang);
+	list_add(&ctx->gang_list, &gang->list);
+	gang->contexts++;
+	mutex_unlock(&gang->mutex);
+}
+
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	WARN_ON(ctx->gang != gang);
+	list_del_init(&ctx->gang_list);
+	gang->contexts--;
+	mutex_unlock(&gang->mutex);
+
+	put_spu_gang(gang);
+}
Index: b/arch/powerpc/platforms/cell/spufs/hw_ops.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -135,21 +135,11 @@ static int spu_hw_wbox_write(struct spu_
 	return ret;
 }
 
-static u32 spu_hw_signal1_read(struct spu_context *ctx)
-{
-	return in_be32(&ctx->spu->problem->signal_notify1);
-}
-
 static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
 {
 	out_be32(&ctx->spu->problem->signal_notify1, data);
 }
 
-static u32 spu_hw_signal2_read(struct spu_context *ctx)
-{
-	return in_be32(&ctx->spu->problem->signal_notify1);
-}
-
 static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
 {
 	out_be32(&ctx->spu->problem->signal_notify2, data);
@@ -217,24 +207,45 @@ static char *spu_hw_get_ls(struct spu_co
 	return ctx->spu->local_store;
 }
 
-static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
 {
-	eieio();
-	out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+	return in_be32(&ctx->spu->problem->spu_runcntl_RW);
 }
 
-static void spu_hw_runcntl_stop(struct spu_context *ctx)
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
 {
 	spin_lock_irq(&ctx->spu->register_lock);
-	out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
-	while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING)
-		cpu_relax();
+	if (val & SPU_RUNCNTL_ISOLATE)
+		out_be64(&ctx->spu->priv2->spu_privcntl_RW, 4LL);
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
 	spin_unlock_irq(&ctx->spu->register_lock);
 }
 
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
 static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
 {
-	struct spu_problem *prob = ctx->spu->problem;
+	struct spu_problem __iomem *prob = ctx->spu->problem;
 	int ret;
 
 	spin_lock_irq(&ctx->spu->register_lock);
@@ -263,7 +274,7 @@ static int spu_hw_send_mfc_command(struc
 					struct mfc_dma_command *cmd)
 {
 	u32 status;
-	struct spu_problem *prob = ctx->spu->problem;
+	struct spu_problem __iomem *prob = ctx->spu->problem;
 
 	spin_lock_irq(&ctx->spu->register_lock);
 	out_be32(&prob->mfc_lsa_W, cmd->lsa);
@@ -291,9 +302,7 @@ struct spu_context_ops spu_hw_ops = {
 	.mbox_stat_poll = spu_hw_mbox_stat_poll,
 	.ibox_read = spu_hw_ibox_read,
 	.wbox_write = spu_hw_wbox_write,
-	.signal1_read = spu_hw_signal1_read,
 	.signal1_write = spu_hw_signal1_write,
-	.signal2_read = spu_hw_signal2_read,
 	.signal2_write = spu_hw_signal2_write,
 	.signal1_type_set = spu_hw_signal1_type_set,
 	.signal1_type_get = spu_hw_signal1_type_get,
@@ -303,8 +312,10 @@ struct spu_context_ops spu_hw_ops = {
 	.npc_write = spu_hw_npc_write,
 	.status_read = spu_hw_status_read,
 	.get_ls = spu_hw_get_ls,
+	.runcntl_read = spu_hw_runcntl_read,
 	.runcntl_write = spu_hw_runcntl_write,
-	.runcntl_stop = spu_hw_runcntl_stop,
+	.master_start = spu_hw_master_start,
+	.master_stop = spu_hw_master_stop,
 	.set_mfc_query = spu_hw_set_mfc_query,
 	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
Index: b/arch/powerpc/platforms/cell/spufs/inode.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -33,23 +33,28 @@
 #include <linux/slab.h>
 #include <linux/parser.h>
 
-#include <asm/io.h>
+#include <asm/prom.h>
 #include <asm/semaphore.h>
 #include <asm/spu.h>
 #include <asm/uaccess.h>
 
 #include "spufs.h"
 
-static kmem_cache_t *spufs_inode_cache;
+static struct kmem_cache *spufs_inode_cache;
+char *isolated_loader;
 
 static struct inode *
 spufs_alloc_inode(struct super_block *sb)
 {
 	struct spufs_inode_info *ei;
 
-	ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL);
+	ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
 	if (!ei)
 		return NULL;
+
+	ei->i_gang = NULL;
+	ei->i_ctx = NULL;
+
 	return &ei->vfs_inode;
 }
 
@@ -60,7 +65,7 @@ spufs_destroy_inode(struct inode *inode)
 }
 
 static void
-spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags)
+spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct spufs_inode_info *ei = p;
 
@@ -128,14 +133,19 @@ out:
 static void
 spufs_delete_inode(struct inode *inode)
 {
-	if (SPUFS_I(inode)->i_ctx)
-		put_spu_context(SPUFS_I(inode)->i_ctx);
+	struct spufs_inode_info *ei = SPUFS_I(inode);
+
+	if (ei->i_ctx)
+		put_spu_context(ei->i_ctx);
+	if (ei->i_gang)
+		put_spu_gang(ei->i_gang);
 	clear_inode(inode);
 }
 
 static void spufs_prune_dir(struct dentry *dir)
 {
 	struct dentry *dentry, *tmp;
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
 		spin_lock(&dcache_lock);
@@ -156,13 +166,13 @@ static void spufs_prune_dir(struct dentr
 	mutex_unlock(&dir->d_inode->i_mutex);
 }
 
-/* Caller must hold root->i_mutex */
-static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry)
+/* Caller must hold parent->i_mutex */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
 {
 	/* remove all entries */
-	spufs_prune_dir(dir_dentry);
+	spufs_prune_dir(dir);
 
-	return simple_rmdir(root, dir_dentry);
+	return simple_rmdir(parent, dir);
 }
 
 static int spufs_fill_dir(struct dentry *dir, struct tree_descr *files,
@@ -210,11 +220,11 @@ static int spufs_dir_close(struct inode 
 	return dcache_dir_close(inode, file);
 }
 
-struct inode_operations spufs_dir_inode_operations = {
+/* const */struct inode_operations spufs_dir_inode_operations = {
 	.lookup = simple_lookup,
 };
 
-struct file_operations spufs_context_fops = {
+const struct file_operations spufs_context_fops = {
 	.open		= dcache_dir_open,
 	.release	= spufs_dir_close,
 	.llseek		= dcache_dir_lseek,
@@ -222,9 +232,11 @@ struct file_operations spufs_context_fop
 	.readdir	= dcache_readdir,
 	.fsync		= simple_sync_file,
 };
+EXPORT_SYMBOL_GPL(spufs_context_fops);
 
 static int
-spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		int mode)
 {
 	int ret;
 	struct inode *inode;
@@ -239,14 +251,20 @@ spufs_mkdir(struct inode *dir, struct de
 		inode->i_gid = dir->i_gid;
 		inode->i_mode &= S_ISGID;
 	}
-	ctx = alloc_spu_context();
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
 	SPUFS_I(inode)->i_ctx = ctx;
 	if (!ctx)
 		goto out_iput;
 
+	ctx->flags = flags;
 	inode->i_op = &spufs_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
-	ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+	if (flags & SPU_CREATE_NOSCHED)
+		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+					 mode, ctx);
+	else
+		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
 	if (ret)
 		goto out_free_ctx;
 
@@ -289,24 +307,191 @@ out:
 	return ret;
 }
 
+static int spufs_create_context(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, int flags, int mode)
+{
+	int ret;
+
+	ret = -EPERM;
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		goto out_unlock;
+
+	ret = -EINVAL;
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		goto out_unlock;
+
+	ret = -ENODEV;
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		goto out_unlock;
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	ret = spufs_context_open(dget(dentry), mntget(mnt));
+	if (ret < 0) {
+		WARN_ON(spufs_rmdir(inode, dentry));
+		mutex_unlock(&inode->i_mutex);
+		spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
+		goto out;
+	}
+
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+out:
+	dput(dentry);
+	return ret;
+}
+
+static int spufs_rmgang(struct inode *root, struct dentry *dir)
+{
+	/* FIXME: this fails if the dir is not empty,
+	          which causes a leak of gangs. */
+	return simple_rmdir(root, dir);
+}
+
+static int spufs_gang_close(struct inode *inode, struct file *file)
+{
+	struct inode *parent;
+	struct dentry *dir;
+	int ret;
+
+	dir = file->f_dentry;
+	parent = dir->d_parent->d_inode;
+
+	ret = spufs_rmgang(parent, dir);
+	WARN_ON(ret);
+
+	return dcache_dir_close(inode, file);
+}
+
+const struct file_operations spufs_gang_fops = {
+	.open		= dcache_dir_open,
+	.release	= spufs_gang_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.readdir	= dcache_readdir,
+	.fsync		= simple_sync_file,
+};
+
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_gang *gang;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		goto out;
+
+	ret = 0;
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		inode->i_mode &= S_ISGID;
+	}
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang)
+		goto out_iput;
+
+	inode->i_op = &spufs_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	dir->i_nlink++;
+	dentry->d_inode->i_nlink++;
+	return ret;
+
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
+{
+	int ret;
+	struct file *filp;
+
+	ret = get_unused_fd();
+	if (ret < 0) {
+		dput(dentry);
+		mntput(mnt);
+		goto out;
+	}
+
+	filp = dentry_open(dentry, mnt, O_RDONLY);
+	if (IS_ERR(filp)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(filp);
+		goto out;
+	}
+
+	filp->f_op = &spufs_gang_fops;
+	fd_install(ret, filp);
+out:
+	return ret;
+}
+
+static int spufs_create_gang(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, int mode)
+{
+	int ret;
+
+	ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+	if (ret)
+		goto out;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	ret = spufs_gang_open(dget(dentry), mntget(mnt));
+	if (ret < 0)
+		WARN_ON(spufs_rmgang(inode, dentry));
+
+out:
+	mutex_unlock(&inode->i_mutex);
+	dput(dentry);
+	return ret;
+}
+
+
 static struct file_system_type spufs_type;
 
-long spufs_create_thread(struct nameidata *nd,
-			 unsigned int flags, mode_t mode)
+long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
 {
 	struct dentry *dentry;
 	int ret;
 
-	/* need to be at the root of spufs */
 	ret = -EINVAL;
-	if (nd->dentry->d_sb->s_type != &spufs_type ||
-	    nd->dentry != nd->dentry->d_sb->s_root)
+	/* check if we are on spufs */
+	if (nd->dentry->d_sb->s_type != &spufs_type)
 		goto out;
 
-	/* all flags are reserved */
-	if (flags)
+	/* don't accept undefined flags */
+	if (flags & (~SPU_CREATE_FLAG_ALL))
 		goto out;
 
+	/* only threads can be underneath a gang */
+	if (nd->dentry != nd->dentry->d_sb->s_root) {
+		if ((flags & SPU_CREATE_GANG) ||
+		    !SPUFS_I(nd->dentry->d_inode)->i_gang)
+			goto out;
+	}
+
 	dentry = lookup_create(nd, 1);
 	ret = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
@@ -317,22 +502,13 @@ long spufs_create_thread(struct nameidat
 		goto out_dput;
 
 	mode &= ~current->fs->umask;
-	ret = spufs_mkdir(nd->dentry->d_inode, dentry, mode & S_IRWXUGO);
-	if (ret)
-		goto out_dput;
 
-	/*
-	 * get references for dget and mntget, will be released
-	 * in error path of *_open().
-	 */
-	ret = spufs_context_open(dget(dentry), mntget(nd->mnt));
-	if (ret < 0) {
-		WARN_ON(spufs_rmdir(nd->dentry->d_inode, dentry));
-		mutex_unlock(&nd->dentry->d_inode->i_mutex);
-		spu_forget(SPUFS_I(dentry->d_inode)->i_ctx);
-		dput(dentry);
-		goto out;
-	}
+	if (flags & SPU_CREATE_GANG)
+		return spufs_create_gang(nd->dentry->d_inode,
+					dentry, nd->mnt, mode);
+	else
+		return spufs_create_context(nd->dentry->d_inode,
+					dentry, nd->mnt, flags, mode);
 
 out_dput:
 	dput(dentry);
@@ -384,6 +560,30 @@ spufs_parse_options(char *options, struc
 	return 1;
 }
 
+static void
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn;
+	const char *loader;
+	int size;
+
+	dn = of_find_node_by_path("/spu-isolation");
+	if (!dn)
+		return;
+
+	loader = get_property(dn, "loader", &size);
+	if (!loader)
+		return;
+
+	/* kmalloc should align on a 16 byte boundary */
+	isolated_loader = kmalloc(size, GFP_KERNEL);
+	if (!isolated_loader)
+		return;
+
+	memcpy(isolated_loader, loader, size);
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
 static int
 spufs_create_root(struct super_block *sb, void *data)
 {
@@ -452,6 +652,7 @@ static struct file_system_type spufs_typ
 static int __init spufs_init(void)
 {
 	int ret;
+
 	ret = -ENOMEM;
 	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
 			sizeof(struct spufs_inode_info), 0,
@@ -469,6 +670,12 @@ static int __init spufs_init(void)
 	ret = register_spu_syscalls(&spufs_calls);
 	if (ret)
 		goto out_fs;
+	ret = register_arch_coredump_calls(&spufs_coredump_calls);
+	if (ret)
+		goto out_fs;
+
+	spufs_init_isolated_loader();
+
 	return 0;
 out_fs:
 	unregister_filesystem(&spufs_type);
@@ -482,6 +689,7 @@ module_init(spufs_init);
 static void __exit spufs_exit(void)
 {
 	spu_sched_exit();
+	unregister_arch_coredump_calls(&spufs_coredump_calls);
 	unregister_spu_syscalls(&spufs_calls);
 	unregister_filesystem(&spufs_type);
 	kmem_cache_destroy(spufs_inode_cache);
Index: b/arch/powerpc/platforms/cell/spufs/run.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -1,7 +1,11 @@
+#define DEBUG
+
 #include <linux/wait.h>
 #include <linux/ptrace.h>
 
 #include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
 #include <asm/unistd.h>
 
 #include "spufs.h"
@@ -14,6 +18,27 @@ void spufs_stop_callback(struct spu *spu
 	wake_up_all(&ctx->stop_wq);
 }
 
+void spufs_dma_callback(struct spu *spu, int type)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+	} else {
+		switch (type) {
+		case SPE_EVENT_DMA_ALIGNMENT:
+		case SPE_EVENT_SPE_DATA_STORAGE:
+		case SPE_EVENT_INVALID_DMA:
+			force_sig(SIGBUS, /* info, */ current);
+			break;
+		case SPE_EVENT_SPE_ERROR:
+			force_sig(SIGILL, /* info */ current);
+			break;
+		}
+	}
+}
+
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
 	struct spu *spu;
@@ -28,16 +53,124 @@ static inline int spu_stopped(struct spu
 	return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0;
 }
 
-static inline int spu_run_init(struct spu_context *ctx, u32 * npc,
-			       u32 * status)
+static int spu_setup_isolated(struct spu_context *ctx)
 {
 	int ret;
+	u64 __iomem *mfc_cntl;
+	u64 sr1;
+	u32 status;
+	unsigned long timeout;
+	const u32 status_loading = SPU_STATUS_RUNNING
+		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+	if (!isolated_loader)
+		return -ENODEV;
+
+	ret = spu_acquire_exclusive(ctx);
+	if (ret)
+		goto out;
+
+	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+	/* purge the MFC DMA queue to ensure no spurious accesses before we
+	 * enter kernel mode */
+	timeout = jiffies + HZ;
+	out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+	while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+			!= MFC_CNTL_PURGE_DMA_COMPLETE) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+					__FUNCTION__);
+			ret = -EIO;
+			goto out_unlock;
+		}
+		cond_resched();
+	}
+
+	/* put the SPE in kernel mode to allow access to the loader */
+	sr1 = spu_mfc_sr1_get(ctx->spu);
+	sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+	/* start the loader */
+	ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+	ctx->ops->signal2_write(ctx,
+			(unsigned long)isolated_loader & 0xffffffff);
+
+	ctx->ops->runcntl_write(ctx,
+			SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+	ret = 0;
+	timeout = jiffies + HZ;
+	while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+				status_loading) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout waiting for loader\n",
+					__FUNCTION__);
+			ret = -EIO;
+			goto out_drop_priv;
+		}
+		cond_resched();
+	}
+
+	if (!(status & SPU_STATUS_RUNNING)) {
+		/* If isolated LOAD has failed: run SPU, we will get a stop-and
+		 * signal later. */
+		pr_debug("%s: isolated LOAD failed\n", __FUNCTION__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+		ret = -EACCES;
+
+	} else if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+		/* This isn't allowed by the CBEA, but check anyway */
+		pr_debug("%s: SPU fell out of isolated mode?\n", __FUNCTION__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+		ret = -EINVAL;
+	}
 
-	if ((ret = spu_acquire_runnable(ctx)) != 0)
+out_drop_priv:
+	/* Finished accessing the loader. Drop kernel mode */
+	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static inline int spu_run_init(struct spu_context *ctx, u32 * npc)
+{
+	int ret;
+	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+
+	ret = spu_acquire_runnable(ctx, 0);
+	if (ret)
 		return ret;
-	ctx->ops->npc_write(ctx, *npc);
-	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
-	return 0;
+
+	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+			/* Need to release ctx, because spu_setup_isolated will
+			 * acquire it exclusively.
+			 */
+			spu_release(ctx);
+			ret = spu_setup_isolated(ctx);
+			if (!ret)
+				ret = spu_acquire_runnable(ctx, 0);
+		}
+
+		/* if userspace has set the runcntrl register (eg, to issue an
+		 * isolated exit), we need to re-set it here */
+		runcntl = ctx->ops->runcntl_read(ctx) &
+			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+		if (runcntl == 0)
+			runcntl = SPU_RUNCNTL_RUNNABLE;
+	} else {
+		spu_start_tick(ctx);
+		ctx->ops->npc_write(ctx, *npc);
+	}
+
+	ctx->ops->runcntl_write(ctx, runcntl);
+	return ret;
 }
 
 static inline int spu_run_fini(struct spu_context *ctx, u32 * npc,
@@ -45,21 +178,14 @@ static inline int spu_run_fini(struct sp
 {
 	int ret = 0;
 
+	spu_stop_tick(ctx);
 	*status = ctx->ops->status_read(ctx);
 	*npc = ctx->ops->npc_read(ctx);
 	spu_release(ctx);
 
 	if (signal_pending(current))
 		ret = -ERESTARTSYS;
-#if 0 /* XXX */
-	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if ((*status & SPU_STATUS_STOPPED_BY_STOP)
-		    && (*status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) {
-			force_sig(SIGTRAP, current);
-			ret = -ERESTARTSYS;
-		}
-	}
-#endif
+
 	return ret;
 }
 
@@ -74,7 +200,7 @@ static inline int spu_reacquire_runnable
 		       SPU_STATUS_STOPPED_BY_HALT)) {
 		return *status;
 	}
-	if ((ret = spu_run_init(ctx, npc, status)) != 0)
+	if ((ret = spu_run_init(ctx, npc)) != 0)
 		return ret;
 	return 0;
 }
@@ -179,47 +305,64 @@ static inline int spu_process_events(str
 }
 
 long spufs_run_spu(struct file *file, struct spu_context *ctx,
-		   u32 * npc, u32 * status)
+		   u32 *npc, u32 *event)
 {
 	int ret;
+	u32 status;
 
 	if (down_interruptible(&ctx->run_sema))
 		return -ERESTARTSYS;
 
-	ret = spu_run_init(ctx, npc, status);
+	ctx->ops->master_start(ctx);
+	ctx->event_return = 0;
+	ret = spu_run_init(ctx, npc);
 	if (ret)
 		goto out;
 
 	do {
-		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status));
+		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
 		if (unlikely(ret))
 			break;
-		if ((*status & SPU_STATUS_STOPPED_BY_STOP) &&
-		    (*status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+		if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+		    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
 			ret = spu_process_callback(ctx);
 			if (ret)
 				break;
-			*status &= ~SPU_STATUS_STOPPED_BY_STOP;
+			status &= ~SPU_STATUS_STOPPED_BY_STOP;
 		}
 		if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
-			ret = spu_reacquire_runnable(ctx, npc, status);
-			if (ret)
-				goto out;
+			ret = spu_reacquire_runnable(ctx, npc, &status);
+			if (ret) {
+				spu_stop_tick(ctx);
+				goto out2;
+			}
 			continue;
 		}
 		ret = spu_process_events(ctx);
 
-	} while (!ret && !(*status & (SPU_STATUS_STOPPED_BY_STOP |
+	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
 				      SPU_STATUS_STOPPED_BY_HALT)));
 
-	ctx->ops->runcntl_stop(ctx);
-	ret = spu_run_fini(ctx, npc, status);
-	if (!ret)
-		ret = *status;
+	ctx->ops->master_stop(ctx);
+	ret = spu_run_fini(ctx, npc, &status);
 	spu_yield(ctx);
 
+out2:
+	if ((ret == 0) ||
+	    ((ret == -ERESTARTSYS) &&
+	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+	      ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	       (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+		ret = status;
+
+	if ((status & SPU_STATUS_STOPPED_BY_STOP)
+	    && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) {
+		force_sig(SIGTRAP, current);
+		ret = -ERESTARTSYS;
+	}
+
 out:
+	*event = ctx->event_return;
 	up(&ctx->run_sema);
 	return ret;
 }
-
Index: b/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -3,11 +3,7 @@
  * Copyright (C) IBM 2005
  * Author: Mark Nutter <mnutter@us.ibm.com>
  *
- * SPU scheduler, based on Linux thread priority.  For now use
- * a simple "cooperative" yield model with no preemption.  SPU
- * scheduling will eventually be preemptive: When a thread with
- * a higher static priority gets ready to run, then an active SPU
- * context will be preempted and returned to the waitq.
+ * 2006-03-31	NUMA domains added.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,6 +33,9 @@
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -45,152 +44,148 @@
 #include <asm/spu_priv1.h>
 #include "spufs.h"
 
-#define SPU_MIN_TIMESLICE 	(100 * HZ / 1000)
+#define SPU_TIMESLICE	(HZ)
 
-#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
 struct spu_prio_array {
-	atomic_t nr_blocked;
-	unsigned long bitmap[SPU_BITMAP_SIZE];
-	wait_queue_head_t waitq[MAX_PRIO];
+	DECLARE_BITMAP(bitmap, MAX_PRIO);
+	struct list_head runq[MAX_PRIO];
+	spinlock_t runq_lock;
+	struct list_head active_list[MAX_NUMNODES];
+	struct mutex active_mutex[MAX_NUMNODES];
 };
 
-/* spu_runqueue - This is the main runqueue data structure for SPUs. */
-struct spu_runqueue {
-	struct semaphore sem;
-	unsigned long nr_active;
-	unsigned long nr_idle;
-	unsigned long nr_switches;
-	struct list_head active_list;
-	struct list_head idle_list;
-	struct spu_prio_array prio;
-};
-
-static struct spu_runqueue *spu_runqueues = NULL;
+static struct spu_prio_array *spu_prio;
+static struct workqueue_struct *spu_sched_wq;
 
-static inline struct spu_runqueue *spu_rq(void)
+static inline int node_allowed(int node)
 {
-	/* Future: make this a per-NODE array,
-	 * and use cpu_to_node(smp_processor_id())
-	 */
-	return spu_runqueues;
-}
-
-static inline struct spu *del_idle(struct spu_runqueue *rq)
-{
-	struct spu *spu;
+	cpumask_t mask;
 
-	BUG_ON(rq->nr_idle <= 0);
-	BUG_ON(list_empty(&rq->idle_list));
-	/* Future: Move SPU out of low-power SRI state. */
-	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
-	list_del_init(&spu->sched_list);
-	rq->nr_idle--;
-	return spu;
+	if (!nr_cpus_node(node))
+		return 0;
+	mask = node_to_cpumask(node);
+	if (!cpus_intersects(mask, current->cpus_allowed))
+		return 0;
+	return 1;
 }
 
-static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
+void spu_start_tick(struct spu_context *ctx)
 {
-	BUG_ON(rq->nr_active <= 0);
-	BUG_ON(list_empty(&rq->active_list));
-	list_del_init(&spu->sched_list);
-	rq->nr_active--;
+	if (ctx->policy == SCHED_RR)
+		queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
 }
 
-static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
+void spu_stop_tick(struct spu_context *ctx)
 {
-	/* Future: Put SPU into low-power SRI state. */
-	list_add_tail(&spu->sched_list, &rq->idle_list);
-	rq->nr_idle++;
+	if (ctx->policy == SCHED_RR)
+		cancel_delayed_work(&ctx->sched_work);
 }
 
-static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
+void spu_sched_tick(void *data)
 {
-	rq->nr_active++;
-	rq->nr_switches++;
-	list_add_tail(&spu->sched_list, &rq->active_list);
-}
+	struct spu_context *ctx = data;
+	struct spu *spu;
+	int rearm = 1;
 
-static void prio_wakeup(struct spu_runqueue *rq)
-{
-	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
-		int best = sched_find_first_bit(rq->prio.bitmap);
-		if (best < MAX_PRIO) {
-			wait_queue_head_t *wq = &rq->prio.waitq[best];
-			wake_up_interruptible_nr(wq, 1);
+	mutex_lock(&ctx->state_mutex);
+	spu = ctx->spu;
+	if (spu) {
+		int best = sched_find_first_bit(spu_prio->bitmap);
+		if (best <= ctx->prio) {
+			spu_deactivate(ctx);
+			rearm = 0;
 		}
 	}
+	mutex_unlock(&ctx->state_mutex);
+
+	if (rearm)
+		spu_start_tick(ctx);
 }
 
-static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
-		      u64 flags)
+/**
+ * spu_add_to_active_list - add spu to active list
+ * @spu:	spu to add to the active list
+ */
+static void spu_add_to_active_list(struct spu *spu)
 {
-	int prio = current->prio;
-	wait_queue_head_t *wq = &rq->prio.waitq[prio];
-	DEFINE_WAIT(wait);
+	mutex_lock(&spu_prio->active_mutex[spu->node]);
+	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
+	mutex_unlock(&spu_prio->active_mutex[spu->node]);
+}
 
-	__set_bit(prio, rq->prio.bitmap);
-	atomic_inc(&rq->prio.nr_blocked);
-	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
-	if (!signal_pending(current)) {
-		up(&rq->sem);
-		up_write(&ctx->state_sema);
-		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
-			 current->pid, current->prio);
-		schedule();
-		down_write(&ctx->state_sema);
-		down(&rq->sem);
-	}
-	finish_wait(wq, &wait);
-	atomic_dec(&rq->prio.nr_blocked);
-	if (!waitqueue_active(wq))
-		__clear_bit(prio, rq->prio.bitmap);
+/**
+ * spu_remove_from_active_list - remove spu from active list
+ * @spu:       spu to remove from the active list
+ */
+static void spu_remove_from_active_list(struct spu *spu)
+{
+	int node = spu->node;
+
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_del_init(&spu->list);
+	mutex_unlock(&spu_prio->active_mutex[node]);
 }
 
-static inline int is_best_prio(struct spu_runqueue *rq)
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
+
+static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
 {
-	int best_prio;
+	blocking_notifier_call_chain(&spu_switch_notifier,
+			    ctx ? ctx->object_id : 0, spu);
+}
 
-	best_prio = sched_find_first_bit(rq->prio.bitmap);
-	return (current->prio < best_prio) ? 1 : 0;
+int spu_switch_event_register(struct notifier_block * n)
+{
+	return blocking_notifier_chain_register(&spu_switch_notifier, n);
 }
 
-static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+int spu_switch_event_unregister(struct notifier_block * n)
 {
-	/* Global TLBIE broadcast required with SPEs. */
-#if (NR_CPUS > 1)
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
-#else
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
-#endif
+	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
 }
 
-static inline void bind_context(struct spu *spu, struct spu_context *ctx)
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu:	physical spu to bind to
+ * @ctx:	context to bind
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
-		 spu->number);
+	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
+		 spu->number, spu->node);
 	spu->ctx = ctx;
 	spu->flags = 0;
-	ctx->flags = 0;
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
 	spu->pid = current->pid;
-	spu->prio = current->prio;
-	spu->mm = ctx->owner;
-	mm_needs_global_tlbie(spu->mm);
+	spu_associate_mm(spu, ctx->owner);
 	spu->ibox_callback = spufs_ibox_callback;
 	spu->wbox_callback = spufs_wbox_callback;
 	spu->stop_callback = spufs_stop_callback;
 	spu->mfc_callback = spufs_mfc_callback;
+	spu->dma_callback = spufs_dma_callback;
 	mb();
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
 	spu->timestamp = jiffies;
+	spu_cpu_affinity_set(spu, raw_smp_processor_id());
+	spu_switch_notify(spu, ctx);
+	spu_add_to_active_list(spu);
+	ctx->state = SPU_STATE_RUNNABLE;
 }
 
-static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu:	physical spu to unbind from
+ * @ctx:	context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
-		 spu->pid, spu->number);
+	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
+		 spu->pid, spu->number, spu->node);
+
+	spu_remove_from_active_list(spu);
+	spu_switch_notify(spu, NULL);
 	spu_unmap_mappings(ctx);
 	spu_save(&ctx->csa, spu);
 	spu->timestamp = jiffies;
@@ -199,265 +194,309 @@ static inline void unbind_context(struct
 	spu->wbox_callback = NULL;
 	spu->stop_callback = NULL;
 	spu->mfc_callback = NULL;
-	spu->mm = NULL;
+	spu->dma_callback = NULL;
+	spu_associate_mm(spu, NULL);
 	spu->pid = 0;
-	spu->prio = MAX_PRIO;
 	ctx->ops = &spu_backing_ops;
 	ctx->spu = NULL;
-	ctx->flags = 0;
 	spu->flags = 0;
 	spu->ctx = NULL;
 }
 
-static void spu_reaper(void *data)
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx:       context to add
+ */
+static void spu_add_to_rq(struct spu_context *ctx)
 {
-	struct spu_context *ctx = data;
-	struct spu *spu;
+	spin_lock(&spu_prio->runq_lock);
+	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+	set_bit(ctx->prio, spu_prio->bitmap);
+	spin_unlock(&spu_prio->runq_lock);
+}
 
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-		if (atomic_read(&spu->rq->prio.nr_blocked)) {
-			pr_debug("%s: spu=%d\n", __func__, spu->number);
-			ctx->ops->runcntl_stop(ctx);
-			spu_deactivate(ctx);
-			wake_up_all(&ctx->stop_wq);
-		} else {
-			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-		}
-	}
-	up_write(&ctx->state_sema);
-	put_spu_context(ctx);
+/**
+ * spu_del_from_rq - remove a context from the runqueue
+ * @ctx:       context to remove
+ */
+static void spu_del_from_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	list_del_init(&ctx->rq);
+	if (list_empty(&spu_prio->runq[ctx->prio]))
+		clear_bit(ctx->prio, spu_prio->bitmap);
+	spin_unlock(&spu_prio->runq_lock);
 }
 
-static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
+/**
+ * spu_grab_context - remove one context from the runqueue
+ * @prio:      priority of the context to be removed
+ *
+ * This function removes one context from the runqueue for priority @prio.
+ * If there is more than one context with the given priority, the first
+ * task on the runqueue will be taken.
+ *
+ * Returns the spu_context it just removed.
+ *
+ * Must be called with spu_prio->runq_lock held.
+ */
+static struct spu_context *spu_grab_context(int prio)
 {
-	struct spu_context *ctx = get_spu_context(spu->ctx);
-	unsigned long now = jiffies;
-	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
+	struct list_head *rq = &spu_prio->runq[prio];
 
-	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
-	if (time_after(now, expire))
-		schedule_work(&ctx->reap_work);
-	else
-		schedule_delayed_work(&ctx->reap_work, expire - now);
+	if (list_empty(rq))
+		return NULL;
+	return list_entry(rq->next, struct spu_context, rq);
 }
 
-static void check_preempt_active(struct spu_runqueue *rq)
+static void spu_prio_wait(struct spu_context *ctx)
 {
-	struct list_head *p;
-	struct spu *worst = NULL;
+	DEFINE_WAIT(wait);
 
-	list_for_each(p, &rq->active_list) {
-		struct spu *spu = list_entry(p, struct spu, sched_list);
-		struct spu_context *ctx = spu->ctx;
-		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-			if (!worst || (spu->prio > worst->prio)) {
-				worst = spu;
-			}
-		}
+	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+	if (!signal_pending(current)) {
+		mutex_unlock(&ctx->state_mutex);
+		schedule();
+		mutex_lock(&ctx->state_mutex);
 	}
-	if (worst && (current->prio < worst->prio))
-		schedule_spu_reaper(rq, worst);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->stop_wq, &wait);
 }
 
-static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
+/**
+ * spu_reschedule - try to find a runnable context for a spu
+ * @spu:       spu available
+ *
+ * This function is called whenever a spu becomes idle.  It looks for the
+ * most suitable runnable spu context and schedules it for execution.
+ */
+static void spu_reschedule(struct spu *spu)
+{
+	int best;
+
+	spu_free(spu);
+
+	spin_lock(&spu_prio->runq_lock);
+	best = sched_find_first_bit(spu_prio->bitmap);
+	if (best < MAX_PRIO) {
+		struct spu_context *ctx = spu_grab_context(best);
+		if (ctx)
+			wake_up(&ctx->stop_wq);
+	}
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx)
 {
-	struct spu_runqueue *rq;
 	struct spu *spu = NULL;
+	int node = cpu_to_node(raw_smp_processor_id());
+	int n;
 
-	rq = spu_rq();
-	down(&rq->sem);
-	for (;;) {
-		if (rq->nr_idle > 0) {
-			if (is_best_prio(rq)) {
-				/* Fall through. */
-				spu = del_idle(rq);
-				break;
-			} else {
-				prio_wakeup(rq);
-				up(&rq->sem);
-				yield();
-				if (signal_pending(current)) {
-					return NULL;
-				}
-				rq = spu_rq();
-				down(&rq->sem);
-				continue;
-			}
-		} else {
-			check_preempt_active(rq);
-			prio_wait(rq, ctx, flags);
-			if (signal_pending(current)) {
-				prio_wakeup(rq);
-				spu = NULL;
-				break;
-			}
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
 			continue;
-		}
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
 	}
-	up(&rq->sem);
 	return spu;
 }
 
-static void put_idle_spu(struct spu *spu)
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx:	candidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
 {
-	struct spu_runqueue *rq = spu->rq;
+	struct spu_context *victim = NULL;
+	struct spu *spu;
+	int node, n;
 
-	down(&rq->sem);
-	add_idle(rq, spu);
-	prio_wakeup(rq);
-	up(&rq->sem);
-}
+	/*
+	 * Look for a possible preemption candidate on the local node first.
+	 * If there is no candidate look at the other nodes.  This isn't
+	 * exactly fair, but so far the whole spu schedule tries to keep
+	 * a strong node affinity.  We might want to fine-tune this in
+	 * the future.
+	 */
+ restart:
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
 
-static int get_active_spu(struct spu *spu)
-{
-	struct spu_runqueue *rq = spu->rq;
-	struct list_head *p;
-	struct spu *tmp;
-	int rc = 0;
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+			struct spu_context *tmp = spu->ctx;
+
+			if (tmp->rt_priority < ctx->rt_priority &&
+			    (!victim || tmp->rt_priority < victim->rt_priority))
+				victim = spu->ctx;
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
 
-	down(&rq->sem);
-	list_for_each(p, &rq->active_list) {
-		tmp = list_entry(p, struct spu, sched_list);
-		if (tmp == spu) {
-			del_active(rq, spu);
-			rc = 1;
-			break;
+		if (victim) {
+			/*
+			 * This nests ctx->state_mutex, but we always lock
+			 * higher priority contexts before lower priority
+			 * ones, so this is safe until we introduce
+			 * priority inheritance schemes.
+			 */
+			if (!mutex_trylock(&victim->state_mutex)) {
+				victim = NULL;
+				goto restart;
+			}
+
+			spu = victim->spu;
+			if (!spu) {
+				/*
+				 * This race can happen because we've dropped
+				 * the active list mutex.  Not a problem, just
+				 * restart the search.
+				 */
+				mutex_unlock(&victim->state_mutex);
+				victim = NULL;
+				goto restart;
+			}
+			spu_unbind_context(spu, victim);
+			mutex_unlock(&victim->state_mutex);
+			return spu;
 		}
 	}
-	up(&rq->sem);
-	return rc;
-}
-
-static void put_active_spu(struct spu *spu)
-{
-	struct spu_runqueue *rq = spu->rq;
 
-	down(&rq->sem);
-	add_active(rq, spu);
-	up(&rq->sem);
+	return NULL;
 }
 
-/* Lock order:
- *	spu_activate() & spu_deactivate() require the
- *	caller to have down_write(&ctx->state_sema).
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx:	spu context to schedule
+ * @flags:	flags (currently ignored)
  *
- *	The rq->sem is breifly held (inside or outside a
- *	given ctx lock) for list management, but is never
- *	held during save/restore.
+ * Tries to find a free spu to run @ctx.  If no free spu is available,
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
  */
-
-int spu_activate(struct spu_context *ctx, u64 flags)
+int spu_activate(struct spu_context *ctx, unsigned long flags)
 {
-	struct spu *spu;
 
 	if (ctx->spu)
 		return 0;
-	spu = get_idle_spu(ctx, flags);
-	if (!spu)
-		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
-	bind_context(spu, ctx);
-	/*
-	 * We're likely to wait for interrupts on the same
-	 * CPU that we are now on, so send them here.
-	 */
-	spu_cpu_affinity_set(spu, raw_smp_processor_id());
-	put_active_spu(spu);
-	return 0;
+
+	do {
+		struct spu *spu;
+
+		spu = spu_get_idle(ctx);
+		/*
+		 * If this is a realtime thread we try to get it running by
+		 * preempting a lower priority thread.
+		 */
+		if (!spu && ctx->rt_priority)
+			spu = find_victim(ctx);
+		if (spu) {
+			spu_bind_context(spu, ctx);
+			return 0;
+		}
+
+		spu_add_to_rq(ctx);
+		spu_prio_wait(ctx);
+		spu_del_from_rq(ctx);
+	} while (!signal_pending(current));
+
+	return -ERESTARTSYS;
 }
 
+/**
+ * spu_deactivate - unbind a context from its physical spu
+ * @ctx:	spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
 void spu_deactivate(struct spu_context *ctx)
 {
-	struct spu *spu;
-	int needs_idle;
+	struct spu *spu = ctx->spu;
 
-	spu = ctx->spu;
-	if (!spu)
-		return;
-	needs_idle = get_active_spu(spu);
-	unbind_context(spu, ctx);
-	if (needs_idle)
-		put_idle_spu(spu);
+	if (spu) {
+		spu_unbind_context(spu, ctx);
+		spu_reschedule(spu);
+	}
 }
 
+/**
+ * spu_yield -  yield a physical spu if others are waiting
+ * @ctx:	spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if so
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
+ */
 void spu_yield(struct spu_context *ctx)
 {
 	struct spu *spu;
 	int need_yield = 0;
 
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
-		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
-		spu_deactivate(ctx);
-		ctx->state = SPU_STATE_SAVED;
-		need_yield = 1;
-	} else if (spu) {
-		spu->prio = MAX_PRIO;
+	if (mutex_trylock(&ctx->state_mutex)) {
+		if ((spu = ctx->spu) != NULL) {
+			int best = sched_find_first_bit(spu_prio->bitmap);
+			if (best < MAX_PRIO) {
+				pr_debug("%s: yielding SPU %d NODE %d\n",
+					 __FUNCTION__, spu->number, spu->node);
+				spu_deactivate(ctx);
+				need_yield = 1;
+			}
+		}
+		mutex_unlock(&ctx->state_mutex);
 	}
-	up_write(&ctx->state_sema);
 	if (unlikely(need_yield))
 		yield();
 }
 
 int __init spu_sched_init(void)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu;
 	int i;
 
-	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
-	if (!rq) {
-		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
+	spu_sched_wq = create_singlethread_workqueue("spusched");
+	if (!spu_sched_wq)
+		return 1;
+
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio) {
+		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
 		       __FUNCTION__);
+		destroy_workqueue(spu_sched_wq);
 		return 1;
 	}
-	memset(rq, 0, sizeof(struct spu_runqueue));
-	init_MUTEX(&rq->sem);
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->idle_list);
-	rq->nr_active = 0;
-	rq->nr_idle = 0;
-	rq->nr_switches = 0;
-	atomic_set(&rq->prio.nr_blocked, 0);
 	for (i = 0; i < MAX_PRIO; i++) {
-		init_waitqueue_head(&rq->prio.waitq[i]);
-		__clear_bit(i, rq->prio.bitmap);
+		INIT_LIST_HEAD(&spu_prio->runq[i]);
+		__clear_bit(i, spu_prio->bitmap);
 	}
-	__set_bit(MAX_PRIO, rq->prio.bitmap);
-	for (;;) {
-		spu = spu_alloc();
-		if (!spu)
-			break;
-		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
-		add_idle(rq, spu);
-		spu->rq = rq;
-		spu->timestamp = jiffies;
-	}
-	if (!rq->nr_idle) {
-		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
-		kfree(rq);
-		return 1;
+	__set_bit(MAX_PRIO, spu_prio->bitmap);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&spu_prio->active_mutex[i]);
+		INIT_LIST_HEAD(&spu_prio->active_list[i]);
 	}
+	spin_lock_init(&spu_prio->runq_lock);
 	return 0;
 }
 
 void __exit spu_sched_exit(void)
 {
-	struct spu_runqueue *rq = spu_rq();
-	struct spu *spu;
+	struct spu *spu, *tmp;
+	int node;
 
-	if (!rq) {
-		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
-		return;
-	}
-	while (rq->nr_idle > 0) {
-		spu = del_idle(rq);
-		if (!spu)
-			break;
-		spu_free(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
+					 list) {
+			list_del_init(&spu->list);
+			spu_free(spu);
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
 	}
-	kfree(rq);
+	kfree(spu_prio);
+	destroy_workqueue(spu_sched_wq);
 }
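
The reworked scheduler above replaces the single semaphore-protected runqueue with an array of per-priority lists plus a bitmap of occupied priorities, so the highest-priority waiter is found with one find-first-bit scan (bit MAX_PRIO is kept set as a sentinel). A minimal sketch of that lookup, assuming it runs inside sched.c with spu_prio->runq_lock held; spu_runq_peek is an illustrative name, not a function added by this patch:

static struct spu_context *spu_runq_peek(void)
{
	int best = sched_find_first_bit(spu_prio->bitmap);

	if (best >= MAX_PRIO)
		return NULL;	/* only the sentinel bit is set: queue empty */
	return list_entry(spu_prio->runq[best].next, struct spu_context, rq);
}
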
Index: b/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -23,12 +23,13 @@
 #define SPUFS_H
 
 #include <linux/kref.h>
-#include <linux/rwsem.h>
+#include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 
 #include <asm/spu.h>
 #include <asm/spu_csa.h>
+#include <asm/spu_info.h>
 
 /* The magic number for our file system */
 enum {
@@ -36,8 +37,12 @@ enum {
 };
 
 struct spu_context_ops;
+struct spu_gang;
 
-#define SPU_CONTEXT_PREEMPT          0UL
+/* ctx->sched_flags */
+enum {
+	SPU_SCHED_WAKE = 0, /* currently unused */
+};
 
 struct spu_context {
 	struct spu *spu;		  /* pointer to a physical SPU */
@@ -48,9 +53,12 @@ struct spu_context {
 	struct address_space *cntl; 	   /* 'control' area mappings. */
 	struct address_space *signal1; 	   /* 'signal1' area mappings. */
 	struct address_space *signal2; 	   /* 'signal2' area mappings. */
+	struct address_space *mss; 	   /* 'mss' area mappings. */
+	struct address_space *psmap; 	   /* 'psmap' area mappings. */
+	u64 object_id;		   /* user space pointer for oprofile */
 
 	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
-	struct rw_semaphore state_sema;
+	struct mutex state_mutex;
 	struct semaphore run_sema;
 
 	struct mm_struct *owner;
@@ -66,7 +74,26 @@ struct spu_context {
 	u32 tagwait;
 	struct spu_context_ops *ops;
 	struct work_struct reap_work;
-	u64 flags;
+	unsigned long flags;
+	unsigned long event_return;
+
+	struct list_head gang_list;
+	struct spu_gang *gang;
+
+	/* scheduler fields */
+ 	struct list_head rq;
+	struct work_struct sched_work;
+	unsigned long sched_flags;
+	unsigned long rt_priority;
+	int policy;
+	int prio;
+};
+
+struct spu_gang {
+	struct list_head list;
+	struct mutex mutex;
+	struct kref kref;
+	int contexts;
 };
 
 struct mfc_dma_command {
@@ -100,13 +127,19 @@ struct spu_context_ops {
 	void (*npc_write) (struct spu_context * ctx, u32 data);
 	 u32(*status_read) (struct spu_context * ctx);
 	char*(*get_ls) (struct spu_context * ctx);
+	 u32 (*runcntl_read) (struct spu_context * ctx);
 	void (*runcntl_write) (struct spu_context * ctx, u32 data);
-	void (*runcntl_stop) (struct spu_context * ctx);
+	void (*master_start) (struct spu_context * ctx);
+	void (*master_stop) (struct spu_context * ctx);
 	int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
 	u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
 	u32 (*get_mfc_free_elements)(struct spu_context *ctx);
-	int (*send_mfc_command)(struct spu_context *ctx,
-					struct mfc_dma_command *cmd);
+	int (*send_mfc_command)(struct spu_context * ctx,
+				struct mfc_dma_command * cmd);
+	void (*dma_info_read) (struct spu_context * ctx,
+			       struct spu_dma_info * info);
+	void (*proxydma_info_read) (struct spu_context * ctx,
+				    struct spu_proxydma_info * info);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -114,39 +147,62 @@ extern struct spu_context_ops spu_backin
 
 struct spufs_inode_info {
 	struct spu_context *i_ctx;
+	struct spu_gang *i_gang;
 	struct inode vfs_inode;
 };
 #define SPUFS_I(inode) \
 	container_of(inode, struct spufs_inode_info, vfs_inode)
 
 extern struct tree_descr spufs_dir_contents[];
+extern struct tree_descr spufs_dir_nosched_contents[];
 
 /* system call implementation */
 long spufs_run_spu(struct file *file,
 		   struct spu_context *ctx, u32 *npc, u32 *status);
-long spufs_create_thread(struct nameidata *nd,
+long spufs_create(struct nameidata *nd,
 			 unsigned int flags, mode_t mode);
-extern struct file_operations spufs_context_fops;
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
 /* context management */
-struct spu_context * alloc_spu_context(void);
+static inline void spu_acquire(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->state_mutex);
+}
+
+static inline void spu_release(struct spu_context *ctx)
+{
+	mutex_unlock(&ctx->state_mutex);
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
 void destroy_spu_context(struct kref *kref);
 struct spu_context * get_spu_context(struct spu_context *ctx);
 int put_spu_context(struct spu_context *ctx);
 void spu_unmap_mappings(struct spu_context *ctx);
 
 void spu_forget(struct spu_context *ctx);
-void spu_acquire(struct spu_context *ctx);
-void spu_release(struct spu_context *ctx);
-int spu_acquire_runnable(struct spu_context *ctx);
+int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
 void spu_acquire_saved(struct spu_context *ctx);
+int spu_acquire_exclusive(struct spu_context *ctx);
 
-int spu_activate(struct spu_context *ctx, u64 flags);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
+void spu_start_tick(struct spu_context *ctx);
+void spu_stop_tick(struct spu_context *ctx);
+void spu_sched_tick(void *data);
 int __init spu_sched_init(void);
 void __exit spu_sched_exit(void);
 
+extern char *isolated_loader;
+
 /*
  * spufs_wait
  * 	Same as wait_event_interruptible(), except that here
@@ -183,5 +239,17 @@ void spufs_ibox_callback(struct spu *spu
 void spufs_wbox_callback(struct spu *spu);
 void spufs_stop_callback(struct spu *spu);
 void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+extern struct spu_coredump_calls spufs_coredump_calls;
+struct spufs_coredump_reader {
+	char *name;
+	ssize_t (*read)(struct spu_context *ctx,
+			char __user *buffer, size_t size, loff_t *pos);
+	u64 (*get)(void *data);
+	size_t size;
+};
+extern struct spufs_coredump_reader spufs_coredump_read[];
+extern int spufs_coredump_num_notes;
 
 #endif
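
With the switch from the old rw-semaphore to ctx->state_mutex, spu_acquire()/spu_release() above become trivial inline wrappers; any code touching context state is expected to bracket the access with them. A hedged usage sketch (example_poke_runcntl is illustrative only and not part of spufs):

static void example_poke_runcntl(struct spu_context *ctx)
{
	spu_acquire(ctx);		/* takes ctx->state_mutex */
	if (ctx->state == SPU_STATE_RUNNABLE)
		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
	spu_release(ctx);		/* drops ctx->state_mutex */
}
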
Index: b/arch/powerpc/platforms/cell/spufs/switch.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -102,7 +102,7 @@ static inline int check_spu_isolate(stru
 	 *     saved at this time.
 	 */
 	isolate_state = SPU_STATUS_ISOLATED_STATE |
-	    SPU_STATUS_ISOLATED_LOAD_STAUTUS | SPU_STATUS_ISOLATED_EXIT_STAUTUS;
+	    SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS;
 	return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
 }
 
@@ -468,26 +468,6 @@ static inline void wait_purge_complete(s
 			 MFC_CNTL_PURGE_DMA_COMPLETE);
 }
 
-static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Save, Step 29:
-	 *     If MFC_SR1[R]='1', save SLBs in CSA.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W);
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW);
-			csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW);
-			eieio();
-		}
-	}
-}
-
 static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Save, Step 30:
@@ -708,20 +688,6 @@ static inline void resume_mfc_queue(stru
 	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
 }
 
-static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-
-	/* Save, Step 45:
-	 * Restore, Step 19:
-	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All.
-	 */
-	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
-		out_be64(&priv2->slb_invalidate_all_W, 0UL);
-		eieio();
-	}
-}
-
 static inline void get_kernel_slb(u64 ea, u64 slb[2])
 {
 	u64 llp;
@@ -765,7 +731,7 @@ static inline void setup_mfc_slbs(struct
 	 *     MFC_SR1[R]=1 (in other words, assume that
 	 *     translation is desired by OS environment).
 	 */
-	invalidate_slbs(csa, spu);
+	spu_invalidate_slbs(spu);
 	get_kernel_slb((unsigned long)&spu_save_code[0], code_slb);
 	get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb);
 	load_mfc_slb(spu, code_slb, 0);
@@ -1046,12 +1012,12 @@ static inline int suspend_spe(struct spu
 	 */
 	if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
 		if (in_be32(&prob->spu_status_R) &
-		    SPU_STATUS_ISOLATED_EXIT_STAUTUS) {
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
 			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
 					SPU_STATUS_RUNNING);
 		}
 		if ((in_be32(&prob->spu_status_R) &
-		     SPU_STATUS_ISOLATED_LOAD_STAUTUS)
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
 		    || (in_be32(&prob->spu_status_R) &
 			SPU_STATUS_ISOLATED_STATE)) {
 			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
@@ -1085,7 +1051,7 @@ static inline void clear_spu_status(stru
 	 */
 	if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
 		if (in_be32(&prob->spu_status_R) &
-		    SPU_STATUS_ISOLATED_EXIT_STAUTUS) {
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
 			spu_mfc_sr1_set(spu,
 					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
 			eieio();
@@ -1095,7 +1061,7 @@ static inline void clear_spu_status(stru
 					SPU_STATUS_RUNNING);
 		}
 		if ((in_be32(&prob->spu_status_R) &
-		     SPU_STATUS_ISOLATED_LOAD_STAUTUS)
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
 		    || (in_be32(&prob->spu_status_R) &
 			SPU_STATUS_ISOLATED_STATE)) {
 			spu_mfc_sr1_set(spu,
@@ -1718,27 +1684,6 @@ static inline void check_ppuint_mb_stat(
 	}
 }
 
-static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu)
-{
-	struct spu_priv2 __iomem *priv2 = spu->priv2;
-	int i;
-
-	/* Restore, Step 68:
-	 *     If MFC_SR1[R]='1', restore SLBs from CSA.
-	 */
-	if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) {
-		for (i = 0; i < 8; i++) {
-			out_be64(&priv2->slb_index_W, i);
-			eieio();
-			out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]);
-			out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]);
-			eieio();
-		}
-		out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W);
-		eieio();
-	}
-}
-
 static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
 {
 	/* Restore, Step 69:
@@ -1779,6 +1724,15 @@ static inline void restore_mfc_cntl(stru
 	 */
 	out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
 	eieio();
+	/*
+	 * FIXME: this is to restart a DMA that we were processing
+	 *        before the save. better remember the fault information
+	 *        in the csa instead.
+	 */
+	if ((csa->priv2.mfc_control_RW & MFC_CNTL_SUSPEND_DMA_QUEUE_MASK)) {
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+		eieio();
+	}
 }
 
 static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
@@ -1866,7 +1820,6 @@ static void save_csa(struct spu_state *p
 	set_mfc_tclass_id(prev, spu);	/* Step 26. */
 	purge_mfc_queue(prev, spu);	/* Step 27. */
 	wait_purge_complete(prev, spu);	/* Step 28. */
-	save_mfc_slbs(prev, spu);	/* Step 29. */
 	setup_mfc_sr1(prev, spu);	/* Step 30. */
 	save_spu_npc(prev, spu);	/* Step 31. */
 	save_spu_privcntl(prev, spu);	/* Step 32. */
@@ -1907,6 +1860,51 @@ static void save_lscsa(struct spu_state 
 	wait_spu_stopped(prev, spu);	/* Step 57. */
 }
 
+static void force_spu_isolate_exit(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Stop SPE execution and wait for completion. */
+	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	iobarrier_rw();
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	/* Restart SPE master runcntl. */
+	spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+	iobarrier_w();
+
+	/* Initiate isolate exit request and wait for completion. */
+	out_be64(&priv2->spu_privcntl_RW, 4LL);
+	iobarrier_w();
+	out_be32(&prob->spu_runcntl_RW, 2);
+	iobarrier_rw();
+	POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+				& SPU_STATUS_STOPPED_BY_STOP));
+
+	/* Reset load request to normal. */
+	out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+	iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate
+ *	Check SPU run-control state and force isolated
+ *	Check SPU run-control state and force an isolated
+ *	exit as necessary.
+static void stop_spu_isolate(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+		/* The SPU is in isolated state; the only way
+		 * to get it out is to perform an isolated
+		 * exit (clean) operation.
+		 */
+		force_spu_isolate_exit(spu);
+	}
+}
+
 static void harvest(struct spu_state *prev, struct spu *spu)
 {
 	/*
@@ -1919,6 +1917,7 @@ static void harvest(struct spu_state *pr
 	inhibit_user_access(prev, spu);	        /* Step 3.  */
 	terminate_spu_app(prev, spu);	        /* Step 4.  */
 	set_switch_pending(prev, spu);	        /* Step 5.  */
+	stop_spu_isolate(spu);			/* NEW.     */
 	remove_other_spu_access(prev, spu);	/* Step 6.  */
 	suspend_mfc(prev, spu);	                /* Step 7.  */
 	wait_suspend_mfc_complete(prev, spu);	/* Step 8.  */
@@ -1932,7 +1931,7 @@ static void harvest(struct spu_state *pr
 	reset_spu_privcntl(prev, spu);	        /* Step 16. */
 	reset_spu_lslr(prev, spu);              /* Step 17. */
 	setup_mfc_sr1(prev, spu);	        /* Step 18. */
-	invalidate_slbs(prev, spu);	        /* Step 19. */
+	spu_invalidate_slbs(spu);        	/* Step 19. */
 	reset_ch_part1(prev, spu);	        /* Step 20. */
 	reset_ch_part2(prev, spu);	        /* Step 21. */
 	enable_interrupts(prev, spu);	        /* Step 22. */
@@ -2000,7 +1999,7 @@ static void restore_csa(struct spu_state
 	restore_spu_mb(next, spu);	        /* Step 65. */
 	check_ppu_mb_stat(next, spu);	        /* Step 66. */
 	check_ppuint_mb_stat(next, spu);	/* Step 67. */
-	restore_mfc_slbs(next, spu);	        /* Step 68. */
+	spu_invalidate_slbs(spu);		/* Modified Step 68. */
 	restore_mfc_sr1(next, spu);	        /* Step 69. */
 	restore_other_spu_access(next, spu);	/* Step 70. */
 	restore_spu_runcntl(next, spu);	        /* Step 71. */
@@ -2087,11 +2086,11 @@ int spu_save(struct spu_state *prev, str
 	acquire_spu_lock(spu);	        /* Step 1.     */
 	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
 	release_spu_lock(spu);
-	if (rc) {
+	if (rc != 0 && rc != 2 && rc != 6) {
 		panic("%s failed on SPU[%d], rc=%d.\n",
 		      __func__, spu->number, rc);
 	}
-	return rc;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(spu_save);
 
@@ -2156,9 +2155,6 @@ static void init_priv1(struct spu_state 
 	    MFC_STATE1_PROBLEM_STATE_MASK |
 	    MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
 
-	/* Set storage description.  */
-	csa->priv1.mfc_sdr_RW = mfspr(SPRN_SDR1);
-
 	/* Enable OS-specific set of interrupts. */
 	csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
 	    CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
Index: b/arch/powerpc/platforms/cell/spufs/syscalls.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -38,7 +38,7 @@ static long do_spu_run(struct file *filp
 	u32 npc, status;
 
 	ret = -EFAULT;
-	if (get_user(npc, unpc) || get_user(status, ustatus))
+	if (get_user(npc, unpc))
 		goto out;
 
 	/* check if this file was created by spu_create */
@@ -49,7 +49,10 @@ static long do_spu_run(struct file *filp
 	i = SPUFS_I(filp->f_dentry->d_inode);
 	ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
 
-	if (put_user(npc, unpc) || put_user(status, ustatus))
+	if (put_user(npc, unpc))
+		ret = -EFAULT;
+
+	if (ustatus && put_user(status, ustatus))
 		ret = -EFAULT;
 out:
 	return ret;
@@ -87,7 +90,7 @@ asmlinkage long sys_spu_create(const cha
 		ret = path_lookup(tmp, LOOKUP_PARENT|
 				LOOKUP_OPEN|LOOKUP_CREATE, &nd);
 		if (!ret) {
-			ret = spufs_create_thread(&nd, flags, mode);
+			ret = spufs_create(&nd, flags, mode);
 			path_release(&nd);
 		}
 		putname(tmp);
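
The syscalls.c hunk above makes the status argument of spu_run optional: the initial get_user() on it is dropped and put_user() is only attempted when the pointer is non-NULL. A hedged user-space sketch of a caller that only wants the updated npc back (assumes a powerpc environment where __NR_spu_run is defined; error handling omitted):

#include <unistd.h>
#include <sys/syscall.h>

static long run_spe(int spu_fd, unsigned int *npc)
{
	/* the status pointer may now be NULL */
	return syscall(__NR_spu_run, spu_fd, npc, (void *)0);
}
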
Index: b/include/asm-powerpc/spu.h
===================================================================
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -104,21 +104,20 @@
 
 struct spu_context;
 struct spu_runqueue;
+struct device_node;
 
 struct spu {
-	char *name;
+	const char *name;
 	unsigned long local_store_phys;
 	u8 *local_store;
 	unsigned long problem_phys;
 	struct spu_problem __iomem *problem;
-	struct spu_priv1 __iomem *priv1;
 	struct spu_priv2 __iomem *priv2;
 	struct list_head list;
 	struct list_head sched_list;
+	struct list_head full_list;
 	int number;
-	int nid;
 	unsigned int irqs[3];
-	u32 isrc;
 	u32 node;
 	u64 flags;
 	u64 dar;
@@ -130,7 +129,6 @@ struct spu {
 	struct spu_runqueue *rq;
 	unsigned long long timestamp;
 	pid_t pid;
-	int prio;
 	int class_0_pending;
 	spinlock_t register_lock;
 
@@ -138,20 +136,42 @@ struct spu {
 	void (* ibox_callback)(struct spu *spu);
 	void (* stop_callback)(struct spu *spu);
 	void (* mfc_callback)(struct spu *spu);
+	void (* dma_callback)(struct spu *spu, int type);
 
 	char irq_c0[8];
 	char irq_c1[8];
 	char irq_c2[8];
 
+	u64 spe_id;
+
+	void* pdata; /* platform private data */
+
+	/* of based platforms only */
+	struct device_node *devnode;
+
+	/* native only */
+	struct spu_priv1 __iomem *priv1;
+
+	/* beat only */
+	u64 shadow_int_mask_RW[3];
+
 	struct sys_device sysdev;
 };
 
 struct spu *spu_alloc(void);
+struct spu *spu_alloc_node(int node);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
 
+extern void spu_invalidate_slbs(struct spu *spu);
+extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
+
+/* Calls from the memory management to the SPU */
+struct mm_struct;
+extern void spu_flush_all_slbs(struct mm_struct *mm);
+
 /* system callbacks from the SPU */
 struct spu_syscall_block {
 	u64 nr_ret;
@@ -160,6 +180,7 @@ struct spu_syscall_block {
 extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
+struct file;
 extern struct spufs_calls {
 	asmlinkage long (*create_thread)(const char __user *name,
 					unsigned int flags, mode_t mode);
@@ -168,6 +189,31 @@ extern struct spufs_calls {
 	struct module *owner;
 } spufs_calls;
 
+/* coredump calls implemented in spufs */
+struct spu_coredump_calls {
+	asmlinkage int (*arch_notes_size)(void);
+	asmlinkage void (*arch_write_notes)(struct file *file);
+	struct module *owner;
+};
+
+/* return status from spu_run, same as in libspe */
+#define SPE_EVENT_DMA_ALIGNMENT		0x0008	/*A DMA alignment error */
+#define SPE_EVENT_SPE_ERROR		0x0010	/*An illegal instruction error*/
+#define SPE_EVENT_SPE_DATA_SEGMENT	0x0020	/*A DMA segmentation error    */
+#define SPE_EVENT_SPE_DATA_STORAGE	0x0040	/*A DMA storage error */
+#define SPE_EVENT_INVALID_DMA		0x0800	/* Invalid MFC DMA */
+
+/*
+ * Flags for sys_spu_create.
+ */
+#define SPU_CREATE_EVENTS_ENABLED	0x0001
+#define SPU_CREATE_GANG			0x0002
+#define SPU_CREATE_NOSCHED		0x0004
+#define SPU_CREATE_ISOLATE		0x0008
+
+#define SPU_CREATE_FLAG_ALL		0x000f /* mask of all valid flags */
+
+
 #ifdef CONFIG_SPU_FS_MODULE
 int register_spu_syscalls(struct spufs_calls *calls);
 void unregister_spu_syscalls(struct spufs_calls *calls);
@@ -181,6 +227,34 @@ static inline void unregister_spu_syscal
 }
 #endif /* MODULE */
 
+int register_arch_coredump_calls(struct spu_coredump_calls *calls);
+void unregister_arch_coredump_calls(struct spu_coredump_calls *calls);
+
+int spu_add_sysdev_attr(struct sysdev_attribute *attr);
+void spu_remove_sysdev_attr(struct sysdev_attribute *attr);
+
+int spu_add_sysdev_attr_group(struct attribute_group *attrs);
+void spu_remove_sysdev_attr_group(struct attribute_group *attrs);
+
+
+/*
+ * Notifier blocks:
+ *
+ * oprofile can get notified when a context switch is performed
+ * on an spe. The notifier function that gets called is passed
+ * a pointer to the SPU structure as well as the object-id that
+ * identifies the binary running on that SPU now.
+ *
+ * For a context save, the object-id that is passed is zero,
+ * identifying that the kernel will run from that moment on.
+ *
+ * For a context restore, the object-id is the value written
+ * to the object-id spufs file from user space and the notifier
+ * function can assume that spu->ctx is valid.
+ */
+struct notifier_block;
+int spu_switch_event_register(struct notifier_block * n);
+int spu_switch_event_unregister(struct notifier_block * n);
 
 /*
  * This defines the Local Store, Problem Area and Privlege Area of an SPU.
@@ -241,6 +315,7 @@ struct spu_problem {
 	u32 spu_runcntl_RW;					/* 0x401c */
 #define SPU_RUNCNTL_STOP	0L
 #define SPU_RUNCNTL_RUNNABLE	1L
+#define SPU_RUNCNTL_ISOLATE	2L
 	u8  pad_0x4020_0x4024[0x4];				/* 0x4020 */
 	u32 spu_status_R;					/* 0x4024 */
 #define SPU_STOP_STATUS_SHIFT           16
@@ -253,8 +328,8 @@ struct spu_problem {
 #define SPU_STATUS_INVALID_INSTR        0x20
 #define SPU_STATUS_INVALID_CH           0x40
 #define SPU_STATUS_ISOLATED_STATE       0x80
-#define SPU_STATUS_ISOLATED_LOAD_STAUTUS 0x200
-#define SPU_STATUS_ISOLATED_EXIT_STAUTUS 0x400
+#define SPU_STATUS_ISOLATED_LOAD_STATUS 0x200
+#define SPU_STATUS_ISOLATED_EXIT_STATUS 0x400
 	u8  pad_0x4028_0x402c[0x4];				/* 0x4028 */
 	u32 spu_spe_R;						/* 0x402c */
 	u8  pad_0x4030_0x4034[0x4];				/* 0x4030 */
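
The notifier interface declared above is how oprofile learns about SPU context switches. A minimal consumer sketch, assuming the chain is invoked with the object-id as the notifier value and the struct spu pointer as the data argument, as the comment describes; all names here are illustrative:

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <asm/spu.h>

static int my_spu_switch_notify(struct notifier_block *nb,
				unsigned long object_id, void *data)
{
	struct spu *spu = data;

	/* object_id == 0 means a context save: the kernel runs from now on */
	pr_debug("SPU %d now runs object 0x%lx\n", spu->number, object_id);
	return 0;
}

static struct notifier_block my_spu_switch_nb = {
	.notifier_call	= my_spu_switch_notify,
};

/* registered e.g. from module init code: */
/*	spu_switch_event_register(&my_spu_switch_nb); */
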
Index: b/include/asm-powerpc/spu_csa.h
===================================================================
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -151,7 +151,6 @@ struct spu_priv1_collapsed {
 	u64 mfc_fir_chkstp_enable_RW;
 	u64 smf_sbi_signal_sel;
 	u64 smf_ato_signal_sel;
-	u64 mfc_sdr_RW;
 	u64 tlb_index_hint_RO;
 	u64 tlb_index_W;
 	u64 tlb_vpn_RW;
@@ -222,8 +221,6 @@ struct spu_priv2_collapsed {
  * @spu_chnlcnt_RW: Array of saved channel counts.
  * @spu_chnldata_RW: Array of saved channel data.
  * @suspend_time: Time stamp when decrementer disabled.
- * @slb_esid_RW: Array of saved SLB esid entries.
- * @slb_vsid_RW: Array of saved SLB vsid entries.
  *
  * Structure representing the whole of the SPU
  * context save area (CSA).  This struct contains
@@ -246,8 +243,6 @@ struct spu_state {
 	u32 spu_mailbox_data[4];
 	u32 pu_mailbox_data[1];
 	unsigned long suspend_time;
-	u64 slb_esid_RW[8];
-	u64 slb_vsid_RW[8];
 	spinlock_t register_lock;
 };
 
Index: b/include/asm-powerpc/spu_info.h
===================================================================
--- /dev/null
+++ b/include/asm-powerpc/spu_info.h
@@ -0,0 +1,54 @@
+/*
+ * SPU info structures
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _SPU_INFO_H
+#define _SPU_INFO_H
+
+#ifdef __KERNEL__
+#include <asm/spu.h>
+#include <linux/types.h>
+#else
+struct mfc_cq_sr {
+	__u64 mfc_cq_data0_RW;
+	__u64 mfc_cq_data1_RW;
+	__u64 mfc_cq_data2_RW;
+	__u64 mfc_cq_data3_RW;
+};
+#endif /* __KERNEL__ */
+
+struct spu_dma_info {
+	__u64 dma_info_type;
+	__u64 dma_info_mask;
+	__u64 dma_info_status;
+	__u64 dma_info_stall_and_notify;
+	__u64 dma_info_atomic_command_status;
+	struct mfc_cq_sr dma_info_command_data[16];
+};
+
+struct spu_proxydma_info {
+	__u64 proxydma_info_type;
+	__u64 proxydma_info_mask;
+	__u64 proxydma_info_status;
+	struct mfc_cq_sr proxydma_info_command_data[8];
+};
+
+#endif
Index: b/include/asm-powerpc/spu_priv1.h
===================================================================
--- a/include/asm-powerpc/spu_priv1.h
+++ b/include/asm-powerpc/spu_priv1.h
@@ -21,12 +21,13 @@
 #define _SPU_PRIV1_H
 #if defined(__KERNEL__)
 
+#include <linux/types.h>
+
 struct spu;
 
 /* access to priv1 registers */
 
-struct spu_priv1_ops
-{
+struct spu_priv1_ops {
 	void (*int_mask_and) (struct spu *spu, int class, u64 mask);
 	void (*int_mask_or) (struct spu *spu, int class, u64 mask);
 	void (*int_mask_set) (struct spu *spu, int class, u64 mask);
@@ -37,7 +38,7 @@ struct spu_priv1_ops
 	u64 (*mfc_dar_get) (struct spu *spu);
 	u64 (*mfc_dsisr_get) (struct spu *spu);
 	void (*mfc_dsisr_set) (struct spu *spu, u64 dsisr);
-	void (*mfc_sdr_set) (struct spu *spu, u64 sdr);
+	void (*mfc_sdr_setup) (struct spu *spu);
 	void (*mfc_sr1_set) (struct spu *spu, u64 sr1);
 	u64 (*mfc_sr1_get) (struct spu *spu);
 	void (*mfc_tclass_id_set) (struct spu *spu, u64 tclass_id);
@@ -112,9 +113,9 @@ spu_mfc_dsisr_set (struct spu *spu, u64 
 }
 
 static inline void
-spu_mfc_sdr_set (struct spu *spu, u64 sdr)
+spu_mfc_sdr_setup (struct spu *spu)
 {
-	spu_priv1_ops->mfc_sdr_set(spu, sdr);
+	spu_priv1_ops->mfc_sdr_setup(spu);
 }
 
 static inline void
@@ -171,12 +172,43 @@ spu_resource_allocation_enable_get (stru
 	return spu_priv1_ops->resource_allocation_enable_get(spu);
 }
 
-/* The declarations folowing are put here for convenience
- * and only intended to be used by the platform setup code
- * for initializing spu_priv1_ops.
+/* spu management abstraction */
+
+struct spu_management_ops {
+	int (*enumerate_spus)(int (*fn)(void *data));
+	int (*create_spu)(struct spu *spu, void *data);
+	int (*destroy_spu)(struct spu *spu);
+};
+
+extern const struct spu_management_ops* spu_management_ops;
+
+static inline int
+spu_enumerate_spus (int (*fn)(void *data))
+{
+	return spu_management_ops->enumerate_spus(fn);
+}
+
+static inline int
+spu_create_spu (struct spu *spu, void *data)
+{
+	return spu_management_ops->create_spu(spu, data);
+}
+
+static inline int
+spu_destroy_spu (struct spu *spu)
+{
+	return spu_management_ops->destroy_spu(spu);
+}
+
+/*
+ * The following declarations are put here for convenience
+ * and only intended to be used by the platform setup code.
  */
 
 extern const struct spu_priv1_ops spu_priv1_mmio_ops;
+extern const struct spu_priv1_ops spu_priv1_beat_ops;
+
+extern const struct spu_management_ops spu_management_of_ops;
 
 #endif /* __KERNEL__ */
 #endif
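
The new spu_management_ops table mirrors the existing spu_priv1_ops indirection: platform setup code selects an implementation once and spu_base only ever calls through the ops pointers. A hedged sketch of how a native Cell platform would wire both up (the function name is illustrative; the of-based management table is added in spu_manage.c further down in this patch):

static void __init example_cell_spu_setup(void)
{
	spu_priv1_ops = &spu_priv1_mmio_ops;
	spu_management_ops = &spu_management_of_ops;
}
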
Index: b/arch/powerpc/platforms/cell/Makefile
===================================================================
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -10,6 +10,11 @@ endif
 spufs-modular-$(CONFIG_SPU_FS)		+= spu_syscalls.o
 spu-priv1-$(CONFIG_PPC_CELL_NATIVE)	+= spu_priv1_mmio.o
 
+spu-manage-$(CONFIG_PPC_CELL_NATIVE)	+= spu_manage.o
+
 obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
+					   spu_coredump.o \
 					   $(spufs-modular-m) \
-					   $(spu-priv1-y) spufs/
+					   $(spu-priv1-y) \
+					   $(spu-manage-y) \
+					   spufs/
Index: b/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -25,42 +25,91 @@
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/module.h>
-#include <linux/poll.h>
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
+#include <linux/mm.h>
+#include <linux/io.h>
 #include <linux/mutex.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
-#include <asm/mmu_context.h>
-
-#include "interrupt.h"
 
+const struct spu_management_ops *spu_management_ops;
 const struct spu_priv1_ops *spu_priv1_ops;
 
+static struct list_head spu_list[MAX_NUMNODES];
+static LIST_HEAD(spu_full_list);
+static DEFINE_MUTEX(spu_mutex);
+static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED;
+
 EXPORT_SYMBOL_GPL(spu_priv1_ops);
 
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	__cpus_setall(&mm->cpu_vm_mask, nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_list_lock, flags);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_list_lock, flags);
+	if (mm)
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
 static int __spu_trap_invalid_dma(struct spu *spu)
 {
 	pr_debug("%s\n", __FUNCTION__);
-	force_sig(SIGBUS, /* info, */ current);
+	spu->dma_callback(spu, SPE_EVENT_INVALID_DMA);
 	return 0;
 }
 
 static int __spu_trap_dma_align(struct spu *spu)
 {
 	pr_debug("%s\n", __FUNCTION__);
-	force_sig(SIGBUS, /* info, */ current);
+	spu->dma_callback(spu, SPE_EVENT_DMA_ALIGNMENT);
 	return 0;
 }
 
 static int __spu_trap_error(struct spu *spu)
 {
 	pr_debug("%s\n", __FUNCTION__);
-	force_sig(SIGILL, /* info, */ current);
+	spu->dma_callback(spu, SPE_EVENT_SPE_ERROR);
 	return 0;
 }
 
@@ -77,6 +126,7 @@ static int __spu_trap_data_seg(struct sp
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
 	struct mm_struct *mm = spu->mm;
 	u64 esid, vsid, llp;
+	int psize;
 
 	pr_debug("%s\n", __FUNCTION__);
 
@@ -87,26 +137,43 @@ static int __spu_trap_data_seg(struct sp
 		printk("%s: invalid access during switch!\n", __func__);
 		return 1;
 	}
-	if (!mm || (REGION_ID(ea) != USER_REGION_ID)) {
+	esid = (ea & ESID_MASK) | SLB_ESID_V;
+
+	switch(REGION_ID(ea)) {
+	case USER_REGION_ID:
+#ifdef CONFIG_HUGETLB_PAGE
+		if (in_hugepage_area(mm->context, ea))
+			psize = mmu_huge_psize;
+		else
+#endif
+			psize = mm->context.user_psize;
+		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
+				SLB_VSID_USER;
+		break;
+	case VMALLOC_REGION_ID:
+		if (ea < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
+		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
+			SLB_VSID_KERNEL;
+		break;
+	case KERNEL_REGION_ID:
+		psize = mmu_linear_psize;
+		vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
+			SLB_VSID_KERNEL;
+		break;
+	default:
 		/* Future: support kernel segments so that drivers
 		 * can use SPUs.
 		 */
 		pr_debug("invalid region access at %016lx\n", ea);
 		return 1;
 	}
-
-	esid = (ea & ESID_MASK) | SLB_ESID_V;
-#ifdef CONFIG_HUGETLB_PAGE
-	if (in_hugepage_area(mm->context, ea))
-		llp = mmu_psize_defs[mmu_huge_psize].sllp;
-	else
-#endif
-		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
-	vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
-			SLB_VSID_USER | llp;
+	llp = mmu_psize_defs[psize].sllp;
 
 	out_be64(&priv2->slb_index_W, spu->slb_replace);
-	out_be64(&priv2->slb_vsid_RW, vsid);
+	out_be64(&priv2->slb_vsid_RW, vsid | llp);
 	out_be64(&priv2->slb_esid_RW, esid);
 
 	spu->slb_replace++;
@@ -160,9 +227,11 @@ int
 spu_irq_class_0_bottom(struct spu *spu)
 {
 	unsigned long stat, mask;
+	unsigned long flags;
 
 	spu->class_0_pending = 0;
 
+	spin_lock_irqsave(&spu->register_lock, flags);
 	mask = spu_int_mask_get(spu, 0);
 	stat = spu_int_stat_get(spu, 0);
 
@@ -178,6 +247,7 @@ spu_irq_class_0_bottom(struct spu *spu)
 		__spu_trap_error(spu);
 
 	spu_int_stat_clear(spu, 0, stat);
+	spin_unlock_irqrestore(&spu->register_lock, flags);
 
 	return (stat & 0x7) ? -EIO : 0;
 }
@@ -317,9 +387,6 @@ static void spu_free_irqs(struct spu *sp
 		free_irq(spu->irqs[2], spu);
 }
 
-static LIST_HEAD(spu_list);
-static DEFINE_MUTEX(spu_mutex);
-
 static void spu_init_channels(struct spu *spu)
 {
 	static const struct {
@@ -354,32 +421,41 @@ static void spu_init_channels(struct spu
 	}
 }
 
-struct spu *spu_alloc(void)
+struct spu *spu_alloc_node(int node)
 {
-	struct spu *spu;
+	struct spu *spu = NULL;
 
 	mutex_lock(&spu_mutex);
-	if (!list_empty(&spu_list)) {
-		spu = list_entry(spu_list.next, struct spu, list);
+	if (!list_empty(&spu_list[node])) {
+		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
-		pr_debug("Got SPU %x %d\n", spu->isrc, spu->number);
-	} else {
-		pr_debug("No SPU left\n");
-		spu = NULL;
+		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
-	if (spu)
-		spu_init_channels(spu);
+	return spu;
+}
+EXPORT_SYMBOL_GPL(spu_alloc_node);
+
+struct spu *spu_alloc(void)
+{
+	struct spu *spu = NULL;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
 
 	return spu;
 }
-EXPORT_SYMBOL_GPL(spu_alloc);
 
 void spu_free(struct spu *spu)
 {
 	mutex_lock(&spu_mutex);
-	list_add_tail(&spu->list, &spu_list);
+	list_add_tail(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_free);
@@ -481,173 +557,65 @@ int spu_irq_class_1_bottom(struct spu *s
 	if (!error) {
 		spu_restart_dma(spu);
 	} else {
-		__spu_trap_invalid_dma(spu);
+		spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
 	}
 	return ret;
 }
 
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-	struct device_node *cpu;
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-	return id ? *id : 0;
-}
+struct sysdev_class spu_sysdev_class = {
+	set_kset_name("spu")
+};
 
-static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe,
-		const char *prop)
+int spu_add_sysdev_attr(struct sysdev_attribute *attr)
 {
-	static DEFINE_MUTEX(add_spumem_mutex);
-
-	struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *p;
-	int proplen;
-
-	unsigned long start_pfn, nr_pages;
-	struct pglist_data *pgdata;
-	struct zone *zone;
-	int ret;
-
-	p = (void*)get_property(spe, prop, &proplen);
-	WARN_ON(proplen != sizeof (*p));
-
-	start_pfn = p->address >> PAGE_SHIFT;
-	nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	pgdata = NODE_DATA(spu->nid);
-	zone = pgdata->node_zones;
+	struct spu *spu;
+	mutex_lock(&spu_mutex);
 
-	/* XXX rethink locking here */
-	mutex_lock(&add_spumem_mutex);
-	ret = __add_pages(zone, start_pfn, nr_pages);
-	mutex_unlock(&add_spumem_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysdev_create_file(&spu->sysdev, attr);
 
-	return ret;
+	mutex_unlock(&spu_mutex);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
 
-static void __iomem * __init map_spe_prop(struct spu *spu,
-		struct device_node *n, const char *name)
+int spu_add_sysdev_attr_group(struct attribute_group *attrs)
 {
-	struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *prop;
-
-	void *p;
-	int proplen;
-	void* ret = NULL;
-	int err = 0;
-
-	p = get_property(n, name, &proplen);
-	if (proplen != sizeof (struct address_prop))
-		return NULL;
-
-	prop = p;
-
-	err = cell_spuprop_present(spu, n, name);
-	if (err && (err != -EEXIST))
-		goto out;
+	struct spu *spu;
+	mutex_lock(&spu_mutex);
 
-	ret = ioremap(prop->address, prop->len);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysfs_create_group(&spu->sysdev.kobj, attrs);
 
- out:
-	return ret;
+	mutex_unlock(&spu_mutex);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
 
-static void spu_unmap(struct spu *spu)
-{
-	iounmap(spu->priv2);
-	iounmap(spu->priv1);
-	iounmap(spu->problem);
-	iounmap((u8 __iomem *)spu->local_store);
-}
 
-/* This function shall be abstracted for HV platforms */
-static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
 {
-	unsigned int isrc;
-	u32 *tmp;
-
-	/* Get the interrupt source unit from the device-tree */
-	tmp = (u32 *)get_property(np, "isrc", NULL);
-	if (!tmp)
-		return -ENODEV;
-	isrc = tmp[0];
-
-	/* Add the node number */
-	isrc |= spu->node << IIC_IRQ_NODE_SHIFT;
-	spu->isrc = isrc;
+	struct spu *spu;
+	mutex_lock(&spu_mutex);
 
-	/* Now map interrupts of all 3 classes */
-	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
-	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
-	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysdev_remove_file(&spu->sysdev, attr);
 
-	/* Right now, we only fail if class 2 failed */
-	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+	mutex_unlock(&spu_mutex);
 }
+EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
 
-static int __init spu_map_device(struct spu *spu, struct device_node *node)
+void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
 {
-	char *prop;
-	int ret;
-
-	ret = -ENODEV;
-	spu->name = get_property(node, "name", NULL);
-	if (!spu->name)
-		goto out;
-
-	prop = get_property(node, "local-store", NULL);
-	if (!prop)
-		goto out;
-	spu->local_store_phys = *(unsigned long *)prop;
-
-	/* we use local store as ram, not io memory */
-	spu->local_store = (void __force *)
-		map_spe_prop(spu, node, "local-store");
-	if (!spu->local_store)
-		goto out;
-
-	prop = get_property(node, "problem", NULL);
-	if (!prop)
-		goto out_unmap;
-	spu->problem_phys = *(unsigned long *)prop;
-
-	spu->problem= map_spe_prop(spu, node, "problem");
-	if (!spu->problem)
-		goto out_unmap;
-
-	spu->priv1= map_spe_prop(spu, node, "priv1");
-	/* priv1 is not available on a hypervisor */
-
-	spu->priv2= map_spe_prop(spu, node, "priv2");
-	if (!spu->priv2)
-		goto out_unmap;
-	ret = 0;
-	goto out;
-
-out_unmap:
-	spu_unmap(spu);
-out:
-	return ret;
-}
-
-struct sysdev_class spu_sysdev_class = {
-	set_kset_name("spu")
-};
+	struct spu *spu;
+	mutex_lock(&spu_mutex);
 
-static ssize_t spu_show_isrc(struct sys_device *sysdev, char *buf)
-{
-	struct spu *spu = container_of(sysdev, struct spu, sysdev);
-	return sprintf(buf, "%d\n", spu->isrc);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysfs_remove_group(&spu->sysdev.kobj, attrs);
 
+	mutex_unlock(&spu_mutex);
 }
-static SYSDEV_ATTR(isrc, 0400, spu_show_isrc, NULL);
-
-extern int attach_sysdev_to_node(struct sys_device *dev, int nid);
+EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
 
 static int spu_create_sysdev(struct spu *spu)
 {
@@ -662,70 +630,62 @@ static int spu_create_sysdev(struct spu 
 		return ret;
 	}
 
-	if (spu->isrc != 0)
-		sysdev_create_file(&spu->sysdev, &attr_isrc);
-	sysfs_add_device_to_node(&spu->sysdev, spu->nid);
+	sysfs_add_device_to_node(&spu->sysdev, spu->node);
 
 	return 0;
 }
 
 static void spu_destroy_sysdev(struct spu *spu)
 {
-	sysdev_remove_file(&spu->sysdev, &attr_isrc);
-	sysfs_remove_device_from_node(&spu->sysdev, spu->nid);
+	sysfs_remove_device_from_node(&spu->sysdev, spu->node);
 	sysdev_unregister(&spu->sysdev);
 }
 
-static int __init create_spu(struct device_node *spe)
+static int __init create_spu(void *data)
 {
 	struct spu *spu;
 	int ret;
 	static int number;
+	unsigned long flags;
 
 	ret = -ENOMEM;
 	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
 	if (!spu)
 		goto out;
 
-	ret = spu_map_device(spu, spe);
+	spin_lock_init(&spu->register_lock);
+	mutex_lock(&spu_mutex);
+	spu->number = number++;
+	mutex_unlock(&spu_mutex);
+
+	ret = spu_create_spu(spu, data);
+
 	if (ret)
 		goto out_free;
 
-	spu->node = find_spu_node_id(spe);
-	spu->nid = of_node_to_nid(spe);
-	if (spu->nid == -1)
-		spu->nid = 0;
-	ret = spu_map_interrupts(spu, spe);
-	if (ret)
-		goto out_unmap;
-	spin_lock_init(&spu->register_lock);
-	spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1));
+	spu_mfc_sdr_setup(spu);
 	spu_mfc_sr1_set(spu, 0x33);
-	mutex_lock(&spu_mutex);
-
-	spu->number = number++;
 	ret = spu_request_irqs(spu);
 	if (ret)
-		goto out_unmap;
+		goto out_destroy;
 
 	ret = spu_create_sysdev(spu);
 	if (ret)
 		goto out_free_irqs;
 
-	list_add(&spu->list, &spu_list);
+	mutex_lock(&spu_mutex);
+	spin_lock_irqsave(&spu_list_lock, flags);
+	list_add(&spu->list, &spu_list[spu->node]);
+	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_list_lock, flags);
 	mutex_unlock(&spu_mutex);
 
-	pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n",
-		spu->name, spu->isrc, spu->local_store,
-		spu->problem, spu->priv1, spu->priv2, spu->number);
 	goto out;
 
 out_free_irqs:
 	spu_free_irqs(spu);
-
-out_unmap:
-	mutex_unlock(&spu_mutex);
-	spu_unmap(spu);
+out_destroy:
+	spu_destroy_spu(spu);
 out_free:
 	kfree(spu);
 out:
@@ -735,19 +695,24 @@ out:
 static void destroy_spu(struct spu *spu)
 {
 	list_del_init(&spu->list);
+	list_del_init(&spu->full_list);
 
 	spu_destroy_sysdev(spu);
 	spu_free_irqs(spu);
-	spu_unmap(spu);
+	spu_destroy_spu(spu);
 	kfree(spu);
 }
 
 static void cleanup_spu_base(void)
 {
 	struct spu *spu, *tmp;
+	int node;
+
 	mutex_lock(&spu_mutex);
-	list_for_each_entry_safe(spu, tmp, &spu_list, list)
-		destroy_spu(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
+			destroy_spu(spu);
+	}
 	mutex_unlock(&spu_mutex);
 	sysdev_class_unregister(&spu_sysdev_class);
 }
@@ -755,37 +720,28 @@ module_exit(cleanup_spu_base);
 
 static int __init init_spu_base(void)
 {
-	struct device_node *node;
-	int ret;
+	int i, ret;
+
+	if (!spu_management_ops)
+		return 0;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
 		return ret;
 
-	ret = -ENODEV;
-	for (node = of_find_node_by_type(NULL, "spe");
-			node; node = of_find_node_by_type(node, "spe")) {
-		ret = create_spu(node);
-		if (ret) {
-			printk(KERN_WARNING "%s: Error initializing %s\n",
-				__FUNCTION__, node->name);
-			cleanup_spu_base();
-			break;
-		}
-	}
-	/* in some old firmware versions, the spe is called 'spc', so we
-	   look for that as well */
-	for (node = of_find_node_by_type(NULL, "spc");
-			node; node = of_find_node_by_type(node, "spc")) {
-		ret = create_spu(node);
-		if (ret) {
-			printk(KERN_WARNING "%s: Error initializing %s\n",
-				__FUNCTION__, node->name);
-			cleanup_spu_base();
-			break;
-		}
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
+
+	ret = spu_enumerate_spus(create_spu);
+
+	if (ret) {
+		printk(KERN_WARNING "%s: Error initializing spus\n",
+			__FUNCTION__);
+		cleanup_spu_base();
+		return ret;
 	}
+
 	return ret;
 }
 module_init(init_spu_base);
Index: b/arch/powerpc/platforms/cell/spu_coredump.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_coredump.c
@@ -0,0 +1,81 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+
+static struct spu_coredump_calls spu_coredump_calls;
+static DEFINE_MUTEX(spu_coredump_mutex);
+
+int arch_notes_size(void)
+{
+	long ret;
+	struct module *owner = spu_coredump_calls.owner;
+
+	ret = -ENOSYS;
+	mutex_lock(&spu_coredump_mutex);
+	if (owner && try_module_get(owner)) {
+		ret = spu_coredump_calls.arch_notes_size();
+		module_put(owner);
+	}
+	mutex_unlock(&spu_coredump_mutex);
+	return ret;
+}
+
+void arch_write_notes(struct file *file)
+{
+	struct module *owner = spu_coredump_calls.owner;
+
+	mutex_lock(&spu_coredump_mutex);
+	if (owner && try_module_get(owner)) {
+		spu_coredump_calls.arch_write_notes(file);
+		module_put(owner);
+	}
+	mutex_unlock(&spu_coredump_mutex);
+}
+
+int register_arch_coredump_calls(struct spu_coredump_calls *calls)
+{
+	if (spu_coredump_calls.owner)
+		return -EBUSY;
+
+	mutex_lock(&spu_coredump_mutex);
+	spu_coredump_calls.arch_notes_size = calls->arch_notes_size;
+	spu_coredump_calls.arch_write_notes = calls->arch_write_notes;
+	spu_coredump_calls.owner = calls->owner;
+	mutex_unlock(&spu_coredump_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_arch_coredump_calls);
+
+void unregister_arch_coredump_calls(struct spu_coredump_calls *calls)
+{
+	BUG_ON(spu_coredump_calls.owner != calls->owner);
+
+	mutex_lock(&spu_coredump_mutex);
+	spu_coredump_calls.owner = NULL;
+	mutex_unlock(&spu_coredump_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_arch_coredump_calls);
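
register_arch_coredump_calls() above is the hook spufs uses to plug its ELF note writers into the core-dump path; only one provider may be registered at a time (the -EBUSY case). A hedged registration sketch with illustrative stub callbacks:

#include <linux/module.h>
#include <linux/fs.h>
#include <asm/spu.h>

static asmlinkage int my_notes_size(void)
{
	return 0;			/* no extra notes in this sketch */
}

static asmlinkage void my_write_notes(struct file *file)
{
}

static struct spu_coredump_calls my_coredump_calls = {
	.arch_notes_size	= my_notes_size,
	.arch_write_notes	= my_write_notes,
	.owner			= THIS_MODULE,
};

static int __init my_coredump_init(void)
{
	return register_arch_coredump_calls(&my_coredump_calls);
}

static void __exit my_coredump_exit(void)
{
	unregister_arch_coredump_calls(&my_coredump_calls);
}
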
Index: b/arch/powerpc/platforms/cell/spu_manage.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,366 @@
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "interrupt.h"
+
+struct device_node *spu_devnode(struct spu *spu)
+{
+	return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const unsigned int *prop;
+	int proplen;
+	prop = get_property(spe, "unit-id", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	prop = get_property(spe, "reg", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	return 0;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		iounmap(spu->priv1);
+	iounmap(spu->priv2);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu,
+	struct device_node *np)
+{
+	unsigned int isrc;
+	const u32 *tmp;
+	int nid;
+
+	/* Get the interrupt source unit from the device-tree */
+	tmp = get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	isrc = tmp[0];
+
+	tmp = get_property(np->parent->parent, "node-id", NULL);
+	if (!tmp) {
+		printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__);
+		nid = spu->node;
+	} else
+		nid = tmp[0];
+
+	/* Add the node number */
+	isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+	/* Right now, we only fail if class 2 failed */
+	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+					      struct device_node *n,
+					      const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+	int proplen;
+
+	prop = get_property(n, name, &proplen);
+	if (prop == NULL || proplen != sizeof (struct address_prop))
+		return NULL;
+
+	return ioremap(prop->address, prop->len);
+}
+
+static int __init spu_map_device_old(struct spu *spu)
+{
+	struct device_node *node = spu->devnode;
+	const char *prop;
+	int ret;
+
+	ret = -ENODEV;
+	spu->name = get_property(node, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	prop = get_property(node, "local-store", NULL);
+	if (!prop)
+		goto out;
+	spu->local_store_phys = *(unsigned long *)prop;
+
+	/* we use local store as ram, not io memory */
+	spu->local_store = (void __force *)
+		spu_map_prop_old(spu, node, "local-store");
+	if (!spu->local_store)
+		goto out;
+
+	prop = get_property(node, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
+	spu->problem = spu_map_prop_old(spu, node, "problem");
+	if (!spu->problem)
+		goto out_unmap;
+
+	spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+	if (!spu->priv2)
+		goto out_unmap;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+		if (!spu->priv1)
+			goto out_unmap;
+	}
+
+	ret = 0;
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	struct of_irq oirq;
+	int ret;
+	int i;
+
+	for (i=0; i < 3; i++) {
+		ret = of_irq_map_one(np, i, &oirq);
+		if (ret) {
+			pr_debug("spu_new: failed to get irq %d\n", i);
+			goto err;
+		}
+		ret = -EINVAL;
+		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
+			 oirq.controller->full_name);
+		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
+					oirq.specifier, oirq.size);
+		if (spu->irqs[i] == NO_IRQ) {
+			pr_debug("spu_new: failed to map it !\n");
+			goto err;
+		}
+	}
+	return 0;
+
+err:
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+		spu->name);
+	for (; i >= 0; i--) {
+		if (spu->irqs[i] != NO_IRQ)
+			irq_dispose_mapping(spu->irqs[i]);
+	}
+	return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr,
+			    void __iomem** virt, unsigned long *phys)
+{
+	struct device_node *np = spu->devnode;
+	struct resource resource = { };
+	unsigned long len;
+	int ret;
+
+	ret = of_address_to_resource(np, nr, &resource);
+	if (ret)
+		return ret;
+	if (phys)
+		*phys = resource.start;
+	len = resource.end - resource.start + 1;
+	*virt = ioremap(resource.start, len);
+	if (!*virt)
+		return -EINVAL;
+	return 0;
+}
+
+static int __init spu_map_device(struct spu *spu)
+{
+	struct device_node *np = spu->devnode;
+	int ret = -ENODEV;
+
+	spu->name = get_property(np, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+			       &spu->local_store_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 0\n",
+			 np->full_name);
+		goto out;
+	}
+	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+			       &spu->problem_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 1\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 2\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		ret = spu_map_resource(spu, 3,
+			       (void __iomem**)&spu->priv1, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 3\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	pr_debug("spu_new: %s maps:\n", np->full_name);
+	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
+		 spu->local_store_phys, spu->local_store);
+	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
+		 spu->problem_phys, spu->problem);
+	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
+	pr_debug("  priv1         :                       0x%p\n", spu->priv1);
+
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+	return ret;
+}
+
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+
+	ret = -ENODEV;
+	for (node = of_find_node_by_type(NULL, "spe");
+			node; node = of_find_node_by_type(node, "spe")) {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %s\n",
+				__FUNCTION__, node->name);
+			break;
+		}
+	}
+	return ret;
+}
+
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %s on node %d ignored,"
+		       " node number too big\n", spe->full_name, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __FUNCTION__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __FUNCTION__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int of_destroy_spu(struct spu *spu)
+{
+	spu_unmap(spu);
+	of_node_put(spu->devnode);
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+};
Index: b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -18,13 +18,24 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/interrupt.h>
+#include <linux/list.h>
 #include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
 
-#include <asm/io.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
 
 #include "interrupt.h"
+#include "spu_priv1_mmio.h"
 
 static void int_mask_and(struct spu *spu, int class, u64 mask)
 {
@@ -84,9 +95,9 @@ static void mfc_dsisr_set(struct spu *sp
 	out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
 }
 
-static void mfc_sdr_set(struct spu *spu, u64 sdr)
+static void mfc_sdr_setup(struct spu *spu)
 {
-	out_be64(&spu->priv1->mfc_sdr_RW, sdr);
+	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
 }
 
 static void mfc_sr1_set(struct spu *spu, u64 sr1)
@@ -146,7 +157,7 @@ const struct spu_priv1_ops spu_priv1_mmi
 	.mfc_dar_get = mfc_dar_get,
 	.mfc_dsisr_get = mfc_dsisr_get,
 	.mfc_dsisr_set = mfc_dsisr_set,
-	.mfc_sdr_set = mfc_sdr_set,
+	.mfc_sdr_setup = mfc_sdr_setup,
 	.mfc_sr1_set = mfc_sr1_set,
 	.mfc_sr1_get = mfc_sr1_get,
 	.mfc_tclass_id_set = mfc_tclass_id_set,
Index: b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,26 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
Index: b/arch/powerpc/mm/hash_utils_64.c
===================================================================
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,6 +51,7 @@
 #include <asm/cputable.h>
 #include <asm/abs_addr.h>
 #include <asm/sections.h>
+#include <asm/spu.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -685,17 +686,26 @@ int hash_page(unsigned long ea, unsigned
 				       "non-cacheable mapping\n");
 				psize = mmu_vmalloc_psize = MMU_PAGE_4K;
 			}
+#ifdef CONFIG_SPE_BASE
+			spu_flush_all_slbs(mm);
+#endif
 		}
 		if (user_region) {
 			if (psize != get_paca()->context.user_psize) {
 				get_paca()->context = mm->context;
 				slb_flush_and_rebolt();
+#ifdef CONFIG_SPE_BASE
+				spu_flush_all_slbs(mm);
+#endif
 			}
 		} else if (get_paca()->vmalloc_sllp !=
 			   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
 			get_paca()->vmalloc_sllp =
 				mmu_psize_defs[mmu_vmalloc_psize].sllp;
 			slb_flush_and_rebolt();
+#ifdef CONFIG_SPE_BASE
+			spu_flush_all_slbs(mm);
+#endif
 		}
 	}
 	if (psize == MMU_PAGE_64K)
Index: b/arch/powerpc/mm/hugetlbpage.c
===================================================================
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,7 @@
 #include <asm/machdep.h>
 #include <asm/cputable.h>
 #include <asm/tlb.h>
+#include <asm/spu.h>
 
 #include <linux/sysctl.h>
 
@@ -507,6 +508,9 @@ int prepare_hugepage_range(unsigned long
 	if ((addr + len) > 0x100000000UL)
 		err = open_high_hpage_areas(current->mm,
 					    HTLB_AREA_MASK(addr, len));
+#ifdef CONFIG_SPE_BASE
+	spu_flush_all_slbs(current->mm);
+#endif
 	if (err) {
 		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
 		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
Index: b/include/asm-powerpc/elf.h
===================================================================
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -412,4 +412,17 @@ do {									\
 /* Keep this the last entry.  */
 #define R_PPC64_NUM		107
 
+#ifdef CONFIG_SPU_BASE
+/* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
+#define NT_SPU		1
+
+extern int arch_notes_size(void);
+extern void arch_write_notes(struct file *file);
+
+#define ELF_CORE_EXTRA_NOTES_SIZE arch_notes_size()
+#define ELF_CORE_WRITE_EXTRA_NOTES arch_write_notes(file)
+
+#define ARCH_HAVE_EXTRA_ELF_NOTES
+#endif /* CONFIG_PPC_CELL */
+
 #endif /* _ASM_POWERPC_ELF_H */
Index: b/arch/powerpc/platforms/cell/setup.c
===================================================================
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -206,7 +206,8 @@ static void __init cell_init_irq(void)
 static void __init cell_setup_arch(void)
 {
 #ifdef CONFIG_SPU_BASE
-	spu_priv1_ops         = &spu_priv1_mmio_ops;
+	spu_priv1_ops = &spu_priv1_mmio_ops;
+	spu_management_ops = &spu_management_of_ops;
 #endif
 
 	cbe_regs_init();

RHBZ#: 228128 [FEATURE]
------
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228128

Description:
------------
spufs backport to SDK2.1

This patch applies on top of the 2.6.21 backport (and the two additional
prerequisites) and updates spufs to the stable version shipped in the IBM
Cell SDK2.1.

This code base is slightly newer (it fixes some bugs and adds proper
affinity support) and is tested and supported as part of our official SDK,
so it would be preferable to ship it in RHEL5U1 rather than the older
2.6.21 version.

Some of these changes are not upstream, because we expect to submit a
newer (and more experimental) version of spufs for 2.6.22; that version
would not be suitable for RHEL5U1.
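
For illustration only (not part of the patch): the affinity support is
exposed through two new spu_create flags, SPU_CREATE_AFFINITY_MEM and
SPU_CREATE_AFFINITY_SPU, plus a fourth neighbor-fd argument to the
syscall. The sketch below shows how user space could be expected to call
it. The mount point /spu, the gang/context names, and the availability of
SYS_spu_create in the C library are assumptions; error handling is
omitted.

/*
 * Hypothetical usage sketch for the affinity flags added by this patch.
 * It creates a gang and two contexts inside it, requesting memory
 * affinity for ctx0 and asking the scheduler to place ctx1 next to ctx0.
 */
#include <sys/syscall.h>
#include <unistd.h>

#define SPU_CREATE_GANG			0x0002
#define SPU_CREATE_AFFINITY_SPU		0x0010
#define SPU_CREATE_AFFINITY_MEM		0x0020

int main(void)
{
	int gang, ctx0, ctx1;

	/* Affinity flags are only accepted inside a gang directory. */
	gang = syscall(SYS_spu_create, "/spu/mygang",
		       SPU_CREATE_GANG, 0755, 0);

	/* Reference context: request memory affinity, no neighbor fd. */
	ctx0 = syscall(SYS_spu_create, "/spu/mygang/ctx0",
		       SPU_CREATE_AFFINITY_MEM, 0755, 0);

	/* Second context: pass ctx0's fd so it is placed next to it. */
	ctx1 = syscall(SYS_spu_create, "/spu/mygang/ctx1",
		       SPU_CREATE_AFFINITY_SPU, 0755, ctx0);

	return (gang < 0 || ctx0 < 0 || ctx1 < 0);
}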

RHEL Version Found:
-------------------
requested as a feature for 5.1

Upstream Status:
----------------
This code is currently present in the IBM Cell SDK2.1; the remaining spufs
changes are expected to be merged upstream in 2.6.22.

Test Status:
------------
A brew scratch build of kernel-2.6.18-17.el5 with this patch applied
is available at http://brewweb.devel.redhat.com/brew/taskinfo?taskID=773808 .

These changes are part of the upstream Cell SDK, and will be tested by IBM
with the Cell SPUFS test suite.

Proposed Patch:
----------------
Please review and ACK for RHEL5.1

-- 
 arch/powerpc/platforms/cell/spu_manage.c          |  366 ----------------------
 b/arch/powerpc/platforms/cell/Makefile            |    6 
 b/arch/powerpc/platforms/cell/spu_base.c          |  310 +++++++++++-------
 b/arch/powerpc/platforms/cell/spu_priv1_mmio.c    |  365 ++++++++++++++++++++-
 b/arch/powerpc/platforms/cell/spu_syscalls.c      |   17 -
 b/arch/powerpc/platforms/cell/spufs/Makefile      |    3 
 b/arch/powerpc/platforms/cell/spufs/backing_ops.c |    6 
 b/arch/powerpc/platforms/cell/spufs/context.c     |   28 +
 b/arch/powerpc/platforms/cell/spufs/fault.c       |  210 ++++++++++++
 b/arch/powerpc/platforms/cell/spufs/file.c        |  210 ++++++++++--
 b/arch/powerpc/platforms/cell/spufs/gang.c        |    6 
 b/arch/powerpc/platforms/cell/spufs/hw_ops.c      |    9 
 b/arch/powerpc/platforms/cell/spufs/inode.c       |  135 +++++++-
 b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c |   62 +++
 b/arch/powerpc/platforms/cell/spufs/run.c         |   44 +-
 b/arch/powerpc/platforms/cell/spufs/sched.c       |  271 ++++++++++++++--
 b/arch/powerpc/platforms/cell/spufs/spufs.h       |   40 ++
 b/arch/powerpc/platforms/cell/spufs/switch.c      |   38 --
 b/arch/powerpc/platforms/cell/spufs/syscalls.c    |   32 +
 b/include/asm-powerpc/mmu.h                       |    2 
 b/include/asm-powerpc/spu.h                       |   47 +-
 b/include/asm-powerpc/spu_csa.h                   |    5 
 b/include/linux/syscalls.h                        |    3 
 23 files changed, 1538 insertions(+), 677 deletions(-)

Index: b/arch/powerpc/platforms/cell/spufs/Makefile
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,8 +1,9 @@
-obj-y += switch.o
+obj-y += switch.o lscsa_alloc.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
 spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += fault.o
 
 # Rules to build switch.o with the help of SPU tool chain
 SPU_CROSS	:= spu-
Index: b/arch/powerpc/platforms/cell/spufs/backing_ops.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(
 	return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	/* nothing to do here */
+}
+
 struct spu_context_ops spu_backing_ops = {
 	.mbox_read = spu_backing_mbox_read,
 	.mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops =
 	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
 	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
 };
Index: b/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -22,6 +22,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/spu.h>
 #include <asm/spu_csa.h>
@@ -36,11 +37,10 @@ struct spu_context *alloc_spu_context(st
 	/* Binding to physical processor deferred
 	 * until spu_activate().
 	 */
-	spu_init_csa(&ctx->csa);
-	if (!ctx->csa.lscsa) {
+	if (spu_init_csa(&ctx->csa))
 		goto out_free;
-	}
 	spin_lock_init(&ctx->mmio_lock);
+	spin_lock_init(&ctx->mapping_lock);
 	kref_init(&ctx->kref);
 	mutex_init(&ctx->state_mutex);
 	init_MUTEX(&ctx->run_sema);
@@ -51,6 +51,8 @@ struct spu_context *alloc_spu_context(st
 	ctx->state = SPU_STATE_SAVED;
 	ctx->ops = &spu_backing_ops;
 	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
 	if (gang)
 		spu_gang_add_ctx(gang, ctx);
 	ctx->rt_priority = current->rt_priority;
@@ -75,6 +77,9 @@ void destroy_spu_context(struct kref *kr
 	spu_fini_csa(&ctx->csa);
 	if (ctx->gang)
 		spu_gang_remove_ctx(ctx->gang, ctx);
+	if (ctx->prof_priv_kref)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+	BUG_ON(!list_empty(&ctx->rq));
 	kfree(ctx);
 }
 
@@ -199,3 +204,20 @@ void spu_acquire_saved(struct spu_contex
 	if (ctx->state != SPU_STATE_SAVED)
 		spu_deactivate(ctx);
 }
+
+void spu_set_profile_private_kref(struct spu_context * ctx,
+				  struct kref * prof_info_kref,
+				  void (* prof_info_release) (struct kref * kref))
+{
+	ctx->prof_priv_kref = prof_info_kref;
+	ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void * spu_get_profile_private_kref(struct spu_context * ctx)
+{
+	return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
+
Index: b/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -44,8 +44,26 @@ spufs_mem_open(struct inode *inode, stru
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->local_store = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->local_store = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->local_store = NULL;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return 0;
 }
@@ -102,14 +120,16 @@ spufs_mem_write(struct file *file, const
 static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
 					  unsigned long address)
 {
-	struct spu_context *ctx = vma->vm_file->private_data;
-	unsigned long pfn, offset = address - vma->vm_start;
-
-	offset += vma->vm_pgoff << PAGE_SHIFT;
+	struct spu_context *ctx	= vma->vm_file->private_data;
+	unsigned long pfn, offset;
 
+	offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
 	if (offset >= LS_SIZE)
 		return NOPFN_SIGBUS;
 
+	pr_debug("spufs_mem_mmap_nopfn address=0x%lx, offset=0x%lx\n",
+		 address, offset);
+
 	spu_acquire(ctx);
 
 	if (ctx->state == SPU_STATE_SAVED) {
@@ -133,8 +153,7 @@ static struct vm_operations_struct spufs
 	.nopfn = spufs_mem_mmap_nopfn,
 };
 
-static int
-spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
@@ -147,12 +166,13 @@ spufs_mem_mmap(struct file *file, struct
 	return 0;
 }
 
-static const struct file_operations spufs_mem_fops = {
-	.open	 = spufs_mem_open,
-	.read    = spufs_mem_read,
-	.write   = spufs_mem_write,
-	.llseek  = generic_file_llseek,
-	.mmap    = spufs_mem_mmap,
+static struct file_operations spufs_mem_fops = {
+	.open			= spufs_mem_open,
+	.release		= spufs_mem_release,
+	.read			= spufs_mem_read,
+	.write			= spufs_mem_write,
+	.llseek			= generic_file_llseek,
+	.mmap			= spufs_mem_mmap,
 };
 
 static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
@@ -238,16 +258,35 @@ static int spufs_cntl_open(struct inode 
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->cntl = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return simple_attr_open(inode, file, spufs_cntl_get,
 					spufs_cntl_set, "0x%08lx");
 }
 
-static const struct file_operations spufs_cntl_fops = {
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	simple_attr_close(inode, file);
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->cntl = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
+static struct file_operations spufs_cntl_fops = {
 	.open = spufs_cntl_open,
-	.release = simple_attr_close,
+	.release = spufs_cntl_release,
 	.read = simple_attr_read,
 	.write = simple_attr_write,
 	.mmap = spufs_cntl_mmap,
@@ -305,7 +344,7 @@ spufs_regs_write(struct file *file, cons
 	return ret;
 }
 
-static const struct file_operations spufs_regs_fops = {
+static struct file_operations spufs_regs_fops = {
 	.open	 = spufs_regs_open,
 	.read    = spufs_regs_read,
 	.write   = spufs_regs_write,
@@ -356,7 +395,7 @@ spufs_fpcr_write(struct file *file, cons
 	return ret;
 }
 
-static const struct file_operations spufs_fpcr_fops = {
+static struct file_operations spufs_fpcr_fops = {
 	.open = spufs_regs_open,
 	.read = spufs_fpcr_read,
 	.write = spufs_fpcr_write,
@@ -422,7 +461,7 @@ static ssize_t spufs_mbox_read(struct fi
 	return count;
 }
 
-static const struct file_operations spufs_mbox_fops = {
+static struct file_operations spufs_mbox_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_mbox_read,
 };
@@ -448,7 +487,7 @@ static ssize_t spufs_mbox_stat_read(stru
 	return 4;
 }
 
-static const struct file_operations spufs_mbox_stat_fops = {
+static struct file_operations spufs_mbox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_mbox_stat_read,
 };
@@ -555,7 +594,7 @@ static unsigned int spufs_ibox_poll(stru
 	return mask;
 }
 
-static const struct file_operations spufs_ibox_fops = {
+static struct file_operations spufs_ibox_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_ibox_read,
 	.poll	= spufs_ibox_poll,
@@ -581,7 +620,7 @@ static ssize_t spufs_ibox_stat_read(stru
 	return 4;
 }
 
-static const struct file_operations spufs_ibox_stat_fops = {
+static struct file_operations spufs_ibox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_ibox_stat_read,
 };
@@ -688,7 +727,7 @@ static unsigned int spufs_wbox_poll(stru
 	return mask;
 }
 
-static const struct file_operations spufs_wbox_fops = {
+static struct file_operations spufs_wbox_fops = {
 	.open	= spufs_pipe_open,
 	.write	= spufs_wbox_write,
 	.poll	= spufs_wbox_poll,
@@ -714,7 +753,7 @@ static ssize_t spufs_wbox_stat_read(stru
 	return 4;
 }
 
-static const struct file_operations spufs_wbox_stat_fops = {
+static struct file_operations spufs_wbox_stat_fops = {
 	.open	= spufs_pipe_open,
 	.read	= spufs_wbox_stat_read,
 };
@@ -723,12 +762,30 @@ static int spufs_signal1_open(struct ino
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal1 = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->signal1 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal1 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
 static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -819,8 +876,9 @@ static int spufs_signal1_mmap(struct fil
 	return 0;
 }
 
-static const struct file_operations spufs_signal1_fops = {
+static struct file_operations spufs_signal1_fops = {
 	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
 	.read = spufs_signal1_read,
 	.write = spufs_signal1_write,
 	.mmap = spufs_signal1_mmap,
@@ -830,12 +888,30 @@ static int spufs_signal2_open(struct ino
 {
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->signal2 = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->signal2 = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal2 = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
 static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
 			size_t len, loff_t *pos)
 {
@@ -930,8 +1006,9 @@ static int spufs_signal2_mmap(struct fil
 #define spufs_signal2_mmap NULL
 #endif /* !SPUFS_MMAP_4K */
 
-static const struct file_operations spufs_signal2_fops = {
+static struct file_operations spufs_signal2_fops = {
 	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
 	.read = spufs_signal2_read,
 	.write = spufs_signal2_write,
 	.mmap = spufs_signal2_mmap,
@@ -1031,13 +1108,32 @@ static int spufs_mss_open(struct inode *
 	struct spu_context *ctx = i->i_ctx;
 
 	file->private_data = i->i_ctx;
-	ctx->mss = inode->i_mapping;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
-static const struct file_operations spufs_mss_fops = {
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
+static struct file_operations spufs_mss_fops = {
 	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
 	.mmap	 = spufs_mss_mmap,
 };
 
@@ -1072,14 +1168,32 @@ static int spufs_psmap_open(struct inode
 	struct spufs_inode_info *i = SPUFS_I(inode);
 	struct spu_context *ctx = i->i_ctx;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = i->i_ctx;
-	ctx->psmap = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
-static const struct file_operations spufs_psmap_fops = {
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
+static struct file_operations spufs_psmap_fops = {
 	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
 	.mmap	 = spufs_psmap_mmap,
 };
 
@@ -1126,12 +1240,29 @@ static int spufs_mfc_open(struct inode *
 	if (atomic_read(&inode->i_count) != 1)
 		return -EBUSY;
 
+	spin_lock(&ctx->mapping_lock);
 	file->private_data = ctx;
-	ctx->mfc = inode->i_mapping;
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	spin_unlock(&ctx->mapping_lock);
 	smp_wmb();
 	return nonseekable_open(inode, file);
 }
 
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	spin_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	spin_unlock(&ctx->mapping_lock);
+	smp_wmb();
+	return 0;
+}
+
 /* interrupt-level mfc callback function. */
 void spufs_mfc_callback(struct spu *spu)
 {
@@ -1397,8 +1528,9 @@ static int spufs_mfc_fasync(int fd, stru
 	return fasync_helper(fd, file, on, &ctx->mfc_fasync);
 }
 
-static const struct file_operations spufs_mfc_fops = {
+static struct file_operations spufs_mfc_fops = {
 	.open	 = spufs_mfc_open,
+	.release = spufs_mfc_release,
 	.read	 = spufs_mfc_read,
 	.write	 = spufs_mfc_write,
 	.poll	 = spufs_mfc_poll,
@@ -1654,7 +1786,7 @@ static ssize_t spufs_mbox_info_read(stru
 	return ret;
 }
 
-static const struct file_operations spufs_mbox_info_fops = {
+static struct file_operations spufs_mbox_info_fops = {
 	.open = spufs_info_open,
 	.read = spufs_mbox_info_read,
 	.llseek  = generic_file_llseek,
@@ -1692,7 +1824,7 @@ static ssize_t spufs_ibox_info_read(stru
 	return ret;
 }
 
-static const struct file_operations spufs_ibox_info_fops = {
+static struct file_operations spufs_ibox_info_fops = {
 	.open = spufs_info_open,
 	.read = spufs_ibox_info_read,
 	.llseek  = generic_file_llseek,
@@ -1733,7 +1865,7 @@ static ssize_t spufs_wbox_info_read(stru
 	return ret;
 }
 
-static const struct file_operations spufs_wbox_info_fops = {
+static struct file_operations spufs_wbox_info_fops = {
 	.open = spufs_info_open,
 	.read = spufs_wbox_info_read,
 	.llseek  = generic_file_llseek,
@@ -1783,7 +1915,7 @@ static ssize_t spufs_dma_info_read(struc
 	return ret;
 }
 
-static const struct file_operations spufs_dma_info_fops = {
+static struct file_operations spufs_dma_info_fops = {
 	.open = spufs_info_open,
 	.read = spufs_dma_info_read,
 };
@@ -1834,7 +1966,7 @@ static ssize_t spufs_proxydma_info_read(
 	return ret;
 }
 
-static const struct file_operations spufs_proxydma_info_fops = {
+static struct file_operations spufs_proxydma_info_fops = {
 	.open = spufs_info_open,
 	.read = spufs_proxydma_info_read,
 };
Index: b/arch/powerpc/platforms/cell/spufs/gang.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/gang.c
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
 
 	kref_init(&gang->kref);
 	mutex_init(&gang->mutex);
+	mutex_init(&gang->aff_mutex);
 	INIT_LIST_HEAD(&gang->list);
+	INIT_LIST_HEAD(&gang->aff_list_head);
 
 out:
 	return gang;
@@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang
 {
 	mutex_lock(&gang->mutex);
 	WARN_ON(ctx->gang != gang);
+	if (!list_empty(&ctx->aff_list)) {
+		list_del_init(&ctx->aff_list);
+		gang->aff_flags &= ~AFF_OFFSETS_SET;
+	}
 	list_del_init(&ctx->gang_list);
 	gang->contexts--;
 	mutex_unlock(&gang->mutex);
Index: b/arch/powerpc/platforms/cell/spufs/hw_ops.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struc
 	}
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
 	.mbox_read = spu_hw_mbox_read,
 	.mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = {
 	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
 	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
 	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
 };
Index: b/arch/powerpc/platforms/cell/spufs/inode.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -54,6 +54,7 @@ spufs_alloc_inode(struct super_block *sb
 
 	ei->i_gang = NULL;
 	ei->i_ctx = NULL;
+	ei->i_openers = 0;
 
 	return &ei->vfs_inode;
 }
@@ -224,7 +225,7 @@ static int spufs_dir_close(struct inode 
 	.lookup = simple_lookup,
 };
 
-const struct file_operations spufs_context_fops = {
+struct file_operations spufs_context_fops = {
 	.open		= dcache_dir_open,
 	.release	= spufs_dir_close,
 	.llseek		= dcache_dir_lseek,
@@ -307,11 +308,107 @@ out:
 	return ret;
 }
 
-static int spufs_create_context(struct inode *inode,
-			struct dentry *dentry,
-			struct vfsmount *mnt, int flags, int mode)
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+						struct file *filp)
+{
+	struct spu_context *tmp, *neighbor;
+	int count, node;
+	int aff_supp;
+
+	aff_supp = !list_empty(&(list_entry(be_spu_info[0].spus.next,
+					struct spu, be_list))->aff_list);
+
+	if (!aff_supp)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_GANG)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_AFFINITY_MEM &&
+	    gang->aff_ref_ctx &&
+	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+		return ERR_PTR(-EEXIST);
+
+	if (gang->aff_flags & AFF_MERGED)
+		return ERR_PTR(-EBUSY);
+
+	neighbor = NULL;
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (!filp || filp->f_op != &spufs_context_fops)
+			return ERR_PTR(-EINVAL);
+
+		neighbor = get_spu_context(
+				SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
+
+		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+		    !list_entry(neighbor->aff_list.next, struct spu_context,
+		    aff_list)->aff_head)
+			return ERR_PTR(-EEXIST);
+
+		if (gang != neighbor->gang)
+			return ERR_PTR(-EINVAL);
+
+		count = 1;
+		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+			count++;
+		if (list_empty(&neighbor->aff_list))
+			count++;
+
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			if ((be_spu_info[node].n_spus - atomic_read(
+				&be_spu_info[node].reserved_spus)) >= count)
+				break;
+		}
+
+		if (node == MAX_NUMNODES)
+			return ERR_PTR(-EEXIST);
+	}
+
+	return neighbor;
+}
+
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+					struct spu_context *neighbor)
+{
+	if (flags & SPU_CREATE_AFFINITY_MEM)
+		ctx->gang->aff_ref_ctx = ctx;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (list_empty(&neighbor->aff_list)) {
+			list_add_tail(&neighbor->aff_list,
+				&ctx->gang->aff_list_head);
+			neighbor->aff_head = 1;
+		}
+
+		if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+		    || list_entry(neighbor->aff_list.next, struct spu_context,
+							aff_list)->aff_head) {
+			list_add(&ctx->aff_list, &neighbor->aff_list);
+		} else  {
+			list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+			if (neighbor->aff_head) {
+				neighbor->aff_head = 0;
+				ctx->aff_head = 1;
+			}
+		}
+
+		if (!ctx->gang->aff_ref_ctx)
+			ctx->gang->aff_ref_ctx = ctx;
+	}
+}
+
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+			struct vfsmount *mnt, int flags, int mode,
+			struct file *aff_filp)
 {
 	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
 
 	ret = -EPERM;
 	if ((flags & SPU_CREATE_NOSCHED) &&
@@ -327,9 +424,29 @@ static int spufs_create_context(struct i
 	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
 		goto out_unlock;
 
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		ret = -EINVAL;
+		if (!gang)
+			goto out_unlock;
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
 	ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
 	if (ret)
-		goto out_unlock;
+		goto out_aff_unlock;
+
+	if (affinity)
+		spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+								neighbor);
 
 	/*
 	 * get references for dget and mntget, will be released
@@ -343,6 +460,9 @@ static int spufs_create_context(struct i
 		goto out;
 	}
 
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
 out:
@@ -471,7 +591,8 @@ out:
 
 static struct file_system_type spufs_type;
 
-long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
+long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
+							struct file *filp)
 {
 	struct dentry *dentry;
 	int ret;
@@ -508,7 +629,7 @@ long spufs_create(struct nameidata *nd, 
 					dentry, nd->mnt, mode);
 	else
 		return spufs_create_context(nd->dentry->d_inode,
-					dentry, nd->mnt, flags, mode);
+					dentry, nd->mnt, flags, mode, filp);
 
 out_dput:
 	dput(dentry);
Index: b/arch/powerpc/platforms/cell/spufs/run.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -18,36 +18,17 @@ void spufs_stop_callback(struct spu *spu
 	wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-	struct spu_context *ctx = spu->ctx;
-
-	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-		ctx->event_return |= type;
-		wake_up_all(&ctx->stop_wq);
-	} else {
-		switch (type) {
-		case SPE_EVENT_DMA_ALIGNMENT:
-		case SPE_EVENT_SPE_DATA_STORAGE:
-		case SPE_EVENT_INVALID_DMA:
-			force_sig(SIGBUS, /* info, */ current);
-			break;
-		case SPE_EVENT_SPE_ERROR:
-			force_sig(SIGILL, /* info */ current);
-			break;
-		}
-	}
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
 	struct spu *spu;
 	u64 pte_fault;
 
 	*stat = ctx->ops->status_read(ctx);
-	if (ctx->state != SPU_STATE_RUNNABLE)
-		return 1;
+
 	spu = ctx->spu;
+	if (ctx->state != SPU_STATE_RUNNABLE ||
+	    test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
 	pte_fault = spu->dsisr &
 	    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
 	return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0;
@@ -292,11 +273,8 @@ int spu_process_callback(struct spu_cont
 static inline int spu_process_events(struct spu_context *ctx)
 {
 	struct spu *spu = ctx->spu;
-	u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
 	int ret = 0;
 
-	if (spu->dsisr & pte_fault)
-		ret = spu_irq_class_1_bottom(spu);
 	if (spu->class_0_pending)
 		ret = spu_irq_class_0_bottom(spu);
 	if (!ret && signal_pending(current))
@@ -308,6 +286,7 @@ long spufs_run_spu(struct file *file, st
 		   u32 *npc, u32 *event)
 {
 	int ret;
+	struct spu * spu;
 	u32 status;
 
 	if (down_interruptible(&ctx->run_sema))
@@ -321,8 +300,17 @@ long spufs_run_spu(struct file *file, st
 
 	do {
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+		spu = ctx->spu;
 		if (unlikely(ret))
 			break;
+		if (unlikely(test_bit(SPU_SCHED_NOTIFY_ACTIVE,
+				      &ctx->sched_flags))) {
+			clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
+			if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+				spu_switch_notify(spu, ctx);
+				continue;
+			}
+		}
 		if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
 		    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
 			ret = spu_process_callback(ctx);
@@ -330,6 +318,10 @@ long spufs_run_spu(struct file *file, st
 				break;
 			status &= ~SPU_STATUS_STOPPED_BY_STOP;
 		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
 		if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
 			ret = spu_reacquire_runnable(ctx, npc, &status);
 			if (ret) {
Index: b/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -71,21 +71,42 @@ static inline int node_allowed(int node)
 
 void spu_start_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+		 * Make sure the exiting bit is cleared.
+		 */
+		clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
+	}
 }
 
 void spu_stop_tick(struct spu_context *ctx)
 {
-	if (ctx->policy == SCHED_RR)
+	if (ctx->policy == SCHED_RR) {
+		/*
+	 * While the work would normally rearm itself, setting this flag
+	 * makes sure it does not rearm itself anymore.
+		 */
+		set_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
+		mb();
 		cancel_delayed_work(&ctx->sched_work);
+	}
 }
 
 void spu_sched_tick(void *data)
 {
 	struct spu_context *ctx = data;
 	struct spu *spu;
-	int rearm = 1;
+	int preempted = 0;
+
+	/*
+	 * If this context is being stopped avoid rescheduling from the
+	 * scheduler tick because we would deadlock on the state_mutex.
+	 * The caller will yield the spu later on anyway.
+	 */
+	if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
+		return;
 
 	mutex_lock(&ctx->state_mutex);
 	spu = ctx->spu;
@@ -93,12 +114,19 @@ void spu_sched_tick(void *data)
 		int best = sched_find_first_bit(spu_prio->bitmap);
 		if (best <= ctx->prio) {
 			spu_deactivate(ctx);
-			rearm = 0;
+			preempted = 1;
 		}
 	}
 	mutex_unlock(&ctx->state_mutex);
 
-	if (rearm)
+	if (preempted) {
+		/*
+		 * We need to break out of the wait loop in spu_run manually
+		 * to ensure this context gets put on the runqueue again
+		 * ASAP.
+		 */
+		wake_up(&ctx->stop_wq);
+	} else
 		spu_start_tick(ctx);
 }
 
@@ -128,21 +156,47 @@ static void spu_remove_from_active_list(
 
 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 
-static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
 {
 	blocking_notifier_call_chain(&spu_switch_notifier,
 			    ctx ? ctx->object_id : 0, spu);
 }
 
+static void notify_spus_active(void)
+{
+	int node;
+	/* Wake up the active spu_contexts. When the awakened processes
+	 * see their "notify_active" flag is set, they will call
+	 * spu_switch_notify();
+	 */
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		struct spu *spu;
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+			struct spu_context *ctx = spu->ctx;
+			set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
+			mb();
+			wake_up_all(&ctx->stop_wq);
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
+	}
+}
+
 int spu_switch_event_register(struct notifier_block * n)
 {
-	return blocking_notifier_chain_register(&spu_switch_notifier, n);
+	int ret;
+	ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
+	if (!ret)
+		notify_spus_active();
+	return ret;
 }
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
 
 int spu_switch_event_unregister(struct notifier_block * n)
 {
 	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
 }
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
 
 /**
  * spu_bind_context - bind spu context to physical spu
@@ -153,11 +207,16 @@ static void spu_bind_context(struct spu 
 {
 	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
 		 spu->number, spu->node);
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_inc(&be_spu_info[spu->node].reserved_spus);
+	if (!list_empty(&ctx->aff_list))
+		atomic_inc(&ctx->gang->aff_sched_count);
 	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
 	spu->pid = current->pid;
+	spu->tgid = current->tgid;
 	spu_associate_mm(spu, ctx->owner);
 	spu->ibox_callback = spufs_ibox_callback;
 	spu->wbox_callback = spufs_wbox_callback;
@@ -184,6 +243,11 @@ static void spu_unbind_context(struct sp
 	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
 		 spu->pid, spu->number, spu->node);
 
+	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_dec(&be_spu_info[spu->node].reserved_spus);
+	if (!list_empty(&ctx->aff_list))
+		if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
+			ctx->gang->aff_ref_spu = NULL;
 	spu_remove_from_active_list(spu);
 	spu_switch_notify(spu, NULL);
 	spu_unmap_mappings(ctx);
@@ -197,6 +261,7 @@ static void spu_unbind_context(struct sp
 	spu->dma_callback = NULL;
 	spu_associate_mm(spu, NULL);
 	spu->pid = 0;
+	spu->tgid = 0;
 	ctx->ops = &spu_backing_ops;
 	ctx->spu = NULL;
 	spu->flags = 0;
@@ -212,9 +277,18 @@ static void spu_add_to_rq(struct spu_con
 	spin_lock(&spu_prio->runq_lock);
 	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
 	set_bit(ctx->prio, spu_prio->bitmap);
+	mb();
 	spin_unlock(&spu_prio->runq_lock);
 }
 
+static void __spu_del_from_rq(struct spu_context *ctx, int prio)
+{
+	if (!list_empty(&ctx->rq))
+		list_del_init(&ctx->rq);
+	if (list_empty(&spu_prio->runq[prio]))
+		clear_bit(ctx->prio, spu_prio->bitmap);
+}
+
 /**
  * spu_del_from_rq - remove a context from the runqueue
  * @ctx:       context to remove
@@ -222,33 +296,10 @@ static void spu_add_to_rq(struct spu_con
 static void spu_del_from_rq(struct spu_context *ctx)
 {
 	spin_lock(&spu_prio->runq_lock);
-	list_del_init(&ctx->rq);
-	if (list_empty(&spu_prio->runq[ctx->prio]))
-		clear_bit(ctx->prio, spu_prio->bitmap);
+	__spu_del_from_rq(ctx, ctx->prio);
 	spin_unlock(&spu_prio->runq_lock);
 }
 
-/**
- * spu_grab_context - remove one context from the runqueue
- * @prio:      priority of the context to be removed
- *
- * This function removes one context from the runqueue for priority @prio.
- * If there is more than one context with the given priority the first
- * task on the runqueue will be taken.
- *
- * Returns the spu_context it just removed.
- *
- * Must be called with spu_prio->runq_lock held.
- */
-static struct spu_context *spu_grab_context(int prio)
-{
-	struct list_head *rq = &spu_prio->runq[prio];
-
-	if (list_empty(rq))
-		return NULL;
-	return list_entry(rq->next, struct spu_context, rq);
-}
-
 static void spu_prio_wait(struct spu_context *ctx)
 {
 	DEFINE_WAIT(wait);
@@ -267,7 +318,7 @@ static void spu_prio_wait(struct spu_con
  * spu_reschedule - try to find a runnable context for a spu
  * @spu:       spu available
  *
- * This function is called whenever a spu becomes idle.  It looks for the
+ * This function is called whenever a spu becomes idle.	 It looks for the
  * most suitable runnable spu context and schedules it for execution.
  */
 static void spu_reschedule(struct spu *spu)
@@ -279,9 +330,14 @@ static void spu_reschedule(struct spu *s
 	spin_lock(&spu_prio->runq_lock);
 	best = sched_find_first_bit(spu_prio->bitmap);
 	if (best < MAX_PRIO) {
-		struct spu_context *ctx = spu_grab_context(best);
-		if (ctx)
-			wake_up(&ctx->stop_wq);
+		struct list_head *rq = &spu_prio->runq[best];
+		struct spu_context *ctx;
+
+		BUG_ON(list_empty(rq));
+
+		ctx = list_entry(rq->next, struct spu_context, rq);
+		__spu_del_from_rq(ctx, best);
+		wake_up(&ctx->stop_wq);
 	}
 	spin_unlock(&spu_prio->runq_lock);
 }
@@ -292,6 +348,10 @@ static struct spu *spu_get_idle(struct s
 	int node = cpu_to_node(raw_smp_processor_id());
 	int n;
 
+	spu = affinity_check(ctx);
+	if (spu)
+		return spu_alloc_spu(spu);
+
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(node))
@@ -364,6 +424,12 @@ static struct spu *find_victim(struct sp
 			}
 			spu_unbind_context(spu, victim);
 			mutex_unlock(&victim->state_mutex);
+			/*
+			 * We need to break out of the wait loop in spu_run
+			 * manually to ensure this context gets put on the
+			 * runqueue again ASAP.
+			 */
+			wake_up(&victim->stop_wq);
 			return spu;
 		}
 	}
@@ -427,7 +493,7 @@ void spu_deactivate(struct spu_context *
 }
 
 /**
- * spu_yield -  yield a physical spu if others are waiting
+ * spu_yield -	yield a physical spu if others are waiting
  * @ctx:	spu context to yield
  *
  * Check if there is a higher priority context waiting and if yes
@@ -500,3 +566,136 @@ void __exit spu_sched_exit(void)
 	kfree(spu_prio);
 	destroy_workqueue(spu_sched_wq);
 }
+
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+
+	list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+		if (list_empty(&ctx->aff_list))
+			list_add(&ctx->aff_list, &gang->aff_list_head);
+	}
+	gang->aff_flags |= AFF_MERGED;
+}
+
+static void aff_set_offsets(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+	int offset;
+
+	offset = -1;
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset--;
+	}
+
+	offset = 0;
+	list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset++;
+	}
+
+	gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+static inline int sched_spu(struct spu *spu)
+{
+	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
+}
+
+static struct spu *
+aff_ref_location(int mem_aff, int group_size, int prio, int lowest_offset)
+{
+	struct spu *spu;
+	int node, n;
+
+	/* TODO: A better algorithm could be used to find a good spu to be
+	 *	 used as reference location for the ctxs chain.
+	 */
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
+		list_for_each_entry(spu, &be_spu_info[node].spus, be_list) {
+			if ((!mem_aff || spu->has_mem_affinity) &&
+							sched_spu(spu))
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+	int mem_aff, gs, lowest_offset;
+	struct spu_context *ctx;
+	struct spu *tmp;
+
+	mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+	lowest_offset = 0;
+	gs = 0;
+	list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+		gs++;
+
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		lowest_offset = ctx->aff_offset;
+	}
+
+	gang->aff_ref_spu = aff_ref_location(mem_aff, gs, ctx->prio,
+							lowest_offset);
+}
+
+static struct spu* ctx_location(struct spu *ref, int offset)
+{
+	struct spu *spu;
+
+	spu = NULL;
+	if (offset >= 0) {
+		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset--;
+		}
+	} else {
+		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset++;
+		}
+	}
+	return spu;
+}
+
+/**
+ * affinity_check is called each time a context is going to be scheduled.
+ * It returns the spu ptr on which the context must run.
+ */
+struct spu* affinity_check(struct spu_context *ctx)
+{
+	struct spu_gang *gang;
+
+	if (list_empty(&ctx->aff_list))
+		return NULL;
+	gang = ctx->gang;
+	mutex_lock(&gang->aff_mutex);
+	if (!gang->aff_ref_spu) {
+		if (!(gang->aff_flags & AFF_MERGED))
+			aff_merge_remaining_ctxs(gang);
+		if (!(gang->aff_flags & AFF_OFFSETS_SET))
+			aff_set_offsets(gang);
+		aff_set_ref_point_location(gang);
+	}
+	mutex_unlock(&gang->aff_mutex);
+	if (!gang->aff_ref_spu)
+		return NULL;
+	return ctx_location(gang->aff_ref_spu, ctx->aff_offset);
+}
Index: b/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -41,7 +41,8 @@ struct spu_gang;
 
 /* ctx->sched_flags */
 enum {
-	SPU_SCHED_WAKE = 0, /* currently unused */
+	SPU_SCHED_EXITING = 0,
+	SPU_SCHED_NOTIFY_ACTIVE,
 };
 
 struct spu_context {
@@ -53,8 +54,9 @@ struct spu_context {
 	struct address_space *cntl; 	   /* 'control' area mappings. */
 	struct address_space *signal1; 	   /* 'signal1' area mappings. */
 	struct address_space *signal2; 	   /* 'signal2' area mappings. */
-	struct address_space *mss; 	   /* 'mss' area mappings. */
-	struct address_space *psmap; 	   /* 'psmap' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	spinlock_t mapping_lock;
 	u64 object_id;		   /* user space pointer for oprofile */
 
 	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
@@ -79,14 +81,20 @@ struct spu_context {
 
 	struct list_head gang_list;
 	struct spu_gang *gang;
+	struct kref * prof_priv_kref;
+	void (* prof_priv_release) (struct kref *kref);
 
 	/* scheduler fields */
- 	struct list_head rq;
+	struct list_head rq;
 	struct work_struct sched_work;
 	unsigned long sched_flags;
 	unsigned long rt_priority;
 	int policy;
 	int prio;
+
+	struct list_head aff_list;
+	int aff_head;
+	int aff_offset;
 };
 
 struct spu_gang {
@@ -94,8 +102,19 @@ struct spu_gang {
 	struct mutex mutex;
 	struct kref kref;
 	int contexts;
+
+	struct spu_context *aff_ref_ctx;
+	struct list_head aff_list_head;
+	struct mutex aff_mutex;
+	int aff_flags;
+	struct spu *aff_ref_spu;
+	atomic_t aff_sched_count;
 };
 
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET		1
+#define AFF_MERGED		2
+
 struct mfc_dma_command {
 	int32_t pad;	/* reserved */
 	uint32_t lsa;	/* local storage address */
@@ -140,6 +159,7 @@ struct spu_context_ops {
 			       struct spu_dma_info * info);
 	void (*proxydma_info_read) (struct spu_context * ctx,
 				    struct spu_proxydma_info * info);
+	void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -149,6 +169,7 @@ struct spufs_inode_info {
 	struct spu_context *i_ctx;
 	struct spu_gang *i_gang;
 	struct inode vfs_inode;
+	int i_openers;
 };
 #define SPUFS_I(inode) \
 	container_of(inode, struct spufs_inode_info, vfs_inode)
@@ -160,8 +181,8 @@ extern struct tree_descr spufs_dir_nosch
 long spufs_run_spu(struct file *file,
 		   struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct nameidata *nd,
-			 unsigned int flags, mode_t mode);
-extern const struct file_operations spufs_context_fops;
+			 unsigned int flags, mode_t mode, struct file *filp);
+extern struct file_operations spufs_context_fops;
 
 /* gang management */
 struct spu_gang *alloc_spu_gang(void);
@@ -170,6 +191,12 @@ int put_spu_gang(struct spu_gang *gang);
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
@@ -195,6 +222,7 @@ int spu_acquire_exclusive(struct spu_con
 int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
 void spu_start_tick(struct spu_context *ctx);
 void spu_stop_tick(struct spu_context *ctx);
 void spu_sched_tick(void *data);
Index: b/arch/powerpc/platforms/cell/spufs/switch.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -1931,7 +1931,7 @@ static void harvest(struct spu_state *pr
 	reset_spu_privcntl(prev, spu);	        /* Step 16. */
 	reset_spu_lslr(prev, spu);              /* Step 17. */
 	setup_mfc_sr1(prev, spu);	        /* Step 18. */
-	spu_invalidate_slbs(spu);        	/* Step 19. */
+	spu_invalidate_slbs(spu);		/* Step 19. */
 	reset_ch_part1(prev, spu);	        /* Step 20. */
 	reset_ch_part2(prev, spu);	        /* Step 21. */
 	enable_interrupts(prev, spu);	        /* Step 22. */
@@ -2084,6 +2084,10 @@ int spu_save(struct spu_state *prev, str
 	int rc;
 
 	acquire_spu_lock(spu);	        /* Step 1.     */
+	prev->dar = spu->dar;
+	prev->dsisr = spu->dsisr;
+	spu->dar = 0;
+	spu->dsisr = 0;
 	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
 	release_spu_lock(spu);
 	if (rc != 0 && rc != 2 && rc != 6) {
@@ -2109,9 +2113,9 @@ int spu_restore(struct spu_state *new, s
 
 	acquire_spu_lock(spu);
 	harvest(NULL, spu);
-	spu->dar = 0;
-	spu->dsisr = 0;
 	spu->slb_replace = 0;
+	new->dar = 0;
+	new->dsisr = 0;
 	spu->class_0_pending = 0;
 	rc = __do_spu_restore(new, spu);
 	release_spu_lock(spu);
@@ -2185,40 +2189,30 @@ static void init_priv2(struct spu_state 
  * as it is by far the largest of the context save regions,
  * and may need to be pinned or otherwise specially aligned.
  */
-void spu_init_csa(struct spu_state *csa)
+int spu_init_csa(struct spu_state *csa)
 {
-	struct spu_lscsa *lscsa;
-	unsigned char *p;
+	int rc;
 
 	if (!csa)
-		return;
+		return -EINVAL;
 	memset(csa, 0, sizeof(struct spu_state));
 
-	lscsa = vmalloc(sizeof(struct spu_lscsa));
-	if (!lscsa)
-		return;
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;
 
-	memset(lscsa, 0, sizeof(struct spu_lscsa));
-	csa->lscsa = lscsa;
 	spin_lock_init(&csa->register_lock);
 
-	/* Set LS pages reserved to allow for user-space mapping. */
-	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		SetPageReserved(vmalloc_to_page(p));
-
 	init_prob(csa);
 	init_priv1(csa);
 	init_priv2(csa);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(spu_init_csa);
 
 void spu_fini_csa(struct spu_state *csa)
 {
-	/* Clear reserved bit before vfree. */
-	unsigned char *p;
-	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		ClearPageReserved(vmalloc_to_page(p));
-
-	vfree(csa->lscsa);
+	spu_free_lscsa(csa);
 }
 EXPORT_SYMBOL_GPL(spu_fini_csa);
Index: b/arch/powerpc/platforms/cell/spufs/syscalls.c
===================================================================
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u3
 }
 #endif
 
-asmlinkage long sys_spu_create(const char __user *pathname,
-					unsigned int flags, mode_t mode)
+asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
+				mode_t mode, struct file *neighbor)
 {
 	char *tmp;
 	int ret;
@@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const cha
 		ret = path_lookup(tmp, LOOKUP_PARENT|
 				LOOKUP_OPEN|LOOKUP_CREATE, &nd);
 		if (!ret) {
-			ret = spufs_create(&nd, flags, mode);
+			ret = spufs_create(&nd, flags, mode, neighbor);
 			path_release(&nd);
 		}
 		putname(tmp);
@@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const cha
 	return ret;
 }
 
+#ifndef MODULE
+asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
+				mode_t mode, int neighbor_fd)
+{
+	int fput_needed;
+	struct file *neighbor;
+	long ret;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		ret = -EBADF;
+		neighbor = fget_light(neighbor_fd, &fput_needed);
+		if (neighbor) {
+			ret = do_spu_create(pathname, flags, mode, neighbor);
+			fput_light(neighbor, fput_needed);
+		}
+	}
+	else {
+		ret = do_spu_create(pathname, flags, mode, NULL);
+	}
+
+	return ret;
+}
+#endif
+
 struct spufs_calls spufs_calls = {
-	.create_thread = sys_spu_create,
+	.create_thread = do_spu_create,
 	.spu_run = do_spu_run,
 	.owner = THIS_MODULE,
 };
Index: b/include/asm-powerpc/spu.h
===================================================================
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -104,7 +104,6 @@
 
 struct spu_context;
 struct spu_runqueue;
-struct device_node;
 
 struct spu {
 	const char *name;
@@ -114,6 +113,7 @@ struct spu {
 	struct spu_problem __iomem *problem;
 	struct spu_priv2 __iomem *priv2;
 	struct list_head list;
+	struct list_head be_list;
 	struct list_head sched_list;
 	struct list_head full_list;
 	int number;
@@ -129,6 +129,7 @@ struct spu {
 	struct spu_runqueue *rq;
 	unsigned long long timestamp;
 	pid_t pid;
+	pid_t tgid;
 	int class_0_pending;
 	spinlock_t register_lock;
 
@@ -142,29 +143,44 @@ struct spu {
 	char irq_c1[8];
 	char irq_c2[8];
 
-	u64 spe_id;
-
 	void* pdata; /* platform private data */
+	struct sys_device sysdev;
 
-	/* of based platforms only */
-	struct device_node *devnode;
-
-	/* native only */
-	struct spu_priv1 __iomem *priv1;
-
-	/* beat only */
-	u64 shadow_int_mask_RW[3];
+	int has_mem_affinity;
+	struct list_head aff_list;
+};
 
-	struct sys_device sysdev;
+struct be_spu_info {
+	struct list_head spus;
+	struct list_head free_spus;
+	int n_spus;
+	atomic_t reserved_spus;
 };
 
+extern struct be_spu_info be_spu_info[];
+
 struct spu *spu_alloc(void);
 struct spu *spu_alloc_node(int node);
+struct spu *spu_alloc_spu(struct spu *spu);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
 
+/* This interface allows a profiler (e.g., OProfile) to store a ref
+ * to spu context information that it creates.	This caching technique
+ * avoids the need to recreate this information after a save/restore operation.
+ *
+ * Assumes the caller has already incremented the ref count to
+ * profile_info; then spu_context_destroy must call kref_put
+ * on prof_info_kref.
+ */
+void spu_set_profile_private_kref(struct spu_context * ctx,
+				  struct kref * prof_info_kref,
+				  void (* prof_info_release) (struct kref * kref));
+
+void * spu_get_profile_private_kref(struct spu_context * ctx);
+
 extern void spu_invalidate_slbs(struct spu *spu);
 extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
 
@@ -183,7 +199,8 @@ extern long spu_sys_callback(struct spu_
 struct file;
 extern struct spufs_calls {
 	asmlinkage long (*create_thread)(const char __user *name,
-					unsigned int flags, mode_t mode);
+					unsigned int flags, mode_t mode,
+					struct file *neighbor);
 	asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
 						__u32 __user *ustatus);
 	struct module *owner;
@@ -210,8 +227,10 @@ struct spu_coredump_calls {
 #define SPU_CREATE_GANG			0x0002
 #define SPU_CREATE_NOSCHED		0x0004
 #define SPU_CREATE_ISOLATE		0x0008
+#define SPU_CREATE_AFFINITY_SPU		0x0010
+#define SPU_CREATE_AFFINITY_MEM		0x0020
 
-#define SPU_CREATE_FLAG_ALL		0x000f /* mask of all valid flags */
+#define SPU_CREATE_FLAG_ALL		0x003f /* mask of all valid flags */
 
 
 #ifdef CONFIG_SPU_FS_MODULE
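
The two profiling hooks declared above are expected to be used roughly as follows by an in-kernel profiler such as OProfile. This is only a sketch: the cached_info structure, its contents and the call site are assumptions; what comes from this patch is the pair spu_set_profile_private_kref()/spu_get_profile_private_kref() and the contract that the caller holds a reference which spufs drops via kref_put() when the context is destroyed. The return convention of the getter is not spelled out here, so retrieval is not shown.

/* Hypothetical profiler-side attach path using the new hooks. */
#include <linux/kref.h>
#include <linux/slab.h>
#include <asm/spu.h>

struct cached_info {
	struct kref cache_ref;
	/* ...decoded overlay/dcookie data the profiler wants to reuse
	 * across SPU context save/restore...
	 */
};

static void destroy_cached_info(struct kref *kref)
{
	kfree(container_of(kref, struct cached_info, cache_ref));
}

static int attach_cached_info(struct spu_context *ctx)
{
	struct cached_info *info = kzalloc(sizeof(*info), GFP_KERNEL);

	if (!info)
		return -ENOMEM;
	kref_init(&info->cache_ref);		/* refcount starts at 1 */
	spu_set_profile_private_kref(ctx, &info->cache_ref,
				     destroy_cached_info);
	/* spufs calls kref_put() on this kref from context destroy, so the
	 * profiler does not free info on the normal path.
	 */
	return 0;
}
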
Index: b/include/asm-powerpc/spu_csa.h
===================================================================
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -242,16 +242,19 @@ struct spu_state {
 	u64 spu_chnldata_RW[32];
 	u32 spu_mailbox_data[4];
 	u32 pu_mailbox_data[1];
+	u64 dar, dsisr;
 	unsigned long suspend_time;
 	spinlock_t register_lock;
 };
 
-extern void spu_init_csa(struct spu_state *csa);
+extern int spu_init_csa(struct spu_state *csa);
 extern void spu_fini_csa(struct spu_state *csa);
 extern int spu_save(struct spu_state *prev, struct spu *spu);
 extern int spu_restore(struct spu_state *new, struct spu *spu);
 extern int spu_switch(struct spu_state *prev, struct spu_state *new,
 		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
 
 #endif /* !__SPU__ */
 #endif /* __KERNEL__ */
Index: b/arch/powerpc/platforms/cell/Makefile
===================================================================
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -10,11 +10,7 @@ endif
 spufs-modular-$(CONFIG_SPU_FS)		+= spu_syscalls.o
 spu-priv1-$(CONFIG_PPC_CELL_NATIVE)	+= spu_priv1_mmio.o
 
-spu-manage-$(CONFIG_PPC_CELL_NATIVE)	+= spu_manage.o
-
 obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
 					   spu_coredump.o \
 					   $(spufs-modular-m) \
-					   $(spu-priv1-y) \
-					   $(spu-manage-y) \
-					   spufs/
+					   $(spu-priv1-y) spufs/
Index: b/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -33,14 +33,15 @@
 #include <linux/mutex.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
+#include <asm/prom.h>
+#include "spu_priv1_mmio.h"
 
 const struct spu_management_ops *spu_management_ops;
 const struct spu_priv1_ops *spu_priv1_ops;
 
-static struct list_head spu_list[MAX_NUMNODES];
+static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED;
 static LIST_HEAD(spu_full_list);
 static DEFINE_MUTEX(spu_mutex);
-static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED;
 
 EXPORT_SYMBOL_GPL(spu_priv1_ops);
 
@@ -141,12 +142,11 @@ static int __spu_trap_data_seg(struct sp
 
 	switch(REGION_ID(ea)) {
 	case USER_REGION_ID:
-#ifdef CONFIG_HUGETLB_PAGE
-		if (in_hugepage_area(mm->context, ea))
-			psize = mmu_huge_psize;
-		else
+#ifdef CONFIG_PPC_MM_SLICES
+		psize = get_slice_psize(mm, ea);
+#else
+		psize = mm->context.user_psize;
 #endif
-			psize = mm->context.user_psize;
 		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
 				SLB_VSID_USER;
 		break;
@@ -227,11 +227,9 @@ int
 spu_irq_class_0_bottom(struct spu *spu)
 {
 	unsigned long stat, mask;
-	unsigned long flags;
 
 	spu->class_0_pending = 0;
 
-	spin_lock_irqsave(&spu->register_lock, flags);
 	mask = spu_int_mask_get(spu, 0);
 	stat = spu_int_stat_get(spu, 0);
 
@@ -247,7 +245,6 @@ spu_irq_class_0_bottom(struct spu *spu)
 		__spu_trap_error(spu);
 
 	spu_int_stat_clear(spu, 0, stat);
-	spin_unlock_irqrestore(&spu->register_lock, flags);
 
 	return (stat & 0x7) ? -EIO : 0;
 }
@@ -289,7 +286,6 @@ spu_irq_class_1(int irq, void *data, str
 
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data, struct pt_regs *regs)
@@ -421,19 +417,40 @@ static void spu_init_channels(struct spu
 	}
 }
 
+struct spu *spu_alloc_spu(struct spu *req_spu)
+{
+	struct spu *spu, *ret = NULL;
+
+	mutex_lock(&spu_mutex);
+	list_for_each_entry(spu, &be_spu_info[req_spu->node].free_spus, list) {
+		if (spu == req_spu) {
+			list_del_init(&spu->list);
+			pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+			spu_init_channels(spu);
+			ret = spu;
+			break;
+		}
+	}
+	mutex_unlock(&spu_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spu_alloc_spu);
+
 struct spu *spu_alloc_node(int node)
 {
 	struct spu *spu = NULL;
 
 	mutex_lock(&spu_mutex);
-	if (!list_empty(&spu_list[node])) {
-		spu = list_entry(spu_list[node].next, struct spu, list);
+	if (!list_empty(&be_spu_info[node].free_spus)) {
+		spu = list_entry(be_spu_info[node].free_spus.next, struct spu,
+									list);
 		list_del_init(&spu->list);
 		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
+	if (spu)
+		spu_init_channels(spu);
 	return spu;
 }
 EXPORT_SYMBOL_GPL(spu_alloc_node);
@@ -455,113 +472,11 @@ struct spu *spu_alloc(void)
 void spu_free(struct spu *spu)
 {
 	mutex_lock(&spu_mutex);
-	list_add_tail(&spu->list, &spu_list[spu->node]);
+	list_add_tail(&spu->list, &be_spu_info[spu->node].free_spus);
 	mutex_unlock(&spu_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-	struct mm_struct *mm = spu->mm;
-	struct vm_area_struct *vma;
-	u64 ea, dsisr, is_write;
-	int ret;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-#if 0
-	if (!IS_VALID_EA(ea)) {
-		return -EFAULT;
-	}
-#endif /* XXX */
-	if (mm == NULL) {
-		return -EFAULT;
-	}
-	if (mm->pgd == NULL) {
-		return -EFAULT;
-	}
-
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, ea);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= ea)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-#if 0
-	if (expand_stack(vma, ea))
-		goto bad_area;
-#endif /* XXX */
-good_area:
-	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-	if (is_write) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-	} else {
-		if (dsisr & MFC_DSISR_ACCESS_DENIED)
-			goto bad_area;
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-			goto bad_area;
-	}
-	ret = 0;
-	switch (handle_mm_fault(mm, vma, ea, is_write)) {
-	case VM_FAULT_MINOR:
-		current->min_flt++;
-		break;
-	case VM_FAULT_MAJOR:
-		current->maj_flt++;
-		break;
-	case VM_FAULT_SIGBUS:
-		ret = -EFAULT;
-		goto bad_area;
-	case VM_FAULT_OOM:
-		ret = -ENOMEM;
-		goto bad_area;
-	default:
-		BUG();
-	}
-	up_read(&mm->mmap_sem);
-	return ret;
-
-bad_area:
-	up_read(&mm->mmap_sem);
-	return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-	u64 ea, dsisr, access, error = 0UL;
-	int ret = 0;
-
-	ea = spu->dar;
-	dsisr = spu->dsisr;
-	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-		u64 flags;
-
-		access = (_PAGE_PRESENT | _PAGE_USER);
-		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-		local_irq_save(flags);
-		if (hash_page(ea, access, 0x300) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		local_irq_restore(flags);
-	}
-	if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-		if ((ret = spu_handle_mm_fault(spu)) != 0)
-			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-		else
-			error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-	}
-	spu->dar = 0UL;
-	spu->dsisr = 0UL;
-	if (!error) {
-		spu_restart_dma(spu);
-	} else {
-		spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-	}
-	return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
 	set_kset_name("spu")
 };
@@ -675,11 +590,15 @@ static int __init create_spu(void *data)
 
 	mutex_lock(&spu_mutex);
 	spin_lock_irqsave(&spu_list_lock, flags);
-	list_add(&spu->list, &spu_list[spu->node]);
+	list_add(&spu->list, &be_spu_info[spu->node].free_spus);
+	list_add(&spu->be_list, &be_spu_info[spu->node].spus);
+	be_spu_info[spu->node].n_spus++;
 	list_add(&spu->full_list, &spu_full_list);
 	spin_unlock_irqrestore(&spu_list_lock, flags);
 	mutex_unlock(&spu_mutex);
 
+	INIT_LIST_HEAD(&spu->aff_list);
+
 	goto out;
 
 out_free_irqs:
@@ -695,6 +614,7 @@ out:
 static void destroy_spu(struct spu *spu)
 {
 	list_del_init(&spu->list);
+	list_del_init(&spu->be_list);
 	list_del_init(&spu->full_list);
 
 	spu_destroy_sysdev(spu);
@@ -710,7 +630,8 @@ static void cleanup_spu_base(void)
 
 	mutex_lock(&spu_mutex);
 	for (node = 0; node < MAX_NUMNODES; node++) {
-		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
+		list_for_each_entry_safe(spu, tmp, &be_spu_info[node].free_spus,
+									list)
 			destroy_spu(spu);
 	}
 	mutex_unlock(&spu_mutex);
@@ -718,9 +639,145 @@ static void cleanup_spu_base(void)
 }
 module_exit(cleanup_spu_base);
 
+struct be_spu_info be_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(be_spu_info);
+
+/* Hardcoded affinity idxs for QS20 */
+#define SPES_PER_BE 8
+static int QS20_reg_idxs[SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &be_spu_info[node].spus, be_list) {
+		if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg)
+			return spu;
+	}
+	return NULL;
+}
+
+static void init_aff_QS20_hardcoded(void)
+{
+	int node, i;
+	struct spu *last_spu, *spu;
+	u32 reg;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		last_spu = NULL;
+		for (i = 0; i < SPES_PER_BE; i++) {
+			reg = QS20_reg_idxs[i];
+			spu = spu_lookup_reg(node, reg);
+			if (!spu)
+				continue;
+			spu->has_mem_affinity = QS20_reg_memory[reg];
+			if (last_spu)
+				list_add_tail(&spu->aff_list,
+						&last_spu->aff_list);
+			last_spu = spu;
+		}
+	}
+}
+
+static int of_has_vicinity(void)
+{
+	struct spu* spu;
+
+	spu = list_entry(be_spu_info[0].spus.next, struct spu, be_list);
+	return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL;
+}
+
+static struct spu *aff_devnode_spu(int be, struct device_node *dn)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &be_spu_info[be].spus, be_list)
+		if (spu_devnode(spu) == dn)
+			return spu;
+	return NULL;
+}
+
+static struct spu *
+aff_node_next_to(int be, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &be_spu_info[be].spus, be_list) {
+		if (spu_devnode(spu) == avoid)
+			continue;
+		vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp);
+		for (i=0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->linux_phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+static void init_aff_fw_vicinity_node(int be)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	const char *name;
+	int lenp, i, added, mem_aff;
+
+	last_spu = list_entry(be_spu_info[be].spus.next, struct spu, be_list);
+	avoid_ph = 0;
+	for (added = 1; added < be_spu_info[be].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = get_property(last_spu_dn, "vicinity", &lenp);
+
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			name = get_property(vic_dn, "name", NULL);
+			if (strcmp(name, "spe") == 0) {
+				spu = aff_devnode_spu(be, vic_dn);
+				avoid_ph = last_spu_dn->linux_phandle;
+			}
+			else {
+				mem_aff = strcmp(name, "mic-tm") == 0;
+				spu = aff_node_next_to(be, vic_dn, last_spu_dn);
+				if (!spu)
+					continue;
+				if (mem_aff) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				avoid_ph = vic_dn->linux_phandle;
+			}
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+static void init_aff_fw_vicinity(void)
+{
+	int be;
+
+	/* sets has_mem_affinity for each spu, as well as the
+	 * spu->aff_list list, linking each spu to its neighbors
+	 */
+	for (be = 0; be < MAX_NUMNODES; be++)
+		init_aff_fw_vicinity_node(be);
+}
+
 static int __init init_spu_base(void)
 {
 	int i, ret;
+	long root;
 
 	if (!spu_management_ops)
 		return 0;
@@ -730,11 +787,12 @@ static int __init init_spu_base(void)
 	if (ret)
 		return ret;
 
-	for (i = 0; i < MAX_NUMNODES; i++)
-		INIT_LIST_HEAD(&spu_list[i]);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		INIT_LIST_HEAD(&be_spu_info[i].spus);
+		INIT_LIST_HEAD(&be_spu_info[i].free_spus);
+	}
 
 	ret = spu_enumerate_spus(create_spu);
-
 	if (ret) {
 		printk(KERN_WARNING "%s: Error initializing spus\n",
 			__FUNCTION__);
@@ -742,6 +800,14 @@ static int __init init_spu_base(void)
 		return ret;
 	}
 
+	if (of_has_vicinity()) {
+		init_aff_fw_vicinity();
+	} else {
+		root = of_get_flat_dt_root();
+		if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+			init_aff_QS20_hardcoded();
+	}
+
 	return ret;
 }
 module_init(init_spu_base);
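
The two hardcoded tables above encode the SPE placement of a QS20 blade. The following standalone illustration (plain userspace C, operating on local copies of the tables) shows the neighbour chain and memory-affinity marks they produce; it is an aid for review, not part of the kernel change.

#include <stdio.h>

#define SPES_PER_BE 8

static const int qs20_reg_idxs[SPES_PER_BE]   = { 0, 2, 4, 6, 7, 5, 3, 1 };
static const int qs20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };

int main(void)
{
	int i;

	printf("QS20 affinity chain (device-tree \"reg\" values):\n");
	for (i = 0; i < SPES_PER_BE; i++) {
		int reg = qs20_reg_idxs[i];

		/* chain order follows the idxs table; the memory table is
		 * indexed by reg, so SPEs 0 and 1 (the chain ends) are the
		 * ones marked as having memory affinity
		 */
		printf("  spe reg=%d%s%s\n", reg,
		       qs20_reg_memory[reg] ? "  [memory affinity]" : "",
		       i + 1 < SPES_PER_BE ? "  ->" : "");
	}
	return 0;
}
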
Index: b/arch/powerpc/platforms/cell/spu_manage.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * spu management operations for of based platforms
- *
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
- * Copyright 2006 Sony Corp.
- * (C) Copyright 2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/interrupt.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/wait.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-#include <asm/firmware.h>
-#include <asm/prom.h>
-
-#include "interrupt.h"
-
-struct device_node *spu_devnode(struct spu *spu)
-{
-	return spu->devnode;
-}
-
-EXPORT_SYMBOL_GPL(spu_devnode);
-
-static u64 __init find_spu_unit_number(struct device_node *spe)
-{
-	const unsigned int *prop;
-	int proplen;
-	prop = get_property(spe, "unit-id", &proplen);
-	if (proplen == 4)
-		return (u64)*prop;
-
-	prop = get_property(spe, "reg", &proplen);
-	if (proplen == 4)
-		return (u64)*prop;
-
-	return 0;
-}
-
-static void spu_unmap(struct spu *spu)
-{
-	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		iounmap(spu->priv1);
-	iounmap(spu->priv2);
-	iounmap(spu->problem);
-	iounmap((__force u8 __iomem *)spu->local_store);
-}
-
-static int __init spu_map_interrupts_old(struct spu *spu,
-	struct device_node *np)
-{
-	unsigned int isrc;
-	const u32 *tmp;
-	int nid;
-
-	/* Get the interrupt source unit from the device-tree */
-	tmp = get_property(np, "isrc", NULL);
-	if (!tmp)
-		return -ENODEV;
-	isrc = tmp[0];
-
-	tmp = get_property(np->parent->parent, "node-id", NULL);
-	if (!tmp) {
-		printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__);
-		nid = spu->node;
-	} else
-		nid = tmp[0];
-
-	/* Add the node number */
-	isrc |= nid << IIC_IRQ_NODE_SHIFT;
-
-	/* Now map interrupts of all 3 classes */
-	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
-	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
-	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
-
-	/* Right now, we only fail if class 2 failed */
-	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
-}
-
-static void __iomem * __init spu_map_prop_old(struct spu *spu,
-					      struct device_node *n,
-					      const char *name)
-{
-	const struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *prop;
-	int proplen;
-
-	prop = get_property(n, name, &proplen);
-	if (prop == NULL || proplen != sizeof (struct address_prop))
-		return NULL;
-
-	return ioremap(prop->address, prop->len);
-}
-
-static int __init spu_map_device_old(struct spu *spu)
-{
-	struct device_node *node = spu->devnode;
-	const char *prop;
-	int ret;
-
-	ret = -ENODEV;
-	spu->name = get_property(node, "name", NULL);
-	if (!spu->name)
-		goto out;
-
-	prop = get_property(node, "local-store", NULL);
-	if (!prop)
-		goto out;
-	spu->local_store_phys = *(unsigned long *)prop;
-
-	/* we use local store as ram, not io memory */
-	spu->local_store = (void __force *)
-		spu_map_prop_old(spu, node, "local-store");
-	if (!spu->local_store)
-		goto out;
-
-	prop = get_property(node, "problem", NULL);
-	if (!prop)
-		goto out_unmap;
-	spu->problem_phys = *(unsigned long *)prop;
-
-	spu->problem = spu_map_prop_old(spu, node, "problem");
-	if (!spu->problem)
-		goto out_unmap;
-
-	spu->priv2 = spu_map_prop_old(spu, node, "priv2");
-	if (!spu->priv2)
-		goto out_unmap;
-
-	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
-		spu->priv1 = spu_map_prop_old(spu, node, "priv1");
-		if (!spu->priv1)
-			goto out_unmap;
-	}
-
-	ret = 0;
-	goto out;
-
-out_unmap:
-	spu_unmap(spu);
-out:
-	return ret;
-}
-
-static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
-{
-	struct of_irq oirq;
-	int ret;
-	int i;
-
-	for (i=0; i < 3; i++) {
-		ret = of_irq_map_one(np, i, &oirq);
-		if (ret) {
-			pr_debug("spu_new: failed to get irq %d\n", i);
-			goto err;
-		}
-		ret = -EINVAL;
-		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
-			 oirq.controller->full_name);
-		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
-					oirq.specifier, oirq.size);
-		if (spu->irqs[i] == NO_IRQ) {
-			pr_debug("spu_new: failed to map it !\n");
-			goto err;
-		}
-	}
-	return 0;
-
-err:
-	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
-		spu->name);
-	for (; i >= 0; i--) {
-		if (spu->irqs[i] != NO_IRQ)
-			irq_dispose_mapping(spu->irqs[i]);
-	}
-	return ret;
-}
-
-static int spu_map_resource(struct spu *spu, int nr,
-			    void __iomem** virt, unsigned long *phys)
-{
-	struct device_node *np = spu->devnode;
-	struct resource resource = { };
-	unsigned long len;
-	int ret;
-
-	ret = of_address_to_resource(np, nr, &resource);
-	if (ret)
-		return ret;
-	if (phys)
-		*phys = resource.start;
-	len = resource.end - resource.start + 1;
-	*virt = ioremap(resource.start, len);
-	if (!*virt)
-		return -EINVAL;
-	return 0;
-}
-
-static int __init spu_map_device(struct spu *spu)
-{
-	struct device_node *np = spu->devnode;
-	int ret = -ENODEV;
-
-	spu->name = get_property(np, "name", NULL);
-	if (!spu->name)
-		goto out;
-
-	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
-			       &spu->local_store_phys);
-	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 0\n",
-			 np->full_name);
-		goto out;
-	}
-	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
-			       &spu->problem_phys);
-	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 1\n",
-			 np->full_name);
-		goto out_unmap;
-	}
-	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
-	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 2\n",
-			 np->full_name);
-		goto out_unmap;
-	}
-	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		ret = spu_map_resource(spu, 3,
-			       (void __iomem**)&spu->priv1, NULL);
-	if (ret) {
-		pr_debug("spu_new: failed to map %s resource 3\n",
-			 np->full_name);
-		goto out_unmap;
-	}
-	pr_debug("spu_new: %s maps:\n", np->full_name);
-	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
-		 spu->local_store_phys, spu->local_store);
-	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
-		 spu->problem_phys, spu->problem);
-	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
-	pr_debug("  priv1         :                       0x%p\n", spu->priv1);
-
-	return 0;
-
-out_unmap:
-	spu_unmap(spu);
-out:
-	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
-	return ret;
-}
-
-static int __init of_enumerate_spus(int (*fn)(void *data))
-{
-	int ret;
-	struct device_node *node;
-
-	ret = -ENODEV;
-	for (node = of_find_node_by_type(NULL, "spe");
-			node; node = of_find_node_by_type(node, "spe")) {
-		ret = fn(node);
-		if (ret) {
-			printk(KERN_WARNING "%s: Error initializing %s\n",
-				__FUNCTION__, node->name);
-			break;
-		}
-	}
-	return ret;
-}
-
-static int __init of_create_spu(struct spu *spu, void *data)
-{
-	int ret;
-	struct device_node *spe = (struct device_node *)data;
-	static int legacy_map = 0, legacy_irq = 0;
-
-	spu->devnode = of_node_get(spe);
-	spu->spe_id = find_spu_unit_number(spe);
-
-	spu->node = of_node_to_nid(spe);
-	if (spu->node >= MAX_NUMNODES) {
-		printk(KERN_WARNING "SPE %s on node %d ignored,"
-		       " node number too big\n", spe->full_name, spu->node);
-		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
-		ret = -ENODEV;
-		goto out;
-	}
-
-	ret = spu_map_device(spu);
-	if (ret) {
-		if (!legacy_map) {
-			legacy_map = 1;
-			printk(KERN_WARNING "%s: Legacy device tree found, "
-				"trying to map old style\n", __FUNCTION__);
-		}
-		ret = spu_map_device_old(spu);
-		if (ret) {
-			printk(KERN_ERR "Unable to map %s\n",
-				spu->name);
-			goto out;
-		}
-	}
-
-	ret = spu_map_interrupts(spu, spe);
-	if (ret) {
-		if (!legacy_irq) {
-			legacy_irq = 1;
-			printk(KERN_WARNING "%s: Legacy device tree found, "
-				"trying old style irq\n", __FUNCTION__);
-		}
-		ret = spu_map_interrupts_old(spu, spe);
-		if (ret) {
-			printk(KERN_ERR "%s: could not map interrupts",
-				spu->name);
-			goto out_unmap;
-		}
-	}
-
-	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
-		spu->local_store, spu->problem, spu->priv1,
-		spu->priv2, spu->number);
-	goto out;
-
-out_unmap:
-	spu_unmap(spu);
-out:
-	return ret;
-}
-
-static int of_destroy_spu(struct spu *spu)
-{
-	spu_unmap(spu);
-	of_node_put(spu->devnode);
-	return 0;
-}
-
-const struct spu_management_ops spu_management_of_ops = {
-	.enumerate_spus = of_enumerate_spus,
-	.create_spu = of_create_spu,
-	.destroy_spu = of_destroy_spu,
-};
Index: b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -37,112 +37,433 @@
 #include "interrupt.h"
 #include "spu_priv1_mmio.h"
 
+static DEFINE_MUTEX(add_spumem_mutex);
+
+struct spu_pdata {
+	struct device_node *devnode;
+	struct spu_priv1 __iomem *priv1;
+};
+
+static struct spu_pdata *spu_get_pdata(struct spu *spu)
+{
+	BUG_ON(!spu->pdata);
+	return spu->pdata;
+}
+
+struct device_node *spu_devnode(struct spu *spu)
+{
+	return spu_get_pdata(spu)->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+static void __iomem * __init map_spe_prop(struct spu *spu,
+		struct device_node *n, const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+
+	const void *p;
+	int proplen;
+
+	p = get_property(n, name, &proplen);
+	if (proplen != sizeof (struct address_prop))
+		return NULL;
+
+	prop = p;
+	return ioremap(prop->address, prop->len);
+}
+
+static void spu_unmap(struct spu *spu)
+{
+	iounmap(spu->priv2);
+	iounmap(spu_get_pdata(spu)->priv1);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu,
+	struct device_node *np)
+{
+	unsigned int isrc;
+	const u32 *tmp;
+	int nid;
+
+	/* Get the interrupt source unit from the device-tree */
+	tmp = get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	isrc = tmp[0];
+
+	tmp = get_property(np->parent->parent, "node-id", NULL);
+	if (!tmp) {
+		printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__);
+		nid = spu->node;
+	} else
+		nid = tmp[0];
+
+	/* Add the node number */
+	isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+	/* Right now, we only fail if class 2 failed */
+	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
+static int __init spu_map_device_old(struct spu *spu, struct device_node *node)
+{
+	const char *prop;
+	int ret;
+
+	ret = -ENODEV;
+	spu->name = get_property(node, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	prop = get_property(node, "local-store", NULL);
+	if (!prop)
+		goto out;
+	spu->local_store_phys = *(unsigned long *)prop;
+
+	/* we use local store as ram, not io memory */
+	spu->local_store = (void __force *)
+		map_spe_prop(spu, node, "local-store");
+	if (!spu->local_store)
+		goto out;
+
+	prop = get_property(node, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
+	spu->problem = map_spe_prop(spu, node, "problem");
+	if (!spu->problem)
+		goto out_unmap;
+
+	spu_get_pdata(spu)->priv1 = map_spe_prop(spu, node, "priv1");
+
+	spu->priv2 = map_spe_prop(spu, node, "priv2");
+	if (!spu->priv2)
+		goto out_unmap;
+	ret = 0;
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	struct of_irq oirq;
+	int ret;
+	int i;
+
+	for (i=0; i < 3; i++) {
+		ret = of_irq_map_one(np, i, &oirq);
+		if (ret) {
+			pr_debug("spu_new: failed to get irq %d\n", i);
+			goto err;
+		}
+		ret = -EINVAL;
+		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
+			 oirq.controller->full_name);
+		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
+					oirq.specifier, oirq.size);
+		if (spu->irqs[i] == NO_IRQ) {
+			pr_debug("spu_new: failed to map it !\n");
+			goto err;
+		}
+	}
+	return 0;
+
+err:
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+		spu->name);
+	for (; i >= 0; i--) {
+		if (spu->irqs[i] != NO_IRQ)
+			irq_dispose_mapping(spu->irqs[i]);
+	}
+	return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr,
+			    void __iomem** virt, unsigned long *phys)
+{
+	struct device_node *np = spu_get_pdata(spu)->devnode;
+	struct resource resource = { };
+	unsigned long len;
+	int ret;
+
+	ret = of_address_to_resource(np, nr, &resource);
+	if (ret)
+		return ret;
+
+	if (phys)
+		*phys = resource.start;
+	len = resource.end - resource.start + 1;
+	*virt = ioremap(resource.start, len);
+	if (!*virt)
+		return -EINVAL;
+	return 0;
+}
+
+static int __init spu_map_device(struct spu *spu)
+{
+	struct device_node *np = spu_get_pdata(spu)->devnode;
+	int ret = -ENODEV;
+
+	spu->name = get_property(np, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+			       &spu->local_store_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 0\n",
+			 np->full_name);
+		goto out;
+	}
+	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+			       &spu->problem_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 1\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 2\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		ret = spu_map_resource(spu, 3,
+			       (void __iomem**)&spu_get_pdata(spu)->priv1, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %s resource 3\n",
+			 np->full_name);
+		goto out_unmap;
+	}
+	pr_debug("spu_new: %s maps:\n", np->full_name);
+	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
+		 spu->local_store_phys, spu->local_store);
+	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
+		 spu->problem_phys, spu->problem);
+	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
+	pr_debug("  priv1         :                       0x%p\n",
+		 spu_get_pdata(spu)->priv1);
+
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+	return ret;
+}
+
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+
+	ret = -ENODEV;
+	for (node = of_find_node_by_type(NULL, "spe");
+			node; node = of_find_node_by_type(node, "spe")) {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %s\n",
+				__FUNCTION__, node->name);
+			break;
+		}
+	}
+	return ret;
+}
+
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+
+	spu->pdata = kzalloc(sizeof(struct spu_pdata),
+		GFP_KERNEL);
+	if (!spu->pdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	spu_get_pdata(spu)->devnode = of_node_get(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %s on node %d ignored,"
+		       " node number too big\n", spe->full_name, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	ret = spu_map_device(spu);
+	/* try old method */
+	if (ret)
+		ret = spu_map_device_old(spu, spe);
+	if (ret)
+		goto out_free;
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret)
+		ret = spu_map_interrupts_old(spu, spe);
+	if (ret)
+		goto out_unmap;
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu_get_pdata(spu)->priv1,
+		spu->priv2, spu->number);
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out_free:
+	kfree(spu->pdata);
+	spu->pdata = NULL;
+out:
+	return ret;
+}
+
+static int of_destroy_spu(struct spu *spu)
+{
+	spu_unmap(spu);
+	of_node_put(spu_get_pdata(spu)->devnode);
+	kfree(spu->pdata);
+	spu->pdata = NULL;
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+};
+
 static void int_mask_and(struct spu *spu, int class, u64 mask)
 {
 	u64 old_mask;
 
-	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
-	out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask);
+	old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]);
+	out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class],
+		old_mask & mask);
 }
 
 static void int_mask_or(struct spu *spu, int class, u64 mask)
 {
 	u64 old_mask;
 
-	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
-	out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask);
+	old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]);
+	out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class],
+		old_mask | mask);
 }
 
 static void int_mask_set(struct spu *spu, int class, u64 mask)
 {
-	out_be64(&spu->priv1->int_mask_RW[class], mask);
+	out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], mask);
 }
 
 static u64 int_mask_get(struct spu *spu, int class)
 {
-	return in_be64(&spu->priv1->int_mask_RW[class]);
+	return in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]);
 }
 
 static void int_stat_clear(struct spu *spu, int class, u64 stat)
 {
-	out_be64(&spu->priv1->int_stat_RW[class], stat);
+	out_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class], stat);
 }
 
 static u64 int_stat_get(struct spu *spu, int class)
 {
-	return in_be64(&spu->priv1->int_stat_RW[class]);
+	return in_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class]);
 }
 
 static void cpu_affinity_set(struct spu *spu, int cpu)
 {
 	u64 target = iic_get_target_id(cpu);
 	u64 route = target << 48 | target << 32 | target << 16;
-	out_be64(&spu->priv1->int_route_RW, route);
+	out_be64(&spu_get_pdata(spu)->priv1->int_route_RW, route);
 }
 
 static u64 mfc_dar_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->mfc_dar_RW);
+	return in_be64(&spu_get_pdata(spu)->priv1->mfc_dar_RW);
 }
 
 static u64 mfc_dsisr_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->mfc_dsisr_RW);
+	return in_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW);
 }
 
 static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
 {
-	out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+	out_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW, dsisr);
 }
 
 static void mfc_sdr_setup(struct spu *spu)
 {
-	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+	out_be64(&spu_get_pdata(spu)->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
 }
 
 static void mfc_sr1_set(struct spu *spu, u64 sr1)
 {
-	out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+	out_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW, sr1);
 }
 
 static u64 mfc_sr1_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->mfc_sr1_RW);
+	return in_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW);
 }
 
 static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
 {
-	out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+	out_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW, tclass_id);
 }
 
 static u64 mfc_tclass_id_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->mfc_tclass_id_RW);
+	return in_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW);
 }
 
 static void tlb_invalidate(struct spu *spu)
 {
-	out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+	out_be64(&spu_get_pdata(spu)->priv1->tlb_invalidate_entry_W, 0ul);
 }
 
 static void resource_allocation_groupID_set(struct spu *spu, u64 id)
 {
-	out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+	out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW,
+		id);
 }
 
 static u64 resource_allocation_groupID_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+	return in_be64(
+		&spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW);
 }
 
 static void resource_allocation_enable_set(struct spu *spu, u64 enable)
 {
-	out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+	out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_enable_RW,
+		enable);
 }
 
 static u64 resource_allocation_enable_get(struct spu *spu)
 {
-	return in_be64(&spu->priv1->resource_allocation_enable_RW);
+	return in_be64(
+		&spu_get_pdata(spu)->priv1->resource_allocation_enable_RW);
 }
 
 const struct spu_priv1_ops spu_priv1_mmio_ops =
Index: b/arch/powerpc/platforms/cell/spu_syscalls.c
===================================================================
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
  * this file is not used and the syscalls directly enter the fs code */
 
 asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode)
+		unsigned int flags, mode_t mode, int neighbor_fd)
 {
 	long ret;
 	struct module *owner = spufs_calls.owner;
+	struct file *neighbor;
+	int fput_needed;
 
 	ret = -ENOSYS;
 	if (owner && try_module_get(owner)) {
-		ret = spufs_calls.create_thread(name, flags, mode);
+		if (flags & SPU_CREATE_AFFINITY_SPU) {
+			neighbor = fget_light(neighbor_fd, &fput_needed);
+			if (neighbor) {
+				ret = spufs_calls.create_thread(name, flags,
+								mode, neighbor);
+				fput_light(neighbor, fput_needed);
+			}
+		}
+		else {
+			ret = spufs_calls.create_thread(name, flags,
+							mode, NULL);
+		}
 		module_put(owner);
 	}
 	return ret;
Index: b/arch/powerpc/platforms/cell/spufs/fault.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,210 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle, fortunately.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+	struct vm_area_struct *vma;
+	unsigned long is_write;
+	int ret;
+
+#if 0
+	if (!IS_VALID_EA(ea)) {
+		return -EFAULT;
+	}
+#endif /* XXX */
+	if (mm == NULL) {
+		return -EFAULT;
+	}
+	if (mm->pgd == NULL) {
+		return -EFAULT;
+	}
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ea);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= ea)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+#if 0 /* needs to be exported first */
+	if (expand_stack(vma, ea))
+		goto bad_area;
+#endif
+good_area:
+	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (dsisr & MFC_DSISR_ACCESS_DENIED)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+	ret = 0;
+	switch (handle_mm_fault(mm, vma, ea, is_write)) {
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		ret = -EFAULT;
+		goto bad_area;
+	case VM_FAULT_OOM:
+		ret = -ENOMEM;
+		goto bad_area;
+	default:
+		BUG();
+	}
+	up_read(&mm->mmap_sem);
+	return ret;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+	return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+	} else {
+		siginfo_t info;
+		memset(&info, 0, sizeof(info));
+
+		switch (type) {
+		case SPE_EVENT_INVALID_DMA:
+			info.si_signo = SIGBUS;
+			info.si_code = BUS_OBJERR;
+			break;
+		case SPE_EVENT_SPE_DATA_STORAGE:
+			info.si_signo = SIGBUS;
+			info.si_addr = (void __user *)ea;
+			info.si_code = BUS_ADRERR;
+			break;
+		case SPE_EVENT_DMA_ALIGNMENT:
+			info.si_signo = SIGBUS;
+			/* DAR isn't set for an alignment fault :( */
+			info.si_code = BUS_ADRALN;
+			break;
+		case SPE_EVENT_SPE_ERROR:
+			info.si_signo = SIGILL;
+			info.si_addr = (void __user *)(unsigned long)
+				ctx->ops->npc_read(ctx) - 4;
+			info.si_code = ILL_ILLOPC;
+			break;
+		}
+		if (info.si_signo)
+			force_sig_info(info.si_signo, &info, current);
+	}
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+	spufs_handle_dma_error(spu->ctx, spu->dar, type);
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * immediately after the context restore.
+	 */
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		ea = ctx->spu->dar;
+		dsisr = ctx->spu->dsisr;
+		ctx->spu->dar = ctx->spu->dsisr = 0;
+	} else {
+		ea = ctx->csa.priv1.mfc_dar_RW;
+		dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+		ctx->csa.priv1.mfc_dar_RW = 0;
+		ctx->csa.priv1.mfc_dsisr_RW = 0;
+	}
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	/* we must not hold the lock when entering spu_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_USER);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+	spu_acquire(ctx);
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	return ret;
+}
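
From userspace, the error path above is visible as a signal when the context was created without SPU_CREATE_EVENTS_ENABLED: an unresolvable SPE DMA fault is delivered as SIGBUS with si_code BUS_ADRERR and si_addr set to the faulting effective address. A minimal sketch of catching it follows; the surrounding spu_create()/spu_run() usage is assumed and not shown.

#include <signal.h>
#include <string.h>
#include <unistd.h>

static void sigbus_handler(int sig, siginfo_t *si, void *uctx)
{
	static const char msg[] = "SPE data storage error (see si_addr)\n";

	(void)sig; (void)si; (void)uctx;
	/* si->si_addr holds the faulting effective address and si_code is
	 * BUS_ADRERR for the SPE_EVENT_SPE_DATA_STORAGE case above.
	 */
	write(STDERR_FILENO, msg, sizeof(msg) - 1);
	_exit(1);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	/* ...spu_create()/spu_run() of a context created without
	 * SPU_CREATE_EVENTS_ENABLED would go here...
	 */
	return 0;
}
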
Index: b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
===================================================================
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,62 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa;
+	unsigned char *p;
+
+	lscsa = vmalloc(sizeof(struct spu_lscsa));
+	if (!lscsa)
+		return -ENOMEM;
+	memset(lscsa, 0, sizeof(struct spu_lscsa));
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	return 0;
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	/* Clear reserved bit before vfree. */
+	unsigned char *p;
+
+	if (csa->lscsa == NULL)
+		return;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(csa->lscsa);
+}
+
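
The new helpers are meant to be paired by the context save/restore code; the actual caller (spu_init_csa() in switch.c, which now returns int per the spu_csa.h change above) is outside this hunk, so the following is only a sketch of the intended pairing, with hypothetical function names.

#include <linux/errno.h>
#include <asm/spu_csa.h>

static int example_setup_csa(struct spu_state *csa)
{
	int ret;

	ret = spu_alloc_lscsa(csa);	/* vmalloc-backed LS save area */
	if (ret)
		return ret;		/* -ENOMEM */

	/* ...the rest of the register-area setup would go here... */
	return 0;
}

static void example_teardown_csa(struct spu_state *csa)
{
	spu_free_lscsa(csa);		/* ClearPageReserved, then vfree */
}
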
Index: b/include/asm-powerpc/mmu.h
===================================================================
--- a/include/asm-powerpc/mmu.h
+++ b/include/asm-powerpc/mmu.h
@@ -241,6 +241,8 @@ extern int hash_huge_page(struct mm_stru
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long mode,
 			     int psize);
+extern int hash_page(unsigned long ea, unsigned long access,
+		     unsigned long trap);
 
 extern void htab_initialize(void);
 extern void htab_initialize_secondary(void);
Index: b/include/linux/syscalls.h
===================================================================
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -544,7 +544,8 @@ asmlinkage long sys_inotify_rm_watch(int
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
 asmlinkage long sys_spu_create(const char __user *name,
-		unsigned int flags, mode_t mode);
+			       unsigned int flags, mode_t mode,
+			       int neighbor_fd);
 
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
 			    unsigned dev);