From: Scott Moser <smoser@redhat.com> Date: Tue, 20 Nov 2007 17:17:08 -0500 Subject: [ppc64] spufs: feature updates Message-id: 11955970301429-do-send-email-smoser@redhat.com O-Subject: [PATCH RHEL5u2] bz253208 Cell/B.E. Kernel Maintenance [3/4] Bugzilla: 253208 spufs: backport SDK3.0 + 2.6 fixes to RHEL 5.2 This patch adds a backport of the features in SDK3.0, plus fixes that have been committed upstream since the RHEL 5.1 release. This patch does not alter the RHEL 5 kABI. Signed-off-by: Jeremy Kerr <jk@ozlabs.org> -- arch/powerpc/platforms/cell/Makefile | 10 arch/powerpc/platforms/cell/spu_base.c | 416 +--- arch/powerpc/platforms/cell/spu_callbacks.c | 4 arch/powerpc/platforms/cell/spu_coredump.c | 81 arch/powerpc/platforms/cell/spu_manage.c | 539 +++++ arch/powerpc/platforms/cell/spu_priv1_mmio.c | 365 --- arch/powerpc/platforms/cell/spu_syscalls.c | 143 + arch/powerpc/platforms/cell/spufs/backing_ops.c | 10 arch/powerpc/platforms/cell/spufs/context.c | 87 arch/powerpc/platforms/cell/spufs/coredump.c | 236 +- arch/powerpc/platforms/cell/spufs/fault.c | 29 arch/powerpc/platforms/cell/spufs/file.c | 568 +++-- arch/powerpc/platforms/cell/spufs/inode.c | 109 - arch/powerpc/platforms/cell/spufs/lscsa_alloc.c | 13 arch/powerpc/platforms/cell/spufs/run.c | 153 + arch/powerpc/platforms/cell/spufs/sched.c | 952 ++++++---- arch/powerpc/platforms/cell/spufs/spu_restore.c | 8 arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped | 482 ++--- arch/powerpc/platforms/cell/spufs/spu_save.c | 2 arch/powerpc/platforms/cell/spufs/spufs.h | 121 - arch/powerpc/platforms/cell/spufs/switch.c | 116 - arch/powerpc/platforms/cell/spufs/syscalls.c | 50 fs/binfmt_elf.c | 9 include/asm-powerpc/elf.h | 9 include/asm-powerpc/spu.h | 115 - include/asm-powerpc/spu_csa.h | 8 include/asm-powerpc/spu_priv1.h | 9 include/linux/elf.h | 18 28 files changed, 2630 insertions(+), 2032 deletions(-) Acked-by: David Howells <dhowells@redhat.com> diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index e7b8178..b5a968c 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -11,10 +11,12 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o endif # needed only when building loadable spufs.ko -spufs-modular-$(CONFIG_SPU_FS) += spu_syscalls.o spu-priv1-$(CONFIG_PPC_CELL_NATIVE) += spu_priv1_mmio.o +spu-manage-$(CONFIG_PPC_CELLEB) += spu_manage.o +spu-manage-$(CONFIG_PPC_CELL_NATIVE) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ - spu_coredump.o \ - $(spufs-modular-m) \ - $(spu-priv1-y) spufs/ + spu_syscalls.o \ + $(spu-priv1-y) \ + $(spu-manage-y) \ + spufs/ diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 32a8fb1..1ae67da 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -25,30 +25,48 @@ #include <linux/interrupt.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/poll.h> #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/mm.h> #include <linux/io.h> #include <linux/mutex.h> - -#include <asm/io.h> -#include <asm/prom.h> +#include <linux/linux_logo.h> +#include <linux/uaccess.h> +#include <linux/poll.h> #include <asm/spu.h> #include <asm/spu_priv1.h> +#include <asm/xmon.h> #include <asm/prom.h> -#include "spu_priv1_mmio.h" -#include <asm/mmu_context.h> const struct spu_management_ops *spu_management_ops; +EXPORT_SYMBOL_GPL(spu_management_ops); + const struct spu_priv1_ops
*spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); -static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED; -static LIST_HEAD(spu_full_list); -static DEFINE_MUTEX(spu_mutex); +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); -EXPORT_SYMBOL_GPL(spu_priv1_ops); +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition spu_full_list_lock protects all assignments to + * spu->mm. + */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); void spu_invalidate_slbs(struct spu *spu) { @@ -67,12 +85,12 @@ void spu_flush_all_slbs(struct mm_struct *mm) struct spu *spu; unsigned long flags; - spin_lock_irqsave(&spu_list_lock, flags); + spin_lock_irqsave(&spu_full_list_lock, flags); list_for_each_entry(spu, &spu_full_list, full_list) { if (spu->mm == mm) spu_invalidate_slbs(spu); } - spin_unlock_irqrestore(&spu_list_lock, flags); + spin_unlock_irqrestore(&spu_full_list_lock, flags); } /* The hack below stinks... try to do something better one of @@ -90,9 +108,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) { unsigned long flags; - spin_lock_irqsave(&spu_list_lock, flags); + spin_lock_irqsave(&spu_full_list_lock, flags); spu->mm = mm; - spin_unlock_irqrestore(&spu_list_lock, flags); + spin_unlock_irqrestore(&spu_full_list_lock, flags); if (mm) mm_needs_global_tlbie(mm); } @@ -186,7 +204,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) spu->slb_replace = 0; spu_restart_dma(spu); - + spu->stats.slb_flt++; return 0; } @@ -220,25 +238,34 @@ static irqreturn_t spu_irq_class_0(int irq, void *data, struct pt_regs *regs) { struct spu *spu; + unsigned long stat, mask; spu = data; - spu->class_0_pending = 1; + + mask = spu_int_mask_get(spu, 0); + stat = spu_int_stat_get(spu, 0); + stat &= mask; + + spin_lock(&spu->register_lock); + spu->class_0_pending_value |= stat; + spin_unlock(&spu->register_lock); + spu->stop_callback(spu); + spu_int_stat_clear(spu, 0, stat); + return IRQ_HANDLED; } int spu_irq_class_0_bottom(struct spu *spu) { - unsigned long stat, mask; - - spu->class_0_pending = 0; - - mask = spu_int_mask_get(spu, 0); - stat = spu_int_stat_get(spu, 0); + unsigned long flags; + unsigned long stat; - stat &= mask; + spin_lock_irqsave(&spu->register_lock, flags); + stat = spu->class_0_pending_value; + spu->class_0_pending_value = 0; if (stat & 1) /* invalid DMA alignment */ __spu_trap_dma_align(spu); @@ -249,7 +276,7 @@ spu_irq_class_0_bottom(struct spu *spu) if (stat & 4) /* error on SPU */ __spu_trap_error(spu); - spu_int_stat_clear(spu, 0, stat); + spin_unlock_irqrestore(&spu->register_lock, flags); return (stat & 0x7) ? -EIO : 0; } @@ -332,6 +359,7 @@ spu_irq_class_2(int irq, void *data, struct pt_regs *regs) if (stat & 0x10) /* SPU mailbox threshold */ spu->wbox_callback(spu); + spu->stats.class2_intr++; return stat ?
IRQ_HANDLED : IRQ_NONE; } @@ -388,7 +416,7 @@ static void spu_free_irqs(struct spu *spu) free_irq(spu->irqs[2], spu); } -static void spu_init_channels(struct spu *spu) +void spu_init_channels(struct spu *spu) { static const struct { unsigned channel; @@ -421,80 +449,31 @@ static void spu_init_channels(struct spu *spu) out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); } } +EXPORT_SYMBOL_GPL(spu_init_channels); -struct spu *spu_alloc_spu(struct spu *req_spu) +static int spu_shutdown(struct sys_device *sysdev) { - struct spu *spu, *ret = NULL; - - mutex_lock(&spu_mutex); - list_for_each_entry(spu, &be_spu_info[req_spu->node].free_spus, list) { - if (spu == req_spu) { - list_del_init(&spu->list); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - spu_init_channels(spu); - ret = spu; - break; - } - } - mutex_unlock(&spu_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(spu_alloc_spu); - -struct spu *spu_alloc_node(int node) -{ - struct spu *spu = NULL; - - mutex_lock(&spu_mutex); - if (!list_empty(&be_spu_info[node].free_spus)) { - spu = list_entry(be_spu_info[node].free_spus.next, struct spu, - list); - list_del_init(&spu->list); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - } - mutex_unlock(&spu_mutex); - - if (spu) - spu_init_channels(spu); - return spu; -} -EXPORT_SYMBOL_GPL(spu_alloc_node); + struct spu *spu = container_of(sysdev, struct spu, sysdev); -struct spu *spu_alloc(void) -{ - struct spu *spu = NULL; - int node; - - for (node = 0; node < MAX_NUMNODES; node++) { - spu = spu_alloc_node(node); - if (spu) - break; - } - - return spu; -} - -void spu_free(struct spu *spu) -{ - mutex_lock(&spu_mutex); - list_add_tail(&spu->list, &be_spu_info[spu->node].free_spus); - mutex_unlock(&spu_mutex); + spu_free_irqs(spu); + spu_destroy_spu(spu); + return 0; } -EXPORT_SYMBOL_GPL(spu_free); -struct sysdev_class spu_sysdev_class = { - set_kset_name("spu") +static struct sysdev_class spu_sysdev_class = { + set_kset_name("spu"), + .shutdown = spu_shutdown, }; int spu_add_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_create_file(&spu->sysdev, attr); + mutex_unlock(&spu_full_list_mutex); - mutex_unlock(&spu_mutex); return 0; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); @@ -502,12 +481,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); int spu_add_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_create_group(&spu->sysdev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); - mutex_unlock(&spu_mutex); return 0; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); @@ -516,24 +495,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); void spu_remove_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_remove_file(&spu->sysdev, attr); - - mutex_unlock(&spu_mutex); + mutex_unlock(&spu_full_list_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); void spu_remove_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_remove_group(&spu->sysdev.kobj, attrs); - - mutex_unlock(&spu_mutex); + mutex_unlock(&spu_full_list_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); @@ 
-555,28 +532,25 @@ static int spu_create_sysdev(struct spu *spu) return 0; } -static void spu_destroy_sysdev(struct spu *spu) -{ - sysfs_remove_device_from_node(&spu->sysdev, spu->node); - sysdev_unregister(&spu->sysdev); -} - static int __init create_spu(void *data) { struct spu *spu; int ret; static int number; unsigned long flags; + struct timespec ts; ret = -ENOMEM; spu = kzalloc(sizeof (*spu), GFP_KERNEL); if (!spu) goto out; + spu->alloc_state = SPU_FREE; + spin_lock_init(&spu->register_lock); - mutex_lock(&spu_mutex); + spin_lock(&spu_lock); spu->number = number++; - mutex_unlock(&spu_mutex); + spin_unlock(&spu_lock); ret = spu_create_spu(spu, data); @@ -593,14 +567,20 @@ static int __init create_spu(void *data) if (ret) goto out_free_irqs; - mutex_lock(&spu_mutex); - spin_lock_irqsave(&spu_list_lock, flags); - list_add(&spu->list, &be_spu_info[spu->node].free_spus); - list_add(&spu->be_list, &be_spu_info[spu->node].spus); - be_spu_info[spu->node].n_spus++; + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); + + mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); list_add(&spu->full_list, &spu_full_list); - spin_unlock_irqrestore(&spu_list_lock, flags); - mutex_unlock(&spu_mutex); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); + + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + ktime_get_ts(&ts); + spu->stats.tstamp = timespec_to_ns(&ts); INIT_LIST_HEAD(&spu->aff_list); @@ -616,203 +596,87 @@ out: return ret; } -static void destroy_spu(struct spu *spu) -{ - list_del_init(&spu->list); - list_del_init(&spu->be_list); - list_del_init(&spu->full_list); - - spu_destroy_sysdev(spu); - spu_free_irqs(spu); - spu_destroy_spu(spu); - kfree(spu); -} - -static void cleanup_spu_base(void) -{ - struct spu *spu, *tmp; - int node; - - mutex_lock(&spu_mutex); - for (node = 0; node < MAX_NUMNODES; node++) { - list_for_each_entry_safe(spu, tmp, &be_spu_info[node].free_spus, - list) - destroy_spu(spu); - } - mutex_unlock(&spu_mutex); - sysdev_class_unregister(&spu_sysdev_class); -} -module_exit(cleanup_spu_base); - -struct be_spu_info be_spu_info[MAX_NUMNODES]; -EXPORT_SYMBOL_GPL(be_spu_info); - -/* Hardcoded affinity idxs for QS20 */ -#define SPES_PER_BE 8 -static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; -static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; +static const char *spu_state_names[] = { + "user", "system", "iowait", "idle" +}; -static struct spu *spu_lookup_reg(int node, u32 reg) +static unsigned long long spu_acct_time(struct spu *spu, + enum spu_utilization_state state) { - struct spu *spu; + struct timespec ts; + unsigned long long time = spu->stats.times[state]; - list_for_each_entry(spu, &be_spu_info[node].spus, be_list) { - if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) - return spu; + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. 
+ */ + if (spu->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - spu->stats.tstamp; } - return NULL; -} -static void init_aff_QS20_harcoded(void) -{ - int node, i; - struct spu *last_spu, *spu; - u32 reg; - - for (node = 0; node < MAX_NUMNODES; node++) { - last_spu = NULL; - for (i = 0; i < SPES_PER_BE; i++) { - reg = QS20_reg_idxs[i]; - spu = spu_lookup_reg(node, reg); - if (!spu) - continue; - spu->has_mem_affinity = QS20_reg_memory[reg]; - if (last_spu) - list_add_tail(&spu->aff_list, - &last_spu->aff_list); - last_spu = spu; - } - } + return time / NSEC_PER_MSEC; } -static int of_has_vicinity(void) -{ - struct spu* spu; - - spu = list_entry(be_spu_info[0].spus.next, struct spu, be_list); - return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; -} -static struct spu *aff_devnode_spu(int be, struct device_node *dn) +static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) { - struct spu *spu; + struct spu *spu = container_of(sysdev, struct spu, sysdev); - list_for_each_entry(spu, &be_spu_info[be].spus, be_list) - if (spu_devnode(spu) == dn) - return spu; - return NULL; + return sprintf(buf, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + spu_state_names[spu->stats.util_state], + spu_acct_time(spu, SPU_UTIL_USER), + spu_acct_time(spu, SPU_UTIL_SYSTEM), + spu_acct_time(spu, SPU_UTIL_IOWAIT), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu->stats.vol_ctx_switch, + spu->stats.invol_ctx_switch, + spu->stats.slb_flt, + spu->stats.hash_flt, + spu->stats.min_flt, + spu->stats.maj_flt, + spu->stats.class2_intr, + spu->stats.libassist); } -static struct spu * -aff_node_next_to(int be, struct device_node *target, struct device_node *avoid) -{ - struct spu *spu; - const phandle *vic_handles; - int lenp, i; - - list_for_each_entry(spu, &be_spu_info[be].spus, be_list) { - if (spu_devnode(spu) == avoid) - continue; - vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); - for (i=0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == target->linux_phandle) - return spu; - } - } - return NULL; -} - -static void init_aff_fw_vicinity_node(int be) -{ - struct spu *spu, *last_spu; - struct device_node *vic_dn, *last_spu_dn; - phandle avoid_ph; - const phandle *vic_handles; - const char *name; - int lenp, i, added, mem_aff; - - last_spu = list_entry(be_spu_info[be].spus.next, struct spu, be_list); - avoid_ph = 0; - for (added = 1; added < be_spu_info[be].n_spus; added++) { - last_spu_dn = spu_devnode(last_spu); - vic_handles = get_property(last_spu_dn, "vicinity", &lenp); - - for (i = 0; i < (lenp / sizeof(phandle)); i++) { - if (vic_handles[i] == avoid_ph) - continue; - - vic_dn = of_find_node_by_phandle(vic_handles[i]); - if (!vic_dn) - continue; - - name = get_property(vic_dn, "name", NULL); - if (strcmp(name, "spe") == 0) { - spu = aff_devnode_spu(be, vic_dn); - avoid_ph = last_spu_dn->linux_phandle; - } - else { - mem_aff = strcmp(name, "mic-tm") == 0; - spu = aff_node_next_to(be, vic_dn, last_spu_dn); - if (!spu) - continue; - if (mem_aff) { - last_spu->has_mem_affinity = 1; - spu->has_mem_affinity = 1; - } - avoid_ph = vic_dn->linux_phandle; - } - list_add_tail(&spu->aff_list, &last_spu->aff_list); - last_spu = spu; - break; - } - } -} - -static void init_aff_fw_vicinity(void) -{ - int be; - - /* sets has_mem_affinity for each spu, as long as the - * spu->aff_list list, linking each spu to its neighbors - */ - for (be = 0; be < MAX_NUMNODES; be++) - init_aff_fw_vicinity_node(be); -} +static 
SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); static int __init init_spu_base(void) { - int i, ret; - long root; + int i, ret = 0; + + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); + } if (!spu_management_ops) - return 0; + goto out; /* create sysdev class for spus */ ret = sysdev_class_register(&spu_sysdev_class); if (ret) - return ret; - - for (i = 0; i < MAX_NUMNODES; i++) { - INIT_LIST_HEAD(&be_spu_info[i].spus); - INIT_LIST_HEAD(&be_spu_info[i].free_spus); - } + goto out; ret = spu_enumerate_spus(create_spu); - if (ret) { + + if (ret < 0) { printk(KERN_WARNING "%s: Error initializing spus\n", __FUNCTION__); - cleanup_spu_base(); - return ret; + goto out_unregister_sysdev_class; } - if (of_has_vicinity()) { - init_aff_fw_vicinity(); - } else { - root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) - init_aff_QS20_harcoded(); - } + spu_add_sysdev_attr(&attr_stat); + + spu_init_affinity(); + return 0; + + out_unregister_sysdev_class: + sysdev_class_unregister(&spu_sysdev_class); + out: return ret; } module_init(init_spu_base); diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index 47ec3be..dceb8b6 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -2,7 +2,7 @@ * System call callback functions for SPUs */ -#define DEBUG +#undef DEBUG #include <linux/kallsyms.h> #include <linux/module.h> @@ -33,7 +33,7 @@ * mbind, mq_open, ipc, ... */ -void *spu_syscall_table[] = { +static void *spu_syscall_table[] = { #define SYSCALL(func) sys_ni_syscall, #define COMPAT_SYS(func) sys_ni_syscall, #define PPC_SYS(func) sys_ni_syscall, diff --git a/arch/powerpc/platforms/cell/spu_coredump.c b/arch/powerpc/platforms/cell/spu_coredump.c deleted file mode 100644 index 6915b41..0000000 --- a/arch/powerpc/platforms/cell/spu_coredump.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * SPU core dump code - * - * (C) Copyright 2006 IBM Corp. - * - * Author: Dwayne Grant McConnell <decimal@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/file.h> -#include <linux/module.h> -#include <linux/syscalls.h> - -#include <asm/spu.h> - -static struct spu_coredump_calls spu_coredump_calls; -static DEFINE_MUTEX(spu_coredump_mutex); - -int arch_notes_size(void) -{ - long ret; - struct module *owner = spu_coredump_calls.owner; - - ret = -ENOSYS; - mutex_lock(&spu_coredump_mutex); - if (owner && try_module_get(owner)) { - ret = spu_coredump_calls.arch_notes_size(); - module_put(owner); - } - mutex_unlock(&spu_coredump_mutex); - return ret; -} - -void arch_write_notes(struct file *file) -{ - struct module *owner = spu_coredump_calls.owner; - - mutex_lock(&spu_coredump_mutex); - if (owner && try_module_get(owner)) { - spu_coredump_calls.arch_write_notes(file); - module_put(owner); - } - mutex_unlock(&spu_coredump_mutex); -} - -int register_arch_coredump_calls(struct spu_coredump_calls *calls) -{ - if (spu_coredump_calls.owner) - return -EBUSY; - - mutex_lock(&spu_coredump_mutex); - spu_coredump_calls.arch_notes_size = calls->arch_notes_size; - spu_coredump_calls.arch_write_notes = calls->arch_write_notes; - spu_coredump_calls.owner = calls->owner; - mutex_unlock(&spu_coredump_mutex); - return 0; -} -EXPORT_SYMBOL_GPL(register_arch_coredump_calls); - -void unregister_arch_coredump_calls(struct spu_coredump_calls *calls) -{ - BUG_ON(spu_coredump_calls.owner != calls->owner); - - mutex_lock(&spu_coredump_mutex); - spu_coredump_calls.owner = NULL; - mutex_unlock(&spu_coredump_mutex); -} -EXPORT_SYMBOL_GPL(unregister_arch_coredump_calls); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c new file mode 100644 index 0000000..0ae89d9 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -0,0 +1,539 @@ +/* + * spu management operations for of based platforms + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * (C) Copyright 2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "interrupt.h" + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static u64 __init find_spu_unit_number(struct device_node *spe) +{ + const unsigned int *prop; + int proplen; + + /* new device trees should provide the physical-id attribute */ + prop = get_property(spe, "physical-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* celleb device tree provides the unit-id */ + prop = get_property(spe, "unit-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* legacy device trees provide the id in the reg attribute */ + prop = get_property(spe, "reg", &proplen); + if (proplen == 4) + return (u64)*prop; + + return 0; +} + +static void spu_unmap(struct spu *spu) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + iounmap(spu->priv1); + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + int nid; + + /* Get the interrupt source unit from the device-tree */ + tmp = get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + tmp = get_property(np->parent->parent, "node-id", NULL); + if (!tmp) { + printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__); + nid = spu->node; + } else + nid = tmp[0]; + + /* Add the node number */ + isrc |= nid << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? 
-EINVAL : 0; +} + +static void __iomem * __init spu_map_prop_old(struct spu *spu, + struct device_node *n, + const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + int proplen; + + prop = get_property(n, name, &proplen); + if (prop == NULL || proplen != sizeof (struct address_prop)) + return NULL; + + return ioremap(prop->address, prop->len); +} + +static int __init spu_map_device_old(struct spu *spu) +{ + struct device_node *node = spu->devnode; + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + spu_map_prop_old(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem = spu_map_prop_old(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv2 = spu_map_prop_old(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + spu->priv1 = spu_map_prop_old(spu, node, "priv1"); + if (!spu->priv1) + goto out_unmap; + } + + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct of_irq oirq; + int ret; + int i; + + for (i=0; i < 3; i++) { + ret = of_irq_map_one(np, i, &oirq); + if (ret) { + pr_debug("spu_new: failed to get irq %d\n", i); + goto err; + } + ret = -EINVAL; + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0], + oirq.controller->full_name); + spu->irqs[i] = irq_create_of_mapping(oirq.controller, + oirq.specifier, oirq.size); + if (spu->irqs[i] == NO_IRQ) { + pr_debug("spu_new: failed to map it !\n"); + goto err; + } + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier, + spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i] != NO_IRQ) + irq_dispose_mapping(spu->irqs[i]); + } + return ret; +} + +static int spu_map_resource(struct spu *spu, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct device_node *np = spu->devnode; + struct resource resource = { }; + unsigned long len; + int ret; + + ret = of_address_to_resource(np, nr, &resource); + if (ret) + return ret; + if (phys) + *phys = resource.start; + len = resource.end - resource.start + 1; + *virt = ioremap(resource.start, len); + if (!*virt) + return -EINVAL; + return 0; +} + +static int __init spu_map_device(struct spu *spu) +{ + struct device_node *np = spu->devnode; + int ret = -ENODEV; + + spu->name = get_property(np, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 0\n", + np->full_name); + goto out; + } + ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 1\n", + np->full_name); + goto out_unmap; + } + ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 2\n", + np->full_name); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = 
spu_map_resource(spu, 3, + (void __iomem**)&spu->priv1, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 3\n", + np->full_name); + goto out_unmap; + } + pr_debug("spu_new: %s maps:\n", np->full_name); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", spu->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + unsigned int n = 0; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + break; + } + n++; + } + return ret ? ret : n; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + static int legacy_map = 0, legacy_irq = 0; + + spu->devnode = of_node_get(spe); + spu->spe_id = find_spu_unit_number(spe); + + spu->node = of_node_to_nid(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %s on node %d ignored," + " node number too big\n", spe->full_name, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out; + } + + ret = spu_map_device(spu); + if (ret) { + if (!legacy_map) { + legacy_map = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying to map old style\n", __FUNCTION__); + } + ret = spu_map_device_old(spu); + if (ret) { + printk(KERN_ERR "Unable to map %s\n", + spu->name); + goto out; + } + } + + ret = spu_map_interrupts(spu, spe); + if (ret) { + if (!legacy_irq) { + legacy_irq = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying old style irq\n", __FUNCTION__); + } + ret = spu_map_interrupts_old(spu, spe); + if (ret) { + printk(KERN_ERR "%s: could not map interrupts", + spu->name); + goto out_unmap; + } + } + + pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu->devnode); + return 0; +} + +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = (u32*)get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + &last_spu->aff_list); + last_spu = spu; + } + } +} + +static int of_has_vicinity(void) +{ + struct spu* spu; + + spu = 
list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); + return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; +} + +static struct spu *devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * +neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + struct device_node *spu_dn; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + spu_dn = spu_devnode(spu); + if (spu_dn == avoid) + continue; + vic_handles = get_property(spu_dn, "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->linux_phandle) + return spu; + } + } + return NULL; +} + +static void init_affinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + const char *name; + int lenp, i, added; + + last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, + cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = get_property(last_spu_dn, "vicinity", &lenp); + + /* + * Walk through each phandle in vicinity property of the spu + * (typically two vicinity phandles per spe node) + */ + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + /* a neighbour might be spe, mic-tm, or bif0 */ + name = get_property(vic_dn, "name", NULL); + if (!name) + continue; + + if (strcmp(name, "spe") == 0) { + spu = devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->linux_phandle; + } else { + /* + * "mic-tm" and "bif0" nodes do not have + * vicinity property.
So we need to find the + * spe which has vic_dn as neighbour, but + * skipping the one we came from (last_spu_dn) + */ + spu = neighbour_spu(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (!strcmp(name, "mic-tm")) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->linux_phandle; + } + + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void init_affinity_fw(void) +{ + int cbe; + + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_affinity_node(cbe); +} + +static int __init init_affinity(void) +{ + if (of_has_vicinity()) { + init_affinity_fw(); + } else { + long root = of_get_flat_dt_root(); + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + init_affinity_qs20_harcoded(); + else + printk("No affinity configuration found"); + } + + return 0; +} + +const struct spu_management_ops spu_management_of_ops = { + .enumerate_spus = of_enumerate_spus, + .create_spu = of_create_spu, + .destroy_spu = of_destroy_spu, + .init_affinity = init_affinity, +}; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index a0ae600..eec8a93 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -37,433 +37,112 @@ #include "interrupt.h" #include "spu_priv1_mmio.h" -static DEFINE_MUTEX(add_spumem_mutex); - -struct spu_pdata { - struct device_node *devnode; - struct spu_priv1 __iomem *priv1; -}; - -static struct spu_pdata *spu_get_pdata(struct spu *spu) -{ - BUG_ON(!spu->pdata); - return spu->pdata; -} - -struct device_node *spu_devnode(struct spu *spu) -{ - return spu_get_pdata(spu)->devnode; -} - -EXPORT_SYMBOL_GPL(spu_devnode); - -static void __iomem * __init map_spe_prop(struct spu *spu, - struct device_node *n, const char *name) -{ - const struct address_prop { - unsigned long address; - unsigned int len; - } __attribute__((packed)) *prop; - - const void *p; - int proplen; - - p = get_property(n, name, &proplen); - if (proplen != sizeof (struct address_prop)) - return NULL; - - prop = p; - return ioremap(prop->address, prop->len); -} - -static void spu_unmap(struct spu *spu) -{ - iounmap(spu->priv2); - iounmap(spu_get_pdata(spu)->priv1); - iounmap(spu->problem); - iounmap((__force u8 __iomem *)spu->local_store); -} - -static int __init spu_map_interrupts_old(struct spu *spu, - struct device_node *np) -{ - unsigned int isrc; - const u32 *tmp; - int nid; - - /* Get the interrupt source unit from the device-tree */ - tmp = get_property(np, "isrc", NULL); - if (!tmp) - return -ENODEV; - isrc = tmp[0]; - - tmp = get_property(np->parent->parent, "node-id", NULL); - if (!tmp) { - printk(KERN_WARNING "%s: can't find node-id\n", __FUNCTION__); - nid = spu->node; - } else - nid = tmp[0]; - - /* Add the node number */ - isrc |= nid << IIC_IRQ_NODE_SHIFT; - - /* Now map interrupts of all 3 classes */ - spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); - spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); - spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); - - /* Right now, we only fail if class 2 failed */ - return spu->irqs[2] == NO_IRQ ? 
-EINVAL : 0; -} - -static int __init spu_map_device_old(struct spu *spu, struct device_node *node) -{ - const char *prop; - int ret; - - ret = -ENODEV; - spu->name = get_property(node, "name", NULL); - if (!spu->name) - goto out; - - prop = get_property(node, "local-store", NULL); - if (!prop) - goto out; - spu->local_store_phys = *(unsigned long *)prop; - - /* we use local store as ram, not io memory */ - spu->local_store = (void __force *) - map_spe_prop(spu, node, "local-store"); - if (!spu->local_store) - goto out; - - prop = get_property(node, "problem", NULL); - if (!prop) - goto out_unmap; - spu->problem_phys = *(unsigned long *)prop; - - spu->problem= map_spe_prop(spu, node, "problem"); - if (!spu->problem) - goto out_unmap; - - spu_get_pdata(spu)->priv1= map_spe_prop(spu, node, "priv1"); - - spu->priv2= map_spe_prop(spu, node, "priv2"); - if (!spu->priv2) - goto out_unmap; - ret = 0; - goto out; - -out_unmap: - spu_unmap(spu); -out: - return ret; -} - -static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) -{ - struct of_irq oirq; - int ret; - int i; - - for (i=0; i < 3; i++) { - ret = of_irq_map_one(np, i, &oirq); - if (ret) { - pr_debug("spu_new: failed to get irq %d\n", i); - goto err; - } - ret = -EINVAL; - pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0], - oirq.controller->full_name); - spu->irqs[i] = irq_create_of_mapping(oirq.controller, - oirq.specifier, oirq.size); - if (spu->irqs[i] == NO_IRQ) { - pr_debug("spu_new: failed to map it !\n"); - goto err; - } - } - return 0; - -err: - pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier, - spu->name); - for (; i >= 0; i--) { - if (spu->irqs[i] != NO_IRQ) - irq_dispose_mapping(spu->irqs[i]); - } - return ret; -} - -static int spu_map_resource(struct spu *spu, int nr, - void __iomem** virt, unsigned long *phys) -{ - struct device_node *np = spu_get_pdata(spu)->devnode; - struct resource resource = { }; - unsigned long len; - int ret; - - ret = of_address_to_resource(np, nr, &resource); - if (ret) - return ret; - - if (phys) - *phys = resource.start; - len = resource.end - resource.start + 1; - *virt = ioremap(resource.start, len); - if (!*virt) - return -EINVAL; - return 0; -} - -static int __init spu_map_device(struct spu *spu) -{ - struct device_node *np = spu_get_pdata(spu)->devnode; - int ret = -ENODEV; - - spu->name = get_property(np, "name", NULL); - if (!spu->name) - goto out; - - ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, - &spu->local_store_phys); - if (ret) { - pr_debug("spu_new: failed to map %s resource 0\n", - np->full_name); - goto out; - } - ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, - &spu->problem_phys); - if (ret) { - pr_debug("spu_new: failed to map %s resource 1\n", - np->full_name); - goto out_unmap; - } - ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); - if (ret) { - pr_debug("spu_new: failed to map %s resource 2\n", - np->full_name); - goto out_unmap; - } - if (!firmware_has_feature(FW_FEATURE_LPAR)) - ret = spu_map_resource(spu, 3, - (void __iomem**)&spu_get_pdata(spu)->priv1, NULL); - if (ret) { - pr_debug("spu_new: failed to map %s resource 3\n", - np->full_name); - goto out_unmap; - } - pr_debug("spu_new: %s maps:\n", np->full_name); - pr_debug(" local store : 0x%016lx -> 0x%p\n", - spu->local_store_phys, spu->local_store); - pr_debug(" problem state : 0x%016lx -> 0x%p\n", - spu->problem_phys, spu->problem); - pr_debug(" priv2 : 0x%p\n", spu->priv2); - pr_debug(" priv1 : 0x%p\n", - 
spu_get_pdata(spu)->priv1); - - return 0; - -out_unmap: - spu_unmap(spu); -out: - pr_debug("failed to map spe %s: %d\n", spu->name, ret); - return ret; -} - -static int __init of_enumerate_spus(int (*fn)(void *data)) -{ - int ret; - struct device_node *node; - - ret = -ENODEV; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { - ret = fn(node); - if (ret) { - printk(KERN_WARNING "%s: Error initializing %s\n", - __FUNCTION__, node->name); - break; - } - } - return ret; -} - -static int __init of_create_spu(struct spu *spu, void *data) -{ - int ret; - struct device_node *spe = (struct device_node *)data; - - spu->pdata = kzalloc(sizeof(struct spu_pdata), - GFP_KERNEL); - if (!spu->pdata) { - ret = -ENOMEM; - goto out; - } - spu_get_pdata(spu)->devnode = of_node_get(spe); - - spu->node = of_node_to_nid(spe); - if (spu->node >= MAX_NUMNODES) { - printk(KERN_WARNING "SPE %s on node %d ignored," - " node number too big\n", spe->full_name, spu->node); - printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); - ret = -ENODEV; - goto out_free; - } - - ret = spu_map_device(spu); - /* try old method */ - if (ret) - ret = spu_map_device_old(spu, spe); - if (ret) - goto out_free; - - ret = spu_map_interrupts(spu, spe); - if (ret) - ret = spu_map_interrupts_old(spu, spe); - if (ret) - goto out_unmap; - - pr_debug(KERN_DEBUG "Using SPE %s %p %p %p %p %d\n", spu->name, - spu->local_store, spu->problem, spu_get_pdata(spu)->priv1, - spu->priv2, spu->number); - goto out; - -out_unmap: - spu_unmap(spu); -out_free: - kfree(spu->pdata); - spu->pdata = NULL; -out: - return ret; -} - -static int of_destroy_spu(struct spu *spu) -{ - spu_unmap(spu); - of_node_put(spu_get_pdata(spu)->devnode); - kfree(spu->pdata); - spu->pdata = NULL; - return 0; -} - -const struct spu_management_ops spu_management_of_ops = { - .enumerate_spus = of_enumerate_spus, - .create_spu = of_create_spu, - .destroy_spu = of_destroy_spu, -}; - static void int_mask_and(struct spu *spu, int class, u64 mask) { u64 old_mask; - old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); - out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], - old_mask & mask); + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); } static void int_mask_or(struct spu *spu, int class, u64 mask) { u64 old_mask; - old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); - out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], - old_mask | mask); + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); } static void int_mask_set(struct spu *spu, int class, u64 mask) { - out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], mask); + out_be64(&spu->priv1->int_mask_RW[class], mask); } static u64 int_mask_get(struct spu *spu, int class) { - return in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); + return in_be64(&spu->priv1->int_mask_RW[class]); } static void int_stat_clear(struct spu *spu, int class, u64 stat) { - out_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class], stat); + out_be64(&spu->priv1->int_stat_RW[class], stat); } static u64 int_stat_get(struct spu *spu, int class) { - return in_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class]); + return in_be64(&spu->priv1->int_stat_RW[class]); } static void cpu_affinity_set(struct spu *spu, int cpu) { u64 target = iic_get_target_id(cpu); u64 route = target << 48 | target << 32 | target << 16; - 
out_be64(&spu_get_pdata(spu)->priv1->int_route_RW, route); + out_be64(&spu->priv1->int_route_RW, route); } static u64 mfc_dar_get(struct spu *spu) { - return in_be64(&spu_get_pdata(spu)->priv1->mfc_dar_RW); + return in_be64(&spu->priv1->mfc_dar_RW); } static u64 mfc_dsisr_get(struct spu *spu) { - return in_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW); + return in_be64(&spu->priv1->mfc_dsisr_RW); } static void mfc_dsisr_set(struct spu *spu, u64 dsisr) { - out_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW, dsisr); + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); } static void mfc_sdr_set(struct spu *spu, u64 unused) { - out_be64(&spu_get_pdata(spu)->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); } static void mfc_sr1_set(struct spu *spu, u64 sr1) { - out_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW, sr1); + out_be64(&spu->priv1->mfc_sr1_RW, sr1); } static u64 mfc_sr1_get(struct spu *spu) { - return in_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW); + return in_be64(&spu->priv1->mfc_sr1_RW); } static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) { - out_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW, tclass_id); + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); } static u64 mfc_tclass_id_get(struct spu *spu) { - return in_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW); + return in_be64(&spu->priv1->mfc_tclass_id_RW); } static void tlb_invalidate(struct spu *spu) { - out_be64(&spu_get_pdata(spu)->priv1->tlb_invalidate_entry_W, 0ul); + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); } static void resource_allocation_groupID_set(struct spu *spu, u64 id) { - out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW, - id); + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); } static u64 resource_allocation_groupID_get(struct spu *spu) { - return in_be64( - &spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW); + return in_be64(&spu->priv1->resource_allocation_groupID_RW); } static void resource_allocation_enable_set(struct spu *spu, u64 enable) { - out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_enable_RW, - enable); + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); } static u64 resource_allocation_enable_get(struct spu *spu) { - return in_be64( - &spu_get_pdata(spu)->priv1->resource_allocation_enable_RW); + return in_be64(&spu->priv1->resource_allocation_enable_RW); } const struct spu_priv1_ops spu_priv1_mmio_ops = diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index dd2c668..a9438b7 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -2,6 +2,7 @@ * SPU file system -- system call stubs * * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright 2006-2007, IBM Corporation * * Author: Arnd Bergmann <arndb@de.ibm.com> * @@ -20,43 +21,73 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/file.h> +#include <linux/fs.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/rcupdate.h> #include <asm/spu.h> -struct spufs_calls spufs_calls = { - .owner = NULL, -}; +/* protected by rcu */ +static struct spufs_calls *spufs_calls; -/* These stub syscalls are needed to have the actual implementation - * within a loadable module. 
When spufs is built into the kernel, - * this file is not used and the syscalls directly enter the fs code */ +#ifdef CONFIG_SPU_FS_MODULE + +static inline struct spufs_calls *spufs_calls_get(void) +{ + struct spufs_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(spufs_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) +{ + BUG_ON(calls != spufs_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(spufs_calls->owner); +} + +#else /* !defined CONFIG_SPU_FS_MODULE */ + +static inline struct spufs_calls *spufs_calls_get(void) +{ + return spufs_calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) { } + +#endif /* CONFIG_SPU_FS_MODULE */ asmlinkage long sys_spu_create(const char __user *name, unsigned int flags, mode_t mode, int neighbor_fd) { long ret; - struct module *owner = spufs_calls.owner; struct file *neighbor; int fput_needed; + struct spufs_calls *calls; - ret = -ENOSYS; - if (owner && try_module_get(owner)) { - if (flags & SPU_CREATE_AFFINITY_SPU) { - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = spufs_calls.create_thread(name, flags, - mode, neighbor); - fput_light(neighbor, fput_needed); - } - } - else { - ret = spufs_calls.create_thread(name, flags, - mode, NULL); + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + ret = -EBADF; + neighbor = fget_light(neighbor_fd, &fput_needed); + if (neighbor) { + ret = calls->create_thread(name, flags, mode, neighbor); + fput_light(neighbor, fput_needed); } - module_put(owner); - } + } else + ret = calls->create_thread(name, flags, mode, NULL); + + spufs_calls_put(calls); return ret; } @@ -65,37 +96,69 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) long ret; struct file *filp; int fput_needed; - struct module *owner = spufs_calls.owner; + struct spufs_calls *calls; - ret = -ENOSYS; - if (owner && try_module_get(owner)) { - ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = spufs_calls.spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); - } - module_put(owner); + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + ret = -EBADF; + filp = fget_light(fd, &fput_needed); + if (filp) { + ret = calls->spu_run(filp, unpc, ustatus); + fput_light(filp, fput_needed); } + + spufs_calls_put(calls); + return ret; +} + +int elf_coredump_extra_notes_size(void) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_size(); + + spufs_calls_put(calls); + + return ret; +} + +int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_write(file, foffset); + + spufs_calls_put(calls); + return ret; } int register_spu_syscalls(struct spufs_calls *calls) { - if (spufs_calls.owner) + if (spufs_calls) return -EBUSY; - spufs_calls.create_thread = calls->create_thread; - spufs_calls.spu_run = calls->spu_run; - smp_mb(); - spufs_calls.owner = calls->owner; + rcu_assign_pointer(spufs_calls, calls); return 0; } EXPORT_SYMBOL_GPL(register_spu_syscalls); void unregister_spu_syscalls(struct spufs_calls *calls) { - BUG_ON(spufs_calls.owner != calls->owner); - spufs_calls.owner = NULL; 
+ BUG_ON(spufs_calls->owner != calls->owner); + rcu_assign_pointer(spufs_calls, NULL); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 3322528..ec01214 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -28,7 +28,6 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/poll.h> @@ -163,7 +162,8 @@ static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) BUG_ON(avail != (4 - slot)); ctx->csa.spu_mailbox_data[slot] = data; ctx->csa.spu_chnlcnt_RW[29] = ++slot; - ctx->csa.prob.mb_stat_R = (((4 - slot) & 0xff) << 8); + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); ret = 4; } else { @@ -321,6 +321,12 @@ static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, /* FIXME: what are the side-effects of this? */ prob->dma_querymask_RW = mask; prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. + */ + ctx->csa.prob.dma_tagstatus_R &= mask; out: spin_unlock(&ctx->csa.register_lock); diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 4fa0143..9cb081c 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -24,10 +24,14 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> +#include <asm/atomic.h> #include <asm/spu.h> #include <asm/spu_csa.h> #include "spufs.h" + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + struct spu_context *alloc_spu_context(struct spu_gang *gang) { struct spu_context *ctx; @@ -40,10 +44,10 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) if (spu_init_csa(&ctx->csa)) goto out_free; spin_lock_init(&ctx->mmio_lock); - spin_lock_init(&ctx->mapping_lock); + mutex_init(&ctx->mapping_lock); kref_init(&ctx->kref); mutex_init(&ctx->state_mutex); - init_MUTEX(&ctx->run_sema); + mutex_init(&ctx->run_mutex); init_waitqueue_head(&ctx->ibox_wq); init_waitqueue_head(&ctx->wbox_wq); init_waitqueue_head(&ctx->stop_wq); @@ -55,10 +59,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) INIT_LIST_HEAD(&ctx->aff_list); if (gang) spu_gang_add_ctx(gang, ctx); - ctx->rt_priority = current->rt_priority; - ctx->policy = current->policy; - ctx->prio = current->prio; - INIT_WORK(&ctx->sched_work, spu_sched_tick, ctx); + + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + + atomic_inc(&nr_spu_contexts); goto out; out_free: kfree(ctx); @@ -80,6 +86,7 @@ void destroy_spu_context(struct kref *kref) if (ctx->prof_priv_kref) kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); kfree(ctx); } @@ -107,6 +114,7 @@ void spu_forget(struct spu_context *ctx) void spu_unmap_mappings(struct spu_context *ctx) { + mutex_lock(&ctx->mapping_lock); if (ctx->local_store) unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); if (ctx->mfc) @@ -121,46 +129,7 @@ void spu_unmap_mappings(struct spu_context *ctx) 
unmap_mapping_range(ctx->mss, 0, 0x1000, 1); if (ctx->psmap) unmap_mapping_range(ctx->psmap, 0, 0x20000, 1); -} - -/** - * spu_acquire_exclusive - lock spu contex and protect against userspace access - * @ctx: spu contex to lock - * - * Note: - * Returns 0 and with the context locked on success - * Returns negative error and with the context _unlocked_ on failure. - */ -int spu_acquire_exclusive(struct spu_context *ctx) -{ - int ret = -EINVAL; - - spu_acquire(ctx); - /* - * Context is about to be freed, so we can't acquire it anymore. - */ - if (!ctx->owner) - goto out_unlock; - - if (ctx->state == SPU_STATE_SAVED) { - ret = spu_activate(ctx, 0); - if (ret) - goto out_unlock; - } else { - /* - * We need to exclude userspace access to the context. - * - * To protect against memory access we invalidate all ptes - * and make sure the pagefault handlers block on the mutex. - */ - spu_unmap_mappings(ctx); - } - - return 0; - - out_unlock: - spu_release(ctx); - return ret; + mutex_unlock(&ctx->mapping_lock); } /** @@ -201,20 +170,36 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) void spu_acquire_saved(struct spu_context *ctx) { spu_acquire(ctx); - if (ctx->state != SPU_STATE_SAVED) + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); spu_deactivate(ctx); + } +} + +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) +{ + BUG_ON(ctx->state != SPU_STATE_SAVED); + + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) + spu_activate(ctx, 0); + + spu_release(ctx); } -void spu_set_profile_private_kref(struct spu_context * ctx, - struct kref * prof_info_kref, - void (* prof_info_release) (struct kref * kref)) +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + void ( * prof_info_release) (struct kref *kref)) { ctx->prof_priv_kref = prof_info_kref; ctx->prof_priv_release = prof_info_release; } EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); -void * spu_get_profile_private_kref(struct spu_context * ctx) +void *spu_get_profile_private_kref(struct spu_context *ctx) { return ctx->prof_priv_kref; } diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 5d9ad5a..80f6236 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -31,16 +31,7 @@ #include "spufs.h" -struct spufs_ctx_info { - struct list_head list; - int dfd; - int memsize; /* in bytes */ - struct spu_context *ctx; -}; - -static LIST_HEAD(ctx_info_list); - -static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *buffer, +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, size_t size, loff_t *off) { u64 data; @@ -50,49 +41,57 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *b return spufs_coredump_read[num].read(ctx, buffer, size, off); data = spufs_coredump_read[num].get(ctx); - ret = copy_to_user(buffer, &data, 8); - return ret ? -EFAULT : 8; + ret = snprintf(buffer, size, "0x%.16lx", data); + if (ret >= size) + return size; + return ++ret; /* count trailing NULL */ } /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
*/ -static int spufs_dump_write(struct file *file, const void *addr, int nr) +static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset) { - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} + unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; + ssize_t written; -static int spufs_dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek) { - if (file->f_op->llseek(file, off, 0) != off) - return 0; - } else - file->f_pos = off; - return 1; + if (*foffset + nr > limit) + return -EIO; + + written = file->f_op->write(file, addr, nr, &file->f_pos); + *foffset += written; + + if (written != nr) + return -EIO; + + return 0; } -static void spufs_fill_memsize(struct spufs_ctx_info *ctx_info) +static int spufs_dump_align(struct file *file, char *buf, loff_t new_off, + loff_t *foffset) { - struct spu_context *ctx; - unsigned long long lslr; + int rc, size; + + size = min((loff_t)PAGE_SIZE, new_off - *foffset); + memset(buf, 0, size); + + rc = 0; + while (rc == 0 && new_off > *foffset) { + size = min((loff_t)PAGE_SIZE, new_off - *foffset); + rc = spufs_dump_write(file, buf, size, foffset); + } - ctx = ctx_info->ctx; - lslr = ctx->csa.priv2.spu_lslr_RW; - ctx_info->memsize = lslr + 1; + return rc; } -static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info) +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) { - int dfd, memsize, i, sz, total = 0; + int i, sz, total = 0; char *name; char fullname[80]; - dfd = ctx_info->dfd; - memsize = ctx_info->memsize; - - for (i = 0; spufs_coredump_read[i].name; i++) { + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { name = spufs_coredump_read[i].name; sz = spufs_coredump_read[i].size; @@ -100,39 +99,12 @@ static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info) total += sizeof(struct elf_note); total += roundup(strlen(fullname) + 1, 4); - if (!strcmp(name, "mem")) - total += roundup(memsize, 4); - else - total += roundup(sz, 4); + total += roundup(sz, 4); } return total; } -static int spufs_add_one_context(struct file *file, int dfd) -{ - struct spu_context *ctx; - struct spufs_ctx_info *ctx_info; - int size; - - ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; - if (ctx->flags & SPU_CREATE_NOSCHED) - return 0; - - ctx_info = kzalloc(sizeof(*ctx_info), GFP_KERNEL); - if (unlikely(!ctx_info)) - return -ENOMEM; - - ctx_info->dfd = dfd; - ctx_info->ctx = ctx; - - spufs_fill_memsize(ctx_info); - - size = spufs_ctx_note_size(ctx_info); - list_add(&ctx_info->list, &ctx_info_list); - return size; -} - /* * The additional architecture-specific notes for Cell are various * context files in the spu context. @@ -142,33 +114,57 @@ static int spufs_add_one_context(struct file *file, int dfd) * internal functionality to dump them without needing to actually * open the files. 
*/ -static int spufs_arch_notes_size(void) +static struct spu_context *coredump_next_context(int *fd) { struct fdtable *fdt = files_fdtable(current->files); - int size = 0, fd; + struct file *file; + struct spu_context *ctx = NULL; - for (fd = 0; fd < fdt->max_fds; fd++) { - if (FD_ISSET(fd, fdt->open_fds)) { - struct file *file = fcheck(fd); + for (; *fd < fdt->max_fds; (*fd)++) { + if (!FD_ISSET(*fd, fdt->open_fds)) + continue; - if (file && file->f_op == &spufs_context_fops) { - int rval = spufs_add_one_context(file, fd); - if (rval < 0) - break; - size += rval; - } - } + file = fcheck(*fd); + + if (!file || file->f_op != &spufs_context_fops) + continue; + + ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + continue; + + /* start searching the next fd next time we're called */ + (*fd)++; + break; } - return size; + return ctx; } -static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i, - struct file *file) +int spufs_coredump_extra_notes_size(void) { struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + spu_acquire_saved(ctx); + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) + break; + + size += rc; + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct file *file, int dfd, loff_t *foffset) +{ loff_t pos = 0; - int sz, dfd, rc, total = 0; + int sz, rc, nread, total = 0; const int bufsz = PAGE_SIZE; char *name; char fullname[80], *buf; @@ -176,64 +172,70 @@ static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i, buf = (void *)get_zeroed_page(GFP_KERNEL); if (!buf) - return; + return -ENOMEM; - dfd = ctx_info->dfd; name = spufs_coredump_read[i].name; - - if (!strcmp(name, "mem")) - sz = ctx_info->memsize; - else - sz = spufs_coredump_read[i].size; - - ctx = ctx_info->ctx; - if (!ctx) - goto out; + sz = spufs_coredump_read[i].size; sprintf(fullname, "SPU/%d/%s", dfd, name); en.n_namesz = strlen(fullname) + 1; en.n_descsz = sz; en.n_type = NT_SPU; - if (!spufs_dump_write(file, &en, sizeof(en))) + rc = spufs_dump_write(file, &en, sizeof(en), foffset); + if (rc) goto out; - if (!spufs_dump_write(file, fullname, en.n_namesz)) + + rc = spufs_dump_write(file, fullname, en.n_namesz, foffset); + if (rc) goto out; - if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4))) + + rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset); + if (rc) goto out; do { - rc = do_coredump_read(i, ctx, buf, bufsz, &pos); - if (rc > 0) { - if (!spufs_dump_write(file, buf, rc)) + nread = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (nread > 0) { + rc = spufs_dump_write(file, buf, nread, foffset); + if (rc) goto out; - total += rc; + total += nread; } - } while (rc == bufsz && total < sz); + } while (nread == bufsz && total < sz); + + if (nread < 0) { + rc = nread; + goto out; + } + + rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4), + foffset); - spufs_dump_seek(file, roundup((unsigned long)file->f_pos - - total + sz, 4)); out: free_page((unsigned long)buf); + return rc; } -static void spufs_arch_write_notes(struct file *file) +int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset) { - int j; - struct spufs_ctx_info *ctx_info, *next; - - list_for_each_entry_safe(ctx_info, next, &ctx_info_list, list) { - spu_acquire_saved(ctx_info->ctx); - for (j = 0; j < spufs_coredump_num_notes; j++) - spufs_arch_write_note(ctx_info, j, file); - 
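A note on sizes here: do_coredump_read() above now renders a single 64-bit value as text instead of copying 8 raw bytes, which is why the get()-style entries in spufs_coredump_read[] later in this patch become 19 bytes. A standalone userspace sketch of the arithmetic (illustrative, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		/* mirrors: snprintf(buffer, size, "0x%.16lx", data) */
		int n = snprintf(buf, sizeof(buf), "0x%.16lx", 0x2104UL);
		/* "0x" plus 16 hex digits is 18 chars; +1 for the trailing NUL */
		printf("%d\n", n + 1);	/* prints 19 */
		return 0;
	}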
spu_release(ctx_info->ctx); - list_del(&ctx_info->list); - kfree(ctx_info); + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + spu_acquire_saved(ctx); + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, file, fd, foffset); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); } -} -struct spu_coredump_calls spufs_coredump_calls = { - .arch_notes_size = spufs_arch_notes_size, - .arch_write_notes = spufs_arch_write_notes, - .owner = THIS_MODULE, -}; + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index adf1f75..87648fe 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -33,7 +33,8 @@ * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ -static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr) +static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; unsigned long is_write; @@ -59,10 +60,8 @@ static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; -#if 0 /* needs to be exported first */ if (expand_stack(vma, ea)) goto bad_area; -#endif good_area: is_write = dsisr & MFC_DSISR_ACCESS_PUT; if (is_write) { @@ -75,6 +74,7 @@ good_area: goto bad_area; } ret = 0; + switch (handle_mm_fault(mm, vma, ea, is_write)) { case VM_FAULT_MINOR: current->min_flt++; @@ -135,12 +135,12 @@ static void spufs_handle_dma_error(struct spu_context *ctx, force_sig_info(info.si_signo, &info, current); } } -EXPORT_SYMBOL_GPL(spufs_dma_callback); void spufs_dma_callback(struct spu *spu, int type) { spufs_handle_dma_error(spu->ctx, spu->dar, type); } +EXPORT_SYMBOL_GPL(spufs_dma_callback); /* * bottom half handler for page faults, we can't do this from @@ -155,6 +155,7 @@ int spufs_handle_class1(struct spu_context *ctx) { u64 ea, dsisr, access; unsigned long flags; + unsigned flt = 0; int ret; /* @@ -180,9 +181,15 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); + pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); + ctx->stats.hash_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) + ctx->spu->stats.hash_flt++; + /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -194,7 +201,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr); + ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); spu_acquire(ctx); /* @@ -203,11 +210,23 @@ int spufs_handle_class1(struct spu_context *ctx) * In case of unhandled error report the problem to user space. 
*/ if (!ret) { + if (flt & VM_FAULT_MAJOR) + ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt & VM_FAULT_MAJOR) + ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; + } + if (ctx->spu) ctx->ops->restart_dma(ctx); } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); return ret; } EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 5813e56..6eb9c94 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -28,6 +28,7 @@ #include <linux/pagemap.h> #include <linux/poll.h> #include <linux/ptrace.h> +#include <linux/seq_file.h> #include <asm/io.h> #include <asm/semaphore.h> @@ -39,18 +40,18 @@ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + static int spufs_mem_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = ctx; if (!i->i_openers++) ctx->local_store = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } @@ -60,11 +61,10 @@ spufs_mem_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->local_store = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } @@ -127,8 +127,8 @@ static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, if (offset >= LS_SIZE) return NOPFN_SIGBUS; - pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n", - addr0, address, offset); + pr_debug("spufs_mem_mmap_nopfn address=0x%lx, offset=0x%lx\n", + address, offset); spu_acquire(ctx); @@ -166,7 +166,7 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static struct file_operations spufs_mem_fops = { +static const struct file_operations spufs_mem_fops = { .open = spufs_mem_open, .release = spufs_mem_release, .read = spufs_mem_read, @@ -258,12 +258,11 @@ static int spufs_cntl_open(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = ctx; if (!i->i_openers++) ctx->cntl = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return simple_attr_open(inode, file, spufs_cntl_get, spufs_cntl_set, "0x%08lx"); } @@ -276,15 +275,14 @@ spufs_cntl_release(struct inode *inode, struct file *file) simple_attr_close(inode, file); - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->cntl = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } -static struct file_operations spufs_cntl_fops = { +static const struct file_operations spufs_cntl_fops = { .open = spufs_cntl_open, .release = spufs_cntl_release, .read = simple_attr_read, @@ -318,7 +316,7 @@ spufs_regs_read(struct file *file, char __user *buffer, spu_acquire_saved(ctx); ret = __spufs_regs_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -340,11 +338,11 @@ spufs_regs_write(struct file *file, 
const char __user *buffer, ret = copy_from_user(lscsa->gprs + *pos - size, buffer, size) ? -EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_regs_fops = { +static const struct file_operations spufs_regs_fops = { .open = spufs_regs_open, .read = spufs_regs_read, .write = spufs_regs_write, @@ -369,7 +367,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer, spu_acquire_saved(ctx); ret = __spufs_fpcr_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -391,11 +389,11 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, buffer, size) ? -EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_fpcr_fops = { +static const struct file_operations spufs_fpcr_fops = { .open = spufs_regs_open, .read = spufs_fpcr_read, .write = spufs_fpcr_write, @@ -461,7 +459,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, return count; } -static struct file_operations spufs_mbox_fops = { +static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, }; @@ -487,7 +485,7 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_mbox_stat_fops = { +static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, }; @@ -594,7 +592,7 @@ static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) return mask; } -static struct file_operations spufs_ibox_fops = { +static const struct file_operations spufs_ibox_fops = { .open = spufs_pipe_open, .read = spufs_ibox_read, .poll = spufs_ibox_poll, @@ -620,7 +618,7 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_ibox_stat_fops = { +static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, }; @@ -727,7 +725,7 @@ static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) return mask; } -static struct file_operations spufs_wbox_fops = { +static const struct file_operations spufs_wbox_fops = { .open = spufs_pipe_open, .write = spufs_wbox_write, .poll = spufs_wbox_poll, @@ -753,7 +751,7 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_wbox_stat_fops = { +static const struct file_operations spufs_wbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_wbox_stat_read, }; @@ -763,12 +761,11 @@ static int spufs_signal1_open(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = ctx; if (!i->i_openers++) ctx->signal1 = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return nonseekable_open(inode, file); } @@ -778,11 +775,10 @@ spufs_signal1_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->signal1 = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } @@ -818,7 +814,7 @@ static ssize_t spufs_signal1_read(struct file 
*file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal1_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -876,7 +872,7 @@ static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static struct file_operations spufs_signal1_fops = { +static const struct file_operations spufs_signal1_fops = { .open = spufs_signal1_open, .release = spufs_signal1_release, .read = spufs_signal1_read, @@ -884,17 +880,23 @@ static struct file_operations spufs_signal1_fops = { .mmap = spufs_signal1_mmap, }; +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, +}; + static int spufs_signal2_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = ctx; if (!i->i_openers++) ctx->signal2 = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return nonseekable_open(inode, file); } @@ -904,11 +906,10 @@ spufs_signal2_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->signal2 = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } @@ -944,7 +945,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal2_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1006,7 +1007,7 @@ static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) #define spufs_signal2_mmap NULL #endif /* !SPUFS_MMAP_4K */ -static struct file_operations spufs_signal2_fops = { +static const struct file_operations spufs_signal2_fops = { .open = spufs_signal2_open, .release = spufs_signal2_release, .read = spufs_signal2_read, @@ -1014,6 +1015,43 @@ static struct file_operations spufs_signal2_fops = { .mmap = spufs_signal2_mmap, }; +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, +}; + +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. 
+ */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static u64 __##__get(void *data) \ +{ \ + struct spu_context *ctx = data; \ + u64 ret; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + spu_acquire(ctx); \ + ret = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + spu_acquire_saved(ctx); \ + ret = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + ret = __get(ctx); \ + \ + return ret; \ +} \ +DEFINE_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + static void spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; @@ -1023,25 +1061,13 @@ static void spufs_signal1_type_set(void *data, u64 val) spu_release(ctx); } -static u64 __spufs_signal1_type_get(void *data) +static u64 spufs_signal1_type_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->ops->signal1_type_get(ctx); } +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu", SPU_ATTR_ACQUIRE); -static u64 spufs_signal1_type_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire(ctx); - ret = __spufs_signal1_type_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, - spufs_signal1_type_set, "%llu"); static void spufs_signal2_type_set(void *data, u64 val) { @@ -1052,25 +1078,12 @@ static void spufs_signal2_type_set(void *data, u64 val) spu_release(ctx); } -static u64 __spufs_signal2_type_get(void *data) +static u64 spufs_signal2_type_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->ops->signal2_type_get(ctx); } - -static u64 spufs_signal2_type_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire(ctx); - ret = __spufs_signal2_type_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, - spufs_signal2_type_set, "%llu"); +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu", SPU_ATTR_ACQUIRE); #if SPUFS_MMAP_4K static unsigned long spufs_mss_mmap_nopfn(struct vm_area_struct *vma, @@ -1109,11 +1122,10 @@ static int spufs_mss_open(struct inode *inode, struct file *file) file->private_data = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!i->i_openers++) ctx->mss = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return nonseekable_open(inode, file); } @@ -1123,15 +1135,14 @@ spufs_mss_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->mss = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } -static struct file_operations spufs_mss_fops = { +static const struct file_operations spufs_mss_fops = { .open = spufs_mss_open, .release = spufs_mss_release, .mmap = spufs_mss_mmap, @@ -1168,12 +1179,11 @@ static int spufs_psmap_open(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = i->i_ctx; if (!i->i_openers++) ctx->psmap = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - 
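To make the new macro concrete: once the compiler folds the constant __acquire test, DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "0x%llx\n", SPU_ATTR_ACQUIRE) — the form used for the npc file below — is roughly equivalent to the following (illustrative expansion, not literal preprocessor output):

	static u64 __spufs_npc_get(void *data)
	{
		struct spu_context *ctx = data;
		u64 ret;

		spu_acquire(ctx);		/* the SPU_ATTR_ACQUIRE branch */
		ret = spufs_npc_get(ctx);
		spu_release(ctx);

		return ret;
	}
	DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, __spufs_npc_get, spufs_npc_set,
				"0x%llx\n");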
smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return nonseekable_open(inode, file); } @@ -1183,15 +1193,14 @@ spufs_psmap_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->psmap = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } -static struct file_operations spufs_psmap_fops = { +static const struct file_operations spufs_psmap_fops = { .open = spufs_psmap_open, .release = spufs_psmap_release, .mmap = spufs_psmap_mmap, @@ -1240,12 +1249,11 @@ static int spufs_mfc_open(struct inode *inode, struct file *file) if (atomic_read(&inode->i_count) != 1) return -EBUSY; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); file->private_data = ctx; if (!i->i_openers++) ctx->mfc = inode->i_mapping; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return nonseekable_open(inode, file); } @@ -1255,11 +1263,10 @@ spufs_mfc_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - spin_lock(&ctx->mapping_lock); + mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) ctx->mfc = NULL; - spin_unlock(&ctx->mapping_lock); - smp_wmb(); + mutex_unlock(&ctx->mapping_lock); return 0; } @@ -1457,14 +1464,15 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, if (status) ret = status; } - spu_release(ctx); if (ret) - goto out; + goto out_unlock; ctx->tagwait |= 1 << cmd.tag; ret = size; +out_unlock: + spu_release(ctx); out: return ret; } @@ -1475,14 +1483,14 @@ static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) u32 free_elements, tagstatus; unsigned int mask; + poll_wait(file, &ctx->mfc_wq, wait); + spu_acquire(ctx); ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); free_elements = ctx->ops->get_mfc_free_elements(ctx); tagstatus = ctx->ops->read_mfc_tagstatus(ctx); spu_release(ctx); - poll_wait(file, &ctx->mfc_wq, wait); - mask = 0; if (free_elements & 0xffff) mask |= POLLOUT | POLLWRNORM; @@ -1531,7 +1539,7 @@ static int spufs_mfc_fasync(int fd, struct file *file, int on) return fasync_helper(fd, file, on, &ctx->mfc_fasync); } -static struct file_operations spufs_mfc_fops = { +static const struct file_operations spufs_mfc_fops = { .open = spufs_mfc_open, .release = spufs_mfc_release, .read = spufs_mfc_read, @@ -1551,17 +1559,12 @@ static void spufs_npc_set(void *data, u64 val) spu_release(ctx); } -static u64 spufs_npc_get(void *data) +static u64 spufs_npc_get(struct spu_context *ctx) { - struct spu_context *ctx = data; - u64 ret; - spu_acquire(ctx); - ret = ctx->ops->npc_read(ctx); - spu_release(ctx); - return ret; + return ctx->ops->npc_read(ctx); } -DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); static void spufs_decr_set(void *data, u64 val) { @@ -1569,55 +1572,38 @@ static void spufs_decr_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->decr.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 __spufs_decr_get(void *data) +static u64 spufs_decr_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; return lscsa->decr.slot[0]; } - -static u64 spufs_decr_get(void 
*data) -{ - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_decr_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); static void spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); - lscsa->decr_status.slot[0] = (u32) val; - spu_release(ctx); -} - -static u64 __spufs_decr_status_get(void *data) -{ - struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return lscsa->decr_status.slot[0]; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); } -static u64 spufs_decr_status_get(void *data) +static u64 spufs_decr_status_get(struct spu_context *ctx) { - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_decr_status_get(data); - spu_release(ctx); - return ret; + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, - spufs_decr_status_set, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); static void spufs_event_mask_set(void *data, u64 val) { @@ -1625,31 +1611,21 @@ static void spufs_event_mask_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->event_mask.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 __spufs_event_mask_get(void *data) +static u64 spufs_event_mask_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; return lscsa->event_mask.slot[0]; } -static u64 spufs_event_mask_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - spu_acquire_saved(ctx); - ret = __spufs_event_mask_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, - spufs_event_mask_set, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); -static u64 __spufs_event_status_get(void *data) +static u64 spufs_event_status_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_state *state = &ctx->csa; u64 stat; stat = state->spu_chnlcnt_RW[0]; @@ -1657,19 +1633,8 @@ static u64 __spufs_event_status_get(void *data) return state->spu_chnldata_RW[0]; return 0; } - -static u64 spufs_event_status_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret = 0; - - spu_acquire_saved(ctx); - ret = __spufs_event_status_get(data); - spu_release(ctx); - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, - NULL, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) static void spufs_srr0_set(void *data, u64 val) { @@ -1677,48 +1642,35 @@ static void spufs_srr0_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->srr0.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } -static u64 spufs_srr0_get(void *data) +static u64 
spufs_srr0_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->srr0.slot[0]; - spu_release(ctx); - return ret; + return lscsa->srr0.slot[0]; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, - "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) -static u64 spufs_id_get(void *data) +static u64 spufs_id_get(struct spu_context *ctx) { - struct spu_context *ctx = data; u64 num; - spu_acquire(ctx); if (ctx->state == SPU_STATE_RUNNABLE) num = ctx->spu->number; else num = (unsigned int)-1; - spu_release(ctx); return num; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) -static u64 __spufs_object_id_get(void *data) -{ - struct spu_context *ctx = data; - return ctx->object_id; -} - -static u64 spufs_object_id_get(void *data) +static u64 spufs_object_id_get(struct spu_context *ctx) { /* FIXME: Should there really be no locking here? */ - return __spufs_object_id_get(data); + return ctx->object_id; } static void spufs_object_id_set(void *data, u64 id) @@ -1727,27 +1679,15 @@ static void spufs_object_id_set(void *data, u64 id) ctx->object_id = id; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, - spufs_object_id_set, "0x%llx\n"); +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); -static u64 __spufs_lslr_get(void *data) +static u64 spufs_lslr_get(struct spu_context *ctx) { - struct spu_context *ctx = data; return ctx->csa.priv2.spu_lslr_RW; } - -static u64 spufs_lslr_get(void *data) -{ - struct spu_context *ctx = data; - u64 ret; - - spu_acquire_saved(ctx); - ret = __spufs_lslr_get(data); - spu_release(ctx); - - return ret; -} -DEFINE_SIMPLE_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); static int spufs_info_open(struct inode *inode, struct file *file) { @@ -1757,6 +1697,29 @@ static int spufs_info_open(struct inode *inode, struct file *file) return 0; } +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { @@ -1784,12 +1747,12 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_mbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_mbox_info_fops = { +static const struct file_operations spufs_mbox_info_fops = { .open = spufs_info_open, .read = spufs_mbox_info_read, .llseek = generic_file_llseek, @@ -1822,12 +1785,12 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, 
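The spu_release() to spu_release_saved() conversions throughout this file pair with the spu_acquire_saved() shown in context.c earlier in the patch: the acquire side records whether it had to deactivate a running context (SPU_SCHED_WAS_ACTIVE), and the release side reactivates it. A sketch of the intended calling pattern around saved-state accesses (illustrative, not from the patch):

	static u64 read_saved_decr(struct spu_context *ctx)
	{
		u64 val;

		spu_acquire_saved(ctx);		/* ctx is now in SPU_STATE_SAVED */
		val = ctx->csa.lscsa->decr.slot[0];
		spu_release_saved(ctx);		/* reactivated if it was running */

		return val;
	}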
spin_lock(&ctx->csa.register_lock); ret = __spufs_ibox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_ibox_info_fops = { +static const struct file_operations spufs_ibox_info_fops = { .open = spufs_info_open, .read = spufs_ibox_info_read, .llseek = generic_file_llseek, @@ -1863,12 +1826,12 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_wbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_wbox_info_fops = { +static const struct file_operations spufs_wbox_info_fops = { .open = spufs_info_open, .read = spufs_wbox_info_read, .llseek = generic_file_llseek, @@ -1913,12 +1876,12 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_dma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_dma_info_fops = { +static const struct file_operations spufs_dma_info_fops = { .open = spufs_info_open, .read = spufs_dma_info_read, }; @@ -1964,17 +1927,127 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_proxydma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } -static struct file_operations spufs_proxydma_info_fops = { +static const struct file_operations spufs_proxydma_info_fops = { .open = spufs_info_open, .read = spufs_proxydma_info_read, }; +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spu_utilization_state state) +{ + struct timespec ts; + unsigned long long time = ctx->stats.times[state]; + + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. 
+ */ + if (ctx->spu && ctx->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - ctx->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + spu_acquire(ctx); + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + struct tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "regs", &spufs_regs_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, @@ -2006,10 +2079,13 @@ struct tree_descr spufs_dir_contents[] = { { "wbox_info", &spufs_wbox_info_fops, 0444, }, { "dma_info", &spufs_dma_info_fops, 0444, }, { "proxydma_info", &spufs_proxydma_info_fops, 0444, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; struct tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, { "ibox", &spufs_ibox_fops, 0444, }, @@ -2017,8 +2093,8 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, - { "signal1", &spufs_signal1_fops, 0666, }, - { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, { "mss", &spufs_mss_fops, 0666, }, @@ -2028,29 +2104,31 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "psmap", &spufs_psmap_fops, 0666, }, { "phys-id", &spufs_id_ops, 0666, }, { "object-id", &spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; struct spufs_coredump_reader spufs_coredump_read[] = { - { "regs", __spufs_regs_read, NULL, 128 * 16 }, - { "fpcr", __spufs_fpcr_read, NULL, 16 }, - { "lslr", NULL, __spufs_lslr_get, 11 }, - { "decr", NULL, __spufs_decr_get, 11 }, - { "decr_status", NULL, __spufs_decr_status_get, 11 }, - { "mem", __spufs_mem_read, NULL, 256 
* 1024, }, - { "signal1", __spufs_signal1_read, NULL, 4 }, - { "signal1_type", NULL, __spufs_signal1_type_get, 2 }, - { "signal2", __spufs_signal2_read, NULL, 4 }, - { "signal2_type", NULL, __spufs_signal2_type_get, 2 }, - { "event_mask", NULL, __spufs_event_mask_get, 8 }, - { "event_status", NULL, __spufs_event_status_get, 8 }, - { "mbox_info", __spufs_mbox_info_read, NULL, 4 }, - { "ibox_info", __spufs_ibox_info_read, NULL, 4 }, - { "wbox_info", __spufs_wbox_info_read, NULL, 16 }, - { "dma_info", __spufs_dma_info_read, NULL, 69 * 8 }, - { "proxydma_info", __spufs_proxydma_info_read, NULL, 35 * 8 }, - { "object-id", NULL, __spufs_object_id_get, 19 }, - { }, + { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", __spufs_mem_read, NULL, LS_SIZE, }, + { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, + { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, + { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, + { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", __spufs_proxydma_info_read, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, }; -int spufs_coredump_num_notes = ARRAY_SIZE(spufs_coredump_read) - 1; - diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 9d3784c..7555057 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -36,12 +36,14 @@ #include <asm/prom.h> #include <asm/semaphore.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> #include <asm/uaccess.h> #include "spufs.h" static struct kmem_cache *spufs_inode_cache; char *isolated_loader; +static int isolated_loader_size; static struct inode * spufs_alloc_inode(struct super_block *sb) @@ -70,10 +72,7 @@ spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags) { struct spufs_inode_info *ei = p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static struct inode * @@ -179,7 +178,7 @@ static int spufs_rmdir(struct inode *parent, struct dentry *dir) static int spufs_fill_dir(struct dentry *dir, struct tree_descr *files, int mode, struct spu_context *ctx) { - struct dentry *dentry; + struct dentry *dentry, *tmp; int ret; while (files->name && files->name[0]) { @@ -195,24 +194,37 @@ static int spufs_fill_dir(struct dentry *dir, struct tree_descr *files, } return 0; out: - spufs_prune_dir(dir); + /* + * remove all children from dir. 
dir->inode is not set so don't + * just simply use spufs_prune_dir() and panic afterwards :) + * dput() looks like it will do the right thing: + * - dec parent's ref counter + * - remove child from parent's child list + * - free child's inode if possible + * - free child + */ + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + dput(dentry); + } + + shrink_dcache_parent(dir); return ret; } static int spufs_dir_close(struct inode *inode, struct file *file) { struct spu_context *ctx; - struct inode *dir; - struct dentry *dentry; + struct inode *parent; + struct dentry *dir; int ret; - dentry = file->f_dentry; - dir = dentry->d_parent->d_inode; - ctx = SPUFS_I(dentry->d_inode)->i_ctx; + dir = file->f_dentry; + parent = dir->d_parent->d_inode; + ctx = SPUFS_I(dir->d_inode)->i_ctx; - mutex_lock(&dir->i_mutex); - ret = spufs_rmdir(dir, dentry); - mutex_unlock(&dir->i_mutex); + mutex_lock(&parent->i_mutex); + ret = spufs_rmdir(parent, dir); + mutex_unlock(&parent->i_mutex); WARN_ON(ret); /* We have to give up the mm_struct */ @@ -221,11 +233,7 @@ static int spufs_dir_close(struct inode *inode, struct file *file) return dcache_dir_close(inode, file); } -/* const */struct inode_operations spufs_dir_inode_operations = { - .lookup = simple_lookup, -}; - -struct file_operations spufs_context_fops = { +const struct file_operations spufs_context_fops = { .open = dcache_dir_open, .release = spufs_dir_close, .llseek = dcache_dir_lseek, @@ -258,7 +266,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, goto out_iput; ctx->flags = flags; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; if (flags & SPU_CREATE_NOSCHED) ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, @@ -276,6 +284,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, goto out; out_free_ctx: + spu_forget(ctx); put_spu_context(ctx); out_iput: iput(inode); @@ -316,8 +325,8 @@ spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, int count, node; int aff_supp; - aff_supp = !list_empty(&(list_entry(be_spu_info[0].spus.next, - struct spu, be_list))->aff_list); + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); if (!aff_supp) return ERR_PTR(-EINVAL); @@ -357,8 +366,8 @@ spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, count++; for (node = 0; node < MAX_NUMNODES; node++) { - if ((be_spu_info[node].n_spus - atomic_read( - &be_spu_info[node].reserved_spus)) >= count) + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) break; } @@ -493,7 +502,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode) if (!gang) goto out_iput; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; d_instantiate(dentry, inode); @@ -611,13 +620,14 @@ out: /* File system initialization */ enum { - Opt_uid, Opt_gid, Opt_err, + Opt_uid, Opt_gid, Opt_mode, Opt_err, }; static match_table_t spufs_tokens = { - { Opt_uid, "uid=%d" }, - { Opt_gid, "gid=%d" }, - { Opt_err, NULL }, + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_mode, "mode=%o" }, + { Opt_err, NULL }, }; static int @@ -644,6 +654,11 @@ spufs_parse_options(char *options, struct inode *root) return 0; root->i_gid = option; break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return 0; + root->i_mode = option | S_IFDIR; + break; 
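With the new "mode" option parsed above, the permissions of the spufs root directory can be chosen at mount time alongside uid and gid. An illustrative userspace call (mount point and option values are examples only):

	#include <sys/mount.h>

	int mount_spufs(void)
	{
		/* same as: mount -t spufs -o uid=0,gid=100,mode=0770 none /spu */
		return mount("none", "/spu", "spufs", 0,
			     "uid=0,gid=100,mode=0770");
	}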
default: return 0; } @@ -653,7 +668,8 @@ static void spufs_exit_isolated_loader(void) { - kfree(isolated_loader); + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); } static void @@ -671,11 +687,12 @@ spufs_init_isolated_loader(void) if (!loader) return; - /* kmalloc should align on a 16 byte boundary..* */ - isolated_loader = kmalloc(size, GFP_KERNEL); + /* the loader must be aligned on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); if (!isolated_loader) return; + isolated_loader_size = size; memcpy(isolated_loader, loader, size); printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); } @@ -686,12 +703,16 @@ spufs_create_root(struct super_block *sb, void *data) struct inode *inode; int ret; + ret = -ENODEV; + if (!spu_management_ops) + goto out; + ret = -ENOMEM; inode = spufs_new_inode(sb, S_IFDIR | 0775); if (!inode) goto out; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; SPUFS_I(inode)->i_ctx = NULL; @@ -749,6 +770,10 @@ static int __init spufs_init(void) { int ret; + ret = -ENODEV; + if (!spu_management_ops) + goto out; + ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, @@ -756,25 +781,24 @@ static int __init spufs_init(void) if (!spufs_inode_cache) goto out; - if (spu_sched_init() != 0) { - kmem_cache_destroy(spufs_inode_cache); - goto out; - } - ret = register_filesystem(&spufs_type); + ret = spu_sched_init(); if (ret) goto out_cache; - ret = register_spu_syscalls(&spufs_calls); + ret = register_filesystem(&spufs_type); if (ret) - goto out_fs; - ret = register_arch_coredump_calls(&spufs_coredump_calls); + goto out_sched; + ret = register_spu_syscalls(&spufs_calls); if (ret) goto out_fs; spufs_init_isolated_loader(); return 0; + out_fs: unregister_filesystem(&spufs_type); +out_sched: + spu_sched_exit(); out_cache: kmem_cache_destroy(spufs_inode_cache); out: @@ -786,7 +810,6 @@ static void __exit spufs_exit(void) { spu_sched_exit(); spufs_exit_isolated_loader(); - unregister_arch_coredump_calls(&spufs_coredump_calls); unregister_spu_syscalls(&spufs_calls); unregister_filesystem(&spufs_type); kmem_cache_destroy(spufs_inode_cache); diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index 3310c61..55ac3a6 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -28,7 +28,7 @@ #include <asm/spu_csa.h> #include <asm/mmu.h> -int spu_alloc_lscsa(struct spu_state *csa) +static int spu_alloc_lscsa_std(struct spu_state *csa) { struct spu_lscsa *lscsa; unsigned char *p; @@ -46,7 +46,7 @@ int spu_alloc_lscsa(struct spu_state *csa) return 0; } -void spu_free_lscsa(struct spu_state *csa) +static void spu_free_lscsa_std(struct spu_state *csa) { /* Clear reserved bit before vfree. 
*/ unsigned char *p; @@ -60,3 +60,12 @@ void spu_free_lscsa(struct spu_state *csa) vfree(csa->lscsa); } +int spu_alloc_lscsa(struct spu_state *csa) +{ + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + spu_free_lscsa_std(csa); +} diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 4fda5b9..d7c7113 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -18,7 +18,7 @@ void spufs_stop_callback(struct spu *spu) wake_up_all(&ctx->stop_wq); } -static inline int spu_stopped(struct spu_context *ctx, u32 * stat) +static inline int spu_stopped(struct spu_context *ctx, u32 *stat) { struct spu *spu; u64 pte_fault; @@ -31,7 +31,8 @@ static inline int spu_stopped(struct spu_context *ctx, u32 * stat) return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); - return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; + return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending_value) ? + 1 : 0; } static int spu_setup_isolated(struct spu_context *ctx) @@ -44,13 +45,18 @@ static int spu_setup_isolated(struct spu_context *ctx) const u32 status_loading = SPU_STATUS_RUNNING | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; + ret = -ENODEV; if (!isolated_loader) - return -ENODEV; - - ret = spu_acquire_exclusive(ctx); - if (ret) goto out; + /* + * We need to exclude userspace access to the context. + * + * To protect against memory access we invalidate all ptes + * and make sure the pagefault handlers block on the mutex. + */ + spu_unmap_mappings(ctx); + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; /* purge the MFC DMA queue to ensure no spurious accesses before we @@ -63,7 +69,7 @@ static int spu_setup_isolated(struct spu_context *ctx) printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", __FUNCTION__); ret = -EIO; - goto out_unlock; + goto out; } cond_resched(); } @@ -100,12 +106,15 @@ static int spu_setup_isolated(struct spu_context *ctx) pr_debug("%s: isolated LOAD failed\n", __FUNCTION__); ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); ret = -EACCES; + goto out_drop_priv; + } - } else if (!(status & SPU_STATUS_ISOLATED_STATE)) { + if (!(status & SPU_STATUS_ISOLATED_STATE)) { /* This isn't allowed by the CBEA, but check anyway */ pr_debug("%s: SPU fell out of isolated mode?\n", __FUNCTION__); ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); ret = -EINVAL; + goto out_drop_priv; } out_drop_priv: @@ -113,30 +122,21 @@ out_drop_priv: sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; spu_mfc_sr1_set(ctx->spu, sr1); -out_unlock: - spu_release(ctx); out: return ret; } -static inline int spu_run_init(struct spu_context *ctx, u32 * npc) +static int spu_run_init(struct spu_context *ctx, u32 *npc) { - int ret; - unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; - - ret = spu_acquire_runnable(ctx, 0); - if (ret) - return ret; + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); if (ctx->flags & SPU_CREATE_ISOLATE) { + unsigned long runcntl; + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { - /* Need to release ctx, because spu_setup_isolated will - * acquire it exclusively. 
- */ - spu_release(ctx); - ret = spu_setup_isolated(ctx); - if (!ret) - ret = spu_acquire_runnable(ctx, 0); + int ret = spu_setup_isolated(ctx); + if (ret) + return ret; } /* if userspace has set the runcntrl register (eg, to issue an @@ -145,43 +145,55 @@ static inline int spu_run_init(struct spu_context *ctx, u32 * npc) (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); if (runcntl == 0) runcntl = SPU_RUNCNTL_RUNNABLE; + ctx->ops->runcntl_write(ctx, runcntl); } else { - spu_start_tick(ctx); + unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; ctx->ops->npc_write(ctx, *npc); + if (test_thread_flag(TIF_SINGLESTEP)) + mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; + out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } - ctx->ops->runcntl_write(ctx, runcntl); - return ret; + spuctx_switch_state(ctx, SPU_UTIL_USER); + + return 0; } -static inline int spu_run_fini(struct spu_context *ctx, u32 * npc, - u32 * status) +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) { int ret = 0; - spu_stop_tick(ctx); *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); spu_release(ctx); if (signal_pending(current)) ret = -ERESTARTSYS; + return ret; } -static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, +static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, u32 *status) { int ret; - if ((ret = spu_run_fini(ctx, npc, status)) != 0) + ret = spu_run_fini(ctx, npc, status); + if (ret) return ret; - if (*status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT)) { + + if (*status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT)) return *status; - } - if ((ret = spu_run_init(ctx, npc)) != 0) + + ret = spu_acquire_runnable(ctx, 0); + if (ret) return ret; + + spuctx_switch_state(ctx, SPU_UTIL_USER); return 0; } @@ -193,7 +205,7 @@ static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, * This means we can only do a very rough approximation of POSIX * signal semantics. */ -int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, unsigned int *npc) { int ret; @@ -229,7 +241,7 @@ int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, return ret; } -int spu_process_callback(struct spu_context *ctx) +static int spu_process_callback(struct spu_context *ctx) { struct spu_syscall_block s; u32 ls_pointer, npc; @@ -274,42 +286,67 @@ static inline int spu_process_events(struct spu_context *ctx) struct spu *spu = ctx->spu; int ret = 0; - if (spu->class_0_pending) + if (spu->class_0_pending_value) ret = spu_irq_class_0_bottom(spu); if (!ret && signal_pending(current)) ret = -ERESTARTSYS; return ret; } -long spufs_run_spu(struct file *file, struct spu_context *ctx, - u32 *npc, u32 *event) +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; - struct spu * spu; + struct spu *spu; u32 status; - if (down_interruptible(&ctx->run_sema)) + if (mutex_lock_interruptible(&ctx->run_mutex)) return -ERESTARTSYS; ctx->ops->master_start(ctx); ctx->event_return = 0; + + spu_acquire(ctx); + if (ctx->state == SPU_STATE_SAVED) { + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + + ret = spu_activate(ctx, 0); + if (ret) { + spu_release(ctx); + goto out; + } + } else { + /* + * We have to update the scheduling priority under active_mutex + * to protect against find_victim(). 
+ * + * No need to update the timeslice ASAP, it will get updated + * once the current one has expired. + */ + spu_update_sched_info(ctx); + } + ret = spu_run_init(ctx, npc); - if (ret) + if (ret) { + spu_release(ctx); goto out; + } do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); - spu = ctx->spu; if (unlikely(ret)) break; - if (unlikely(test_bit(SPU_SCHED_NOTIFY_ACTIVE, - &ctx->sched_flags))) { - clear_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags); + spu = ctx->spu; + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { spu_switch_notify(spu, ctx); continue; } } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); @@ -323,16 +360,21 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { ret = spu_reacquire_runnable(ctx, npc, &status); - if (ret) { - spu_stop_tick(ctx); + if (ret) goto out2; - } continue; } ret = spu_process_events(ctx); } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT))); + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) && + (ctx->state == SPU_STATE_RUNNABLE)) + ctx->stats.libassist++; + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); @@ -342,10 +384,15 @@ out2: if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) ret = status; + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyway + */ if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { force_sig(SIGTRAP, current); @@ -354,6 +401,6 @@ out2: out: *event = ctx->event_return; - up(&ctx->run_sema); + mutex_unlock(&ctx->run_mutex); return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index b8d0709..d5e1dc0 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -30,12 +30,14 @@ #include <linux/completion.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/numa.h> #include <linux/mutex.h> #include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <asm/io.h> #include <asm/mmu_context.h> @@ -44,123 +46,123 @@ #include <asm/spu_priv1.h> #include "spufs.h" -#define SPU_TIMESLICE (HZ) - struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; - struct list_head active_list[MAX_NUMNODES]; - struct mutex active_mutex[MAX_NUMNODES]; + int nr_waiting; }; +static unsigned long spu_avenrun[3]; static struct spu_prio_array *spu_prio; -static struct workqueue_struct *spu_sched_wq; - -static struct spu_context *grab_runnable_context(int prio); -static void spu_unbind_context(struct spu *spu, struct spu_context *ctx); +static struct task_struct *spusched_task; +static struct timer_list spusched_timer; +/* + * Priority of a normal, non-rt, non-niced process (aka nice 
level 0). + */ +#define NORMAL_PRIO 120 -static inline int node_allowed(int node) -{ - cpumask_t mask; +/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. + */ +#define SPUSCHED_TICK (10) - if (!nr_cpus_node(node)) - return 0; - mask = node_to_cpumask(node); - if (!cpus_intersects(mask, current->cpus_allowed)) - return 0; - return 1; -} +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is + * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. + */ +#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) +#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) -void spu_start_tick(struct spu_context *ctx) -{ - if (ctx->policy == SCHED_RR) { - /* - * Make sure the exiting bit is cleared. - */ - clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags); - mb(); - queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE); - } -} +#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO) +#define SCALE_PRIO(x, prio) \ + max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) -void spu_stop_tick(struct spu_context *ctx) +/* + * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: + * [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_SPU_TIMESLICE worth of execution time. + */ +void spu_set_timeslice(struct spu_context *ctx) { - if (ctx->policy == SCHED_RR) { - /* - * While the work can be rearming normally setting this flag - * makes sure it does not rearm itself anymore. - */ - set_bit(SPU_SCHED_EXITING, &ctx->sched_flags); - mb(); - cancel_delayed_work(&ctx->sched_work); - } + if (ctx->prio < NORMAL_PRIO) + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); + else + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); } -void spu_sched_tick(void *data) +/* + * Update scheduling information from the owning thread. + */ +void __spu_update_sched_info(struct spu_context *ctx) { - struct spu_context *ctx = data; - struct spu *spu; - int preempted = 0; + /* + * 32-bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * by definition racy. + */ + ctx->tid = current->pid; /* - * If this context is beeing stopped avoid rescheduling from the - * scheduler tick because we would deadlock on the state_mutex. - * The caller will yield the spu later on anyway. + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case. */ - if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags)) - return; + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; + ctx->policy = current->policy; - mutex_lock(&ctx->state_mutex); - spu = ctx->spu; - if (spu) { - struct spu_context *new; + /* + * A lot of places that don't hold list_mutex poke into + * cpus_allowed, including grab_runnable_context which + * already holds the runq_lock. So abuse runq_lock + * to protect this field as well.
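+ * (grab_runnable_context(), for instance, reads cpus_allowed through + * __node_allowed() below while holding only runq_lock.)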
+ */ + spin_lock(&spu_prio->runq_lock); + ctx->cpus_allowed = current->cpus_allowed; + spin_unlock(&spu_prio->runq_lock); +} - new = grab_runnable_context(ctx->prio + 1); - if (new) { - spu_unbind_context(spu, ctx); - spu_free(spu); - if (new) - wake_up(&new->stop_wq); - preempted = 1; - } - } - mutex_unlock(&ctx->state_mutex); +void spu_update_sched_info(struct spu_context *ctx) +{ + int node = ctx->spu->node; - if (preempted) { - /* - * We need to break out of the wait loop in spu_run manually - * to ensure this context gets put on the runqueue again - * ASAP. - */ - wake_up(&ctx->stop_wq); - } else - spu_start_tick(ctx); + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } -/** - * spu_add_to_active_list - add spu to active list - * @spu: spu to add to the active list - */ -static void spu_add_to_active_list(struct spu *spu) +static int __node_allowed(struct spu_context *ctx, int node) { - mutex_lock(&spu_prio->active_mutex[spu->node]); - list_add_tail(&spu->list, &spu_prio->active_list[spu->node]); - mutex_unlock(&spu_prio->active_mutex[spu->node]); + if (nr_cpus_node(node)) { + cpumask_t mask = node_to_cpumask(node); + + if (cpus_intersects(mask, ctx->cpus_allowed)) + return 1; + } + + return 0; } -/** - * spu_remove_from_active_list - remove spu from active list - * @spu: spu to remove from the active list - */ -static void spu_remove_from_active_list(struct spu *spu) +static int node_allowed(struct spu_context *ctx, int node) { - int node = spu->node; + int rval; - mutex_lock(&spu_prio->active_mutex[node]); - list_del_init(&spu->list); - mutex_unlock(&spu_prio->active_mutex[node]); + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; } static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); @@ -174,20 +176,27 @@ void spu_switch_notify(struct spu *spu, struct spu_context *ctx) static void notify_spus_active(void) { int node; - /* Wake up the active spu_contexts. When the awakened processes - * see their "notify_active" flag is set, they will call - * spu_switch_notify(); + + /* + * Wake up the active spu_contexts. 
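+ * (Runs when a new switch-event notifier registers, e.g. a profiler + * such as OProfile, so the new listener immediately gets a switch + * event for every context already running on an spu.)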
+ * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(); */ - for (node = 0; node < MAX_NUMNODES; node++) { + for_each_online_node(node) { struct spu *spu; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { - struct spu_context *ctx = spu->ctx; - set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags); - mb(); - wake_up_all(&ctx->stop_wq); + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -216,10 +225,14 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if (ctx->flags & SPU_CREATE_NOSCHED) - atomic_inc(&be_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - atomic_inc(&ctx->gang->aff_sched_count); + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + spu->ctx = ctx; spu->flags = 0; ctx->spu = spu; @@ -238,8 +251,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu->timestamp = jiffies; spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); - spu_add_to_active_list(spu); ctx->state = SPU_STATE_RUNNABLE; + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? 
node : 0; + if (!node_allowed(ctx, node)) + continue; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, + lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * has_affinity is called each time a context is going to be scheduled. + * It returns true when the context's gang has an affinity reference + * spu, computing that reference location first if necessary. + */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + + return gang->aff_ref_spu != NULL; } /** @@ -251,13 +409,20 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; + } + mutex_unlock(&ctx->gang->aff_mutex); + } - if (spu->ctx->flags & SPU_CREATE_NOSCHED) - atomic_dec(&be_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; - spu_remove_from_active_list(spu); spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -272,9 +437,17 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->pid = 0; spu->tgid = 0; ctx->ops = &spu_backing_ops; - ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; } /** @@ -283,20 +456,39 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) */ static void __spu_add_to_rq(struct spu_context *ctx) { - int prio = ctx->prio; - 
list_add_tail(&ctx->rq, &spu_prio->runq[prio]); - set_bit(prio, spu_prio->bitmap); + /* + * Unfortunately this code path can be called from multiple threads + * on behalf of a single context due to the way the problem state + * mmap support works. + * + * Fortunately we need to wake up all these threads at the same time + * and can simply skip the runqueue addition for all but the first + * thread getting into this codepath. + * + * It's still quite hacky, and long-term we should proxy all other + * threads through the owner thread so that spu_run is in control + * of all the scheduling activity for a given context. + */ + if (list_empty(&ctx->rq)) { + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } } static void __spu_del_from_rq(struct spu_context *ctx) { int prio = ctx->prio; - if (!list_empty(&ctx->rq)) + if (!list_empty(&ctx->rq)) { + if (!--spu_prio->nr_waiting) + del_timer(&spusched_timer); list_del_init(&ctx->rq); - if (list_empty(&spu_prio->runq[prio])) - clear_bit(prio, spu_prio->bitmap); + + if (list_empty(&spu_prio->runq[prio])) + clear_bit(prio, spu_prio->bitmap); + } } static void spu_prio_wait(struct spu_context *ctx) @@ -319,68 +511,55 @@ static void spu_prio_wait(struct spu_context *ctx) remove_wait_queue(&ctx->stop_wq, &wait); } -/** - * grab_runnable_context - try to find a runnable context - * - * Remove the highest priority context on the runqueue and return it - * to the caller. Returns %NULL if no runnable context was found. - */ -static struct spu_context *grab_runnable_context(int prio) -{ - struct spu_context *ctx = NULL; - int best; - - spin_lock(&spu_prio->runq_lock); - best = sched_find_first_bit(spu_prio->bitmap); - if (best < prio) { - struct list_head *rq = &spu_prio->runq[best]; - - BUG_ON(list_empty(rq)); - - ctx = list_entry(rq->next, struct spu_context, rq); - __spu_del_from_rq(ctx); - } - spin_unlock(&spu_prio->runq_lock); - - return ctx; -} - -/** - * spu_reschedule - try to find a runnable context for a spu - * @spu: spu available - * - * This function is called whenever a spu becomes idle. It looks for the - * most suitable runnable spu context and schedules it for execution. - */ -static void spu_reschedule(struct spu *spu) -{ - struct spu_context *ctx; - - spu_free(spu); - - ctx = grab_runnable_context(MAX_PRIO); - if (ctx) - wake_up(&ctx->stop_wq); -} - static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu = NULL; - int node = cpu_to_node(raw_smp_processor_id()); - int n; - - spu = affinity_check(ctx); - if (spu) - return spu_alloc_spu(spu); + struct spu *spu, *aff_ref_spu; + int node, n; + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + mutex_lock(&ctx->gang->aff_mutex); + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; + mutex_unlock(&ctx->gang->aff_mutex); + + return NULL; + } + mutex_unlock(&ctx->gang->aff_mutex); + } + node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ?
node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; - spu = spu_alloc_node(node); - if (spu) - break; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); } + + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_init_channels(spu); return spu; } @@ -407,18 +586,18 @@ static struct spu *find_victim(struct spu_context *ctx) node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; - if (tmp->rt_priority < ctx->rt_priority && - (!victim || tmp->rt_priority < victim->rt_priority)) + if (tmp && tmp->prio > ctx->prio && + (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { /* @@ -443,7 +622,14 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; spu_unbind_context(spu, victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + victim->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); /* * We need to break out of the wait loop in spu_run @@ -463,28 +649,38 @@ static struct spu *find_victim(struct spu_context *ctx) * @ctx: spu context to schedule * @flags: flags (currently ignored) * - * Tries to find a free spu to run @ctx. If no free spu is availble + * Tries to find a free spu to run @ctx. If no free spu is available * add the context to the runqueue so it gets woken up once an spu * is available. */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - - if (ctx->spu) - return 0; - do { struct spu *spu; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; + spu = spu_get_idle(ctx); /* * If this is a realtime thread we try to get it running by * preempting a lower priority thread. */ - if (!spu && ctx->rt_priority) + if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + mutex_unlock(&cbe_spu_info[node].list_mutex); return 0; } @@ -495,6 +691,65 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) } /** + * grab_runnable_context - try to find a runnable context + * + * Remove the highest priority context on the runqueue and return it + * to the caller. Returns %NULL if no runnable context was found. 
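+ * + * @prio: consider only contexts with a priority numerically below + * this value + * @node: node whose spu just became idle; only contexts whose + * cpus_allowed mask covers it are eligible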
+ */ +static struct spu_context *grab_runnable_context(int prio, int node) +{ + struct spu_context *ctx; + int best; + + spin_lock(&spu_prio->runq_lock); + best = find_first_bit(spu_prio->bitmap, prio); + while (best < prio) { + struct list_head *rq = &spu_prio->runq[best]; + + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here as well */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; + } + ctx = NULL; + found: + spin_unlock(&spu_prio->runq_lock); + return ctx; +} + +static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) +{ + struct spu *spu = ctx->spu; + struct spu_context *new = NULL; + + if (spu) { + new = grab_runnable_context(max_prio, spu->node); + if (new || force) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu_unbind_context(spu, ctx); + spu->alloc_state = SPU_FREE; + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + ctx->stats.vol_ctx_switch++; + spu->stats.vol_ctx_switch++; + + if (new) + wake_up(&new->stop_wq); + } + + } + + return new != NULL; +} + +/** * spu_deactivate - unbind a context from its physical spu * @ctx: spu context to unbind * @@ -503,12 +758,7 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) */ void spu_deactivate(struct spu_context *ctx) { - struct spu *spu = ctx->spu; - - if (spu) { - spu_unbind_context(spu, ctx); - spu_reschedule(spu); - } + __spu_deactivate(ctx, 1, MAX_PRIO); } /** @@ -521,200 +771,220 @@ void spu_deactivate(struct spu_context *ctx) */ void spu_yield(struct spu_context *ctx) { - struct spu *spu; - int need_yield = 0; - - if (mutex_trylock(&ctx->state_mutex)) { - if ((spu = ctx->spu) != NULL) { - int best = sched_find_first_bit(spu_prio->bitmap); - if (best < MAX_PRIO) { - pr_debug("%s: yielding SPU %d NODE %d\n", - __FUNCTION__, spu->number, spu->node); - spu_deactivate(ctx); - need_yield = 1; - } - } + if (!(ctx->flags & SPU_CREATE_NOSCHED)) { + mutex_lock(&ctx->state_mutex); + __spu_deactivate(ctx, 0, MAX_PRIO); mutex_unlock(&ctx->state_mutex); } - if (unlikely(need_yield)) - yield(); } -int __init spu_sched_init(void) +static noinline void spusched_tick(struct spu_context *ctx) { - int i; - - spu_sched_wq = create_singlethread_workqueue("spusched"); - if (!spu_sched_wq) - return 1; + if (ctx->flags & SPU_CREATE_NOSCHED) + return; + if (ctx->policy == SCHED_FIFO) + return; - spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); - if (!spu_prio) { - printk(KERN_WARNING "%s: Unable to allocate priority queue.\n", - __FUNCTION__); - destroy_workqueue(spu_sched_wq); - return 1; - } - for (i = 0; i < MAX_PRIO; i++) { - INIT_LIST_HEAD(&spu_prio->runq[i]); - __clear_bit(i, spu_prio->bitmap); - } - __set_bit(MAX_PRIO, spu_prio->bitmap); - for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&spu_prio->active_mutex[i]); - INIT_LIST_HEAD(&spu_prio->active_list[i]); - } - spin_lock_init(&spu_prio->runq_lock); - return 0; -} + if (--ctx->time_slice) + return; -void __exit spu_sched_exit(void) -{ - struct spu *spu, *tmp; - int node; + /* + * Unfortunately list_mutex ranks outside of state_mutex, so + * we have to trylock here. If we fail, give the context another + * tick and try again.
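+ * (The missed tick is credited back via ctx->time_slice++ in the + * else branch below, so lock contention costs the context nothing.)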
+ */ + if (mutex_trylock(&ctx->state_mutex)) { + struct spu *spu = ctx->spu; + struct spu_context *new; - for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], - list) { - list_del_init(&spu->list); - spu_free(spu); + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + spu->alloc_state = SPU_FREE; + cbe_spu_info[spu->node].nr_active--; + wake_up(&new->stop_wq); + /* + * We need to break out of the wait loop in + * spu_run manually to ensure this context + * gets put on the runqueue again ASAP. + */ + wake_up(&ctx->stop_wq); } - mutex_unlock(&spu_prio->active_mutex[node]); + spu_set_timeslice(ctx); + mutex_unlock(&ctx->state_mutex); + } else { + ctx->time_slice++; } - kfree(spu_prio); - destroy_workqueue(spu_sched_wq); } -static void aff_merge_remaining_ctxs(struct spu_gang *gang) +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / list_mutex here. Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. + */ +static unsigned long count_active_contexts(void) { - struct spu_context *ctx; + int nr_active = 0, node; - list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { - if (list_empty(&ctx->aff_list)) - list_add(&ctx->aff_list, &gang->aff_list_head); - } - gang->aff_flags |= AFF_MERGED; + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_waiting; + + return nr_active; } -static void aff_set_offsets(struct spu_gang *gang) +/** + * spu_calc_load - given tick count, update the avenrun load estimates. + * @ticks: tick count + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(unsigned long ticks) { - struct spu_context *ctx; - int offset; - - offset = -1; - list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, - aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - ctx->aff_offset = offset--; + unsigned long active_tasks; /* fixed-point */ + static int count = LOAD_FREQ; + + count -= ticks; + + if (unlikely(count < 0)) { + active_tasks = count_active_contexts() * FIXED_1; + do { + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); + count += LOAD_FREQ; + } while (count < 0); } - - offset = 0; - list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - ctx->aff_offset = offset++; - } - - gang->aff_flags |= AFF_OFFSETS_SET; } -static inline int sched_spu(struct spu *spu) +static void spusched_wake(unsigned long data) { - return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); + spu_calc_load(SPUSCHED_TICK); } -static struct spu * -aff_ref_location(int mem_aff, int group_size, int prio, int lowest_offset) +static int spusched_thread(void *unused) { struct spu *spu; - int node, n; + int node; - /* TODO: A better algorithm could be used to find a good spu to be - * used as reference location for the ctxs chain.
- */ - node = cpu_to_node(raw_smp_processor_id()); - for (n = 0; n < MAX_NUMNODES; n++, node++) { - node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) - continue; - list_for_each_entry(spu, &be_spu_info[node].spus, be_list) { - if ((!mem_aff || spu->has_mem_affinity) && - sched_spu(spu)) - return spu; + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->ctx) + spusched_tick(spu->ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } - return NULL; + + return 0; } -static void aff_set_ref_point_location(struct spu_gang *gang) +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int show_spu_loadavg(struct seq_file *s, void *private) { - int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; + int a, b, c; - mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; - lowest_offset = 0; - gs = 0; - list_for_each_entry(tmp, &gang->aff_list_head, aff_list) - gs++; + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); - list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, - aff_list) { - if (&ctx->aff_list == &gang->aff_list_head) - break; - lowest_offset = ctx->aff_offset; - } + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side..), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + last_pid); + return 0; +} - gang->aff_ref_spu = aff_ref_location(mem_aff, gs, ctx->prio, - lowest_offset); +static int spu_loadavg_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_spu_loadavg, NULL); } -static struct spu* ctx_location(struct spu *ref, int offset) +static const struct file_operations spu_loadavg_fops = { + .open = spu_loadavg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int __init spu_sched_init(void) { - struct spu *spu; + struct proc_dir_entry *entry; + int err = -ENOMEM, i; - spu = NULL; - if (offset >= 0) { - list_for_each_entry(spu, ref->aff_list.prev, aff_list) { - if (offset == 0) - break; - if (sched_spu(spu)) - offset--; - } - } else { - list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { - if (offset == 0) - break; - if (sched_spu(spu)) - offset++; - } + spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); + if (!spu_prio) + goto out; + + for (i = 0; i < MAX_PRIO; i++) { + INIT_LIST_HEAD(&spu_prio->runq[i]); + __clear_bit(i, spu_prio->bitmap); } - return spu; + spin_lock_init(&spu_prio->runq_lock); + + setup_timer(&spusched_timer, spusched_wake, 0); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; + } + + entry = create_proc_entry("spu_loadavg", 0, NULL); + if (!entry) + goto out_stop_kthread; + entry->proc_fops = &spu_loadavg_fops; + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); + return 0; + + out_stop_kthread: + kthread_stop(spusched_task); + 
out_free_spu_prio: + kfree(spu_prio); + out: + return err; } -/** - * affinity_check is called each time a context is going to be scheduled. - * It returns the spu ptr on which the context must run. - */ -struct spu* affinity_check(struct spu_context *ctx) +void spu_sched_exit(void) { - struct spu_gang *gang; + struct spu *spu; + int node; - if (list_empty(&ctx->aff_list)) - return NULL; - gang = ctx->gang; - mutex_lock(&gang->aff_mutex); - if (!gang->aff_ref_spu) { - if (!(gang->aff_flags & AFF_MERGED)) - aff_merge_remaining_ctxs(gang); - if (!(gang->aff_flags & AFF_OFFSETS_SET)) - aff_set_offsets(gang); - aff_set_ref_point_location(gang); + remove_proc_entry("spu_loadavg", NULL); + + del_timer_sync(&spusched_timer); + kthread_stop(spusched_task); + + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } - mutex_unlock(&gang->aff_mutex); - if (!gang->aff_ref_spu) - return NULL; - return ctx_location(gang->aff_ref_spu, ctx->aff_offset); + kfree(spu_prio); } diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 0bf723d..21a9c95 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -84,13 +84,13 @@ static inline void restore_decr(void) unsigned int decr_running; unsigned int decr; - /* Restore, Step 6: + /* Restore, Step 6(moved): * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); - decr_running = regs_spill[offset].slot[0]; + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; @@ -296,7 +296,7 @@ static inline void restore_complete(void) * This code deviates from the documented sequence in the * following aspects: * - * 1. The EA for LSCSA is passed from PPE in the + * 1. The EA for LSCSA is passed from PPE in the * signal notification channels. * 2. The register spill area is pulled by SPU * into LS, rather than pushed by PPE. @@ -318,10 +318,10 @@ int main() build_dma_list(lscsa_ea); /* Step 3. */ restore_upper_240kb(lscsa_ea); /* Step 4. */ /* Step 5: done by 'exit'. */ - restore_decr(); /* Step 6. */ enqueue_putllc(lscsa_ea); /* Step 7. */ set_tag_update(); /* Step 8. */ read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ read_llar_status(); /* Step 10. */ write_ppu_mb(); /* Step 11. */ write_ppuint_mb(); /* Step 12. 
*/ diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index 15183d2..f383b02 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x24fd8081, 0x1cd80081, 0x33001180, -0x42030003, +0x42034003, 0x33800284, 0x1c010204, 0x40200000, @@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x23fffd84, 0x1c100183, 0x217ffa85, -0x3080a000, -0x3080a201, -0x3080a402, -0x3080a603, -0x3080a804, -0x3080aa05, -0x3080ac06, -0x3080ae07, -0x3080b008, -0x3080b209, -0x3080b40a, -0x3080b60b, -0x3080b80c, -0x3080ba0d, -0x3080bc0e, -0x3080be0f, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, 0x00003ffc, 0x00000000, 0x00000000, @@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x3ec00083, 0xb0a14103, 0x01a00204, -0x3ec10082, -0x4202800e, -0x04000703, -0xb0a14202, -0x21a00803, -0x3fbf028d, -0x3f20068d, -0x3fbe0682, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, 0x3fe30102, 0x21a00882, -0x3f82028f, -0x3fe3078f, -0x3fbf0784, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, 0x3f200204, 0x3fbe0204, 0x3fe30204, @@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x21a00083, 0x40800082, 0x21a00b02, -0x10002818, -0x42a00002, -0x32800007, -0x4207000c, -0x18008208, -0x40a0000b, -0x4080020a, -0x40800709, -0x00200000, -0x42070002, -0x3ac30384, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, 0x1cffc489, -0x00200000, -0x18008383, -0x38830382, -0x4cffc486, -0x3ac28185, -0xb0408584, -0x28830382, -0x1c020387, -0x38828182, -0xb0408405, -0x1802c408, -0x28828182, -0x217ff886, -0x04000583, -0x21a00803, -0x3fbe0682, -0x3fe30102, -0x04000106, -0x21a00886, -0x04000603, -0x21a00903, -0x40803c02, -0x21a00982, -0x40800003, -0x04000184, -0x21a00a04, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, 0x40802202, 0x21a00a82, -0x42028005, -0x34208702, -0x21002282, -0x21a00804, -0x21a00886, -0x3fbf0782, +0x21a00805, +0x21a00884, +0x3fbf0582, 0x3f200102, 0x3fbe0102, 0x3fe30102, 0x21a00902, 0x40804003, 0x21a00983, -0x21a00a04, +0x21a00a05, 0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, 0x01a00c02, -0x01a00d83, -0x3420c282, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, 0x21a00e02, -0x34210283, -0x21a00f03, -0x34200284, -0x77400200, -0x3421c282, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, 0x21a00702, -0x34218283, -0x21a00083, -0x34214282, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, 0x21a00b02, -0x4200480c, -0x00200000, -0x1c010286, -0x34220284, -0x34220302, -0x0f608203, -0x5c024204, -0x3b81810b, -0x42013c02, -0x00200000, -0x18008185, -0x38808183, -0x3b814182, -0x21004e84, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, 
+0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, 0x4020007f, 0x35000100, -0x000004e0, -0x000002a0, -0x000002e8, -0x00000428, +0x00000470, +0x000002f8, +0x00000430, 0x00000360, -0x000002e8, -0x000004a0, -0x00000468, +0x000002f8, 0x000003c8, +0x000004a8, +0x00000298, 0x00000360, +0x00200000, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40085, -0x10009c09, -0x3ac10606, -0xb060c105, -0x4020007f, -0x4020007f, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408586, -0x28810602, -0x32004180, -0x34204702, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, 0x21a00382, 0x4020007f, -0x327fdc80, +0x327fde00, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x00200000, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, +0x38818282, 0x41004003, -0x38810602, -0x4020007f, -0xb0408188, -0x4020007f, -0x28810602, -0x41201002, -0x38814603, -0x10009c09, -0xb060c109, -0x4020007f, -0x28814603, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818a, -0x28818602, -0x32003080, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x41201008, -0x10009c14, -0x40800405, -0x3ac10609, -0x40800606, -0x3ac1460a, -0xb060c107, -0x3ac1860b, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, 0x20801203, -0x38810602, -0xb0408409, -0x28810602, -0x38814603, -0xb060c40a, -0x4020007f, -0x28814603, +0x3881c282, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818b, -0x28818602, -0x32002380, -0x409ffe02, -0x30801204, -0x40800205, -0x3ec40083, -0x40800406, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x41004002, -0x20801204, -0x4020007f, -0x38814603, -0x10009c0b, -0xb060c107, -0x4020007f, -0x4020007f, -0x28814603, -0x38818602, -0x4020007f, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, 0x4020007f, -0xb0408588, -0x28818602, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, 0x4020007f, -0x32001780, +0x327fd580, 0x409ffe02, -0x1000640e, -0x40800204, +0x1000658e, +0x40800206, 0x30801203, -0x40800405, -0x3ec40087, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, 0x413d8003, -0x38810602, +0x38818282, 0x4020007f, -0x327fd780, -0x409ffe02, -0x10007f0c, -0x40800205, -0x30801204, -0x40800406, -0x3ec40083, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x413d8002, -0x20801204, -0x38814603, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, 0x4020007f, -0x327feb80, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, 0x409ffe02, +0x1000588b, +0x40800208, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x1000650a, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, 
+0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408588, -0x4020007f, -0x327fc980, -0x00400000, -0x40800003, -0x4020007f, -0x35000000, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, 0x00000000, +0x00000e00, 0x00000000, 0x00000000, 0x00000000, diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c index 196033b..ae95cc1 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_save.c +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -44,7 +44,7 @@ static inline void save_event_mask(void) * Read the SPU_RdEventMsk channel and save to the LSCSA. */ offset = LSCSA_QW_OFFSET(event_mask); - regs_spill[offset].slot[0] = spu_readch(SPU_RdEventStatMask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); } static inline void save_tag_mask(void) diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 4f68d13..ca47b99 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -26,6 +26,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/fs.h> +#include <linux/cpumask.h> #include <asm/spu.h> #include <asm/spu_csa.h> @@ -41,8 +42,8 @@ struct spu_gang; /* ctx->sched_flags */ enum { - SPU_SCHED_EXITING = 0, SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ }; struct spu_context { @@ -51,17 +52,17 @@ struct spu_context { spinlock_t mmio_lock; /* protects mmio access */ struct address_space *local_store; /* local store mapping. */ struct address_space *mfc; /* 'mfc' area mappings. */ - struct address_space *cntl; /* 'control' area mappings. */ - struct address_space *signal1; /* 'signal1' area mappings. */ - struct address_space *signal2; /* 'signal2' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ struct address_space *mss; /* 'mss' area mappings. */ struct address_space *psmap; /* 'psmap' area mappings. 
*/ - spinlock_t mapping_lock; + struct mutex mapping_lock; u64 object_id; /* user space pointer for oprofile */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; struct mutex state_mutex; - struct semaphore run_sema; + struct mutex run_mutex; struct mm_struct *owner; @@ -81,17 +82,38 @@ struct spu_context { struct list_head gang_list; struct spu_gang *gang; - struct kref * prof_priv_kref; - void (* prof_priv_release) (struct kref *kref); + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); + + /* owner thread */ + pid_t tid; /* scheduler fields */ struct list_head rq; - struct work_struct sched_work; + unsigned int time_slice; unsigned long sched_flags; - unsigned long rt_priority; + cpumask_t cpus_allowed; int policy; int prio; + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; + struct list_head aff_list; int aff_head; int aff_offset; @@ -178,11 +200,15 @@ extern struct tree_descr spufs_dir_contents[]; extern struct tree_descr spufs_dir_nosched_contents[]; /* system call implementation */ -long spufs_run_spu(struct file *file, - struct spu_context *ctx, u32 *npc, u32 *status); -long spufs_create(struct nameidata *nd, - unsigned int flags, mode_t mode, struct file *filp); -extern struct file_operations spufs_context_fops; +extern struct spufs_calls spufs_calls; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct nameidata *nd, unsigned int flags, + mode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset); + +extern const struct file_operations spufs_context_fops; /* gang management */ struct spu_gang *alloc_spu_gang(void); @@ -198,6 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx); struct spu *affinity_check(struct spu_context *ctx); /* context management */ +extern atomic_t nr_spu_contexts; static inline void spu_acquire(struct spu_context *ctx) { mutex_lock(&ctx->state_mutex); @@ -217,23 +244,23 @@ void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); void spu_acquire_saved(struct spu_context *ctx); -int spu_acquire_exclusive(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); void spu_switch_notify(struct spu *spu, struct spu_context *ctx); -void spu_start_tick(struct spu_context *ctx); -void spu_stop_tick(struct spu_context *ctx); -void spu_sched_tick(void *data); +void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); int __init spu_sched_init(void); -void __exit spu_sched_exit(void); +void 
spu_sched_exit(void); extern char *isolated_loader; /* * spufs_wait - * Same as wait_event_interruptible(), except that here + * Same as wait_event_interruptible(), except that here * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. */ @@ -246,14 +273,13 @@ extern char *isolated_loader; prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ if (condition) \ break; \ - if (!signal_pending(current)) { \ - spu_release(ctx); \ - schedule(); \ - spu_acquire(ctx); \ - continue; \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ } \ - __ret = -ERESTARTSYS; \ - break; \ + spu_release(ctx); \ + schedule(); \ + spu_acquire(ctx); \ } \ finish_wait(&(wq), &__wait); \ __ret; \ @@ -274,10 +300,47 @@ struct spufs_coredump_reader { char *name; ssize_t (*read)(struct spu_context *ctx, char __user *buffer, size_t size, loff_t *pos); - u64 (*get)(void *data); + u64 (*get)(struct spu_context *ctx); size_t size; }; extern struct spufs_coredump_reader spufs_coredump_read[]; extern int spufs_coredump_num_notes; +/* + * This function is a little bit too large for an inline, but + * as fault.c is built into the kernel we can't move it out of + * line. + */ +static inline void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct timespec ts; + struct spu *spu; + enum spu_utilization_state old_state; + + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + } +} + #endif diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 1e50c4d..39ef503 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -39,7 +39,6 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -71,7 +70,7 @@ } #endif /* debug */ -#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) static inline void acquire_spu_lock(struct spu *spu) { @@ -181,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) case MFC_CNTL_SUSPEND_COMPLETE: if (csa) { csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_MASK | MFC_CNTL_SUSPEND_DMA_QUEUE; } break; @@ -191,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); if (csa) { - csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) & - ~MFC_CNTL_SUSPEND_DMA_QUEUE; + csa->priv2.mfc_control_RW = 0; } break; } @@ -252,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) * Read MFC_CNTL[Ds]. Update saved copy of * CSA.MFC_CNTL[Ds]. 
*/ - if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { - csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; - csa->suspend_time = get_cycles(); - out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); - eieio(); - csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); - eieio(); - } else { - csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; - } + csa->priv2.mfc_control_RW |= + in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING; } static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) @@ -272,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) * Write MFC_CNTL[Dh] set to a '1' to halt * the decrementer. */ - out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); eieio(); } @@ -388,6 +378,19 @@ static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); } +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R, however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -603,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Save, Step 42: @@ -614,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); /* Save the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -971,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) */ } -static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; /* Restore, Step 7: - * Restore, Step 47. - * Write MFC_Cntl[Dh,Sc]='1','1' to suspend + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend * the queue and halt the decrementer. 
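+ * (Sm is written as '0' here: MFC_CNTL_SUSPEND_MASK, bit 4 per the + * new define in spu.h, stays clear; the plain suspend_mfc() variant + * added below writes Sc only.)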
*/ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | @@ -1078,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; u64 idx; int i; @@ -1090,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) out_be64(&priv2->spu_chnldata_RW, 0UL); /* Reset the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1277,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu) cycles_t resume_time = get_cycles(); cycles_t delta_time = resume_time - csa->suspend_time; + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; } } @@ -1386,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); } +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) { /* Restore, Step 49: @@ -1536,32 +1559,27 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) * "wrapped" flag is set, OR in a '1' to * CSA.SPU_Event_Status[Tm]. */ - if (csa->lscsa->decr_status.slot[0] == 1) { - csa->spu_chnldata_RW[0] |= 0x20; - } - if ((csa->lscsa->decr_status.slot[0] == 1) && - (csa->spu_chnlcnt_RW[0] == 0 && - ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && - ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) csa->spu_chnlcnt_RW[0] = 1; - } + + csa->spu_chnldata_RW[0] |= 0x20; } static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] */ - - /* Restore CH 1 without count */ - out_be64(&priv2->spu_chnlcntptr_RW, 1); - out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]); - - /* Restore the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1813,6 +1831,7 @@ static void save_csa(struct spu_state *prev, struct spu *spu) save_mfc_queues(prev, spu); /* Step 19. */ save_ppu_querymask(prev, spu); /* Step 20. */ save_ppu_querytype(prev, spu); /* Step 21. */ + save_ppu_tagstatus(prev, spu); /* NEW. */ save_mfc_csr_tsq(prev, spu); /* Step 22. */ save_mfc_csr_cmd(prev, spu); /* Step 23. */ save_mfc_csr_ato(prev, spu); /* Step 24. 
*/ @@ -1919,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) set_switch_pending(prev, spu); /* Step 5. */ stop_spu_isolate(spu); /* NEW. */ remove_other_spu_access(prev, spu); /* Step 6. */ - suspend_mfc(prev, spu); /* Step 7. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ wait_suspend_mfc_complete(prev, spu); /* Step 8. */ if (!suspend_spe(prev, spu)) /* Step 9. */ clear_spu_status(prev, spu); /* Step 10. */ @@ -2116,7 +2135,7 @@ int spu_restore(struct spu_state *new, struct spu *spu) spu->slb_replace = 0; new->dar = 0; new->dsisr = 0; - spu->class_0_pending = 0; + spu->class_0_pending_value = 0; rc = __do_spu_restore(new, spu); release_spu_lock(spu); if (rc) { @@ -2127,19 +2146,6 @@ int spu_restore(struct spu_state *new, struct spu *spu) } EXPORT_SYMBOL_GPL(spu_restore); -/** - * spu_harvest - SPU harvest (reset) operation - * @spu: pointer to SPU iomem structure. - * - * Perform SPU harvest (reset) operation. - */ -void spu_harvest(struct spu *spu) -{ - acquire_spu_lock(spu); - harvest(NULL, spu); - release_spu_lock(spu); -} - static void init_prob(struct spu_state *csa) { csa->spu_chnlcnt_RW[9] = 1; diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 3ce0cbb..71f8f7e 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp, goto out; i = SPUFS_I(filp->f_dentry->d_inode); - ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); + ret = spufs_run_spu(i->i_ctx, &npc, &status); if (put_user(npc, unpc)) ret = -EFAULT; @@ -58,26 +58,8 @@ out: return ret; } -#ifndef MODULE -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) -{ - int fput_needed; - struct file *filp; - long ret; - - ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = do_spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); - } - - return ret; -} -#endif - -asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags, - mode_t mode, struct file *neighbor) +static long do_spu_create(const char __user *pathname, unsigned int flags, + mode_t mode, struct file *neighbor) { char *tmp; int ret; @@ -99,32 +81,10 @@ asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags, return ret; } -#ifndef MODULE -asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags, - mode_t mode, int neighbor_fd) -{ - int fput_needed; - struct file *neighbor; - long ret; - - if (flags & SPU_CREATE_AFFINITY_SPU) { - ret = -EBADF; - neighbor = fget_light(neighbor_fd, &fput_needed); - if (neighbor) { - ret = do_spu_create(pathname, flags, mode, neighbor); - fput_light(neighbor, fput_needed); - } - } - else { - ret = do_spu_create(pathname, flags, mode, NULL); - } - - return ret; -} -#endif - struct spufs_calls spufs_calls = { .create_thread = do_spu_create, .spu_run = do_spu_run, + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, .owner = THIS_MODULE, }; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d70697e..0c9d7a3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1642,9 +1642,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) sz += thread_status_size; -#ifdef ELF_CORE_WRITE_EXTRA_NOTES - sz += ELF_CORE_EXTRA_NOTES_SIZE; -#endif + sz += elf_coredump_extra_notes_size(); fill_elf_note_phdr(&phdr, sz, offset); offset += sz; 
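[ For reference, since only fs/binfmt_elf.c is quoted here: the include/linux/elf.h half of this hook (listed in the diffstat above) follows the usual pattern of inline stubs when ARCH_HAVE_EXTRA_ELF_NOTES is not defined, roughly:

#ifdef ARCH_HAVE_EXTRA_ELF_NOTES
extern int elf_coredump_extra_notes_size(void);
extern int elf_coredump_extra_notes_write(struct file *file,
		loff_t *foffset);
#else
/* no arch notes: contribute nothing to the note segment, report success */
static inline int elf_coredump_extra_notes_size(void) { return 0; }
static inline int elf_coredump_extra_notes_write(struct file *file,
		loff_t *foffset) { return 0; }
#endif

which is what lets elf_core_dump() call both helpers unconditionally on every architecture. ]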
@@ -1687,9 +1685,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 		if (!writenote(notes + i, file))
 			goto end_coredump;
 
-#ifdef ELF_CORE_WRITE_EXTRA_NOTES
-	ELF_CORE_WRITE_EXTRA_NOTES;
-#endif
+	if (elf_coredump_extra_notes_write(file, &file->f_pos))
+		goto end_coredump;
 
 	/* write out the thread status notes section */
 	list_for_each(t, &thread_list) {
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h
index 4895c22..3c6d947 100644
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -412,13 +412,8 @@ do {								\
 /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
 #define NT_SPU		1
 
-extern int arch_notes_size(void);
-extern void arch_write_notes(struct file *file);
-
-#define ELF_CORE_EXTRA_NOTES_SIZE arch_notes_size()
-#define ELF_CORE_WRITE_EXTRA_NOTES arch_write_notes(file)
-
 #define ARCH_HAVE_EXTRA_ELF_NOTES
-#endif /* CONFIG_PPC_CELL */
+
+#endif /* CONFIG_SPU_BASE */
 
 #endif /* _ASM_POWERPC_ELF_H */
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index 0b6bd3e..186d6d6 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -104,7 +104,15 @@
 
 struct spu_context;
 struct spu_runqueue;
-
+struct device_node;
+
+enum spu_utilization_state {
+	SPU_UTIL_USER,
+	SPU_UTIL_SYSTEM,
+	SPU_UTIL_IOWAIT,
+	SPU_UTIL_IDLE_LOADED,
+	SPU_UTIL_MAX
+};
 
 struct spu {
 	char *name;
@@ -112,10 +120,10 @@ struct spu {
 	u8 *local_store;
 	unsigned long problem_phys;
 	struct spu_problem __iomem *problem;
-	struct spu_priv1 __iomem *priv1; /* obsolete */
+	struct spu_priv1 __iomem *priv1;
 	struct spu_priv2 __iomem *priv2;
-	struct list_head list;
-	struct list_head sched_list;
+	struct list_head list;		/* obsolete */
+	struct list_head sched_list;	/* obsolete */
 	int number;
 	int nid; /* obsolete */
 	unsigned int irqs[3];
@@ -131,8 +139,8 @@ struct spu {
 	struct spu_runqueue *rq;
 	unsigned long long timestamp;
 	pid_t pid;
-	int prio;
-	int class_0_pending;
+	int prio;		/* obsolete */
+	int class_0_pending;	/* replaced by _value */
 	spinlock_t register_lock;
 
 	void (* wbox_callback)(struct spu *spu);
@@ -146,37 +154,77 @@ struct spu {
 
 	struct sys_device sysdev;
 
-	/* Additions for RHEL5U1 */
+	/* Additions for RHEL5U2 */
 #ifndef __GENKSYMS__
+	enum { SPU_FREE, SPU_USED } alloc_state;
+	u64 class_0_pending_value;
+	void (* dma_callback)(struct spu *spu, int type);
 	pid_t tgid;
 	int has_mem_affinity;
 	struct list_head aff_list;
-	struct list_head be_list;
+	struct list_head cbe_list;
 	struct list_head full_list;
 	void* pdata; /* platform private data */
+
+	u64 spe_id;
+
+	/* of based platforms only */
+	struct device_node *devnode;
+
+	/* beat only */
+	u64 shadow_int_mask_RW[3];
+
+	struct {
+		/* protected by interrupt reentrancy */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long class2_intr;
+		unsigned long long libassist;
+	} stats;
+
 #endif
 };
 
-struct be_spu_info {
+struct cbe_spu_info {
+	struct mutex list_mutex;
 	struct list_head spus;
-	struct list_head free_spus;
 	int n_spus;
+	int nr_active;
 	atomic_t reserved_spus;
 };
 
-extern struct be_spu_info be_spu_info[];
+extern struct cbe_spu_info cbe_spu_info[];
 
-struct spu *spu_alloc(void);
-struct spu *spu_alloc_node(int node);
-struct spu *spu_alloc_spu(struct spu *spu);
-void spu_free(struct spu *spu);
+void spu_init_channels(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
 
+#ifdef CONFIG_KEXEC
+void crash_register_spus(struct list_head *list);
+#else
+static inline void crash_register_spus(struct list_head *list)
+{
+}
+#endif
+
+extern void spu_invalidate_slbs(struct spu *spu);
+extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
+
+/* Calls from the memory management to the SPU */
+struct mm_struct;
+extern void spu_flush_all_slbs(struct mm_struct *mm);
+
 /* This interface allows a profiler (e.g., OProfile) to store a ref
  * to spu context information that it creates. This caching technique
  * avoids the need to recreate this information after a save/restore operation.
@@ -189,14 +237,7 @@ void spu_set_profile_private_kref(struct spu_context * ctx,
 	    struct kref * prof_info_kref,
 	    void (* prof_info_release) (struct kref * kref));
 
-void * spu_get_profile_private_kref(struct spu_context * ctx);
-
-extern void spu_invalidate_slbs(struct spu *spu);
-extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
-
-/* Calls from the memory management to the SPU */
-struct mm_struct;
-extern void spu_flush_all_slbs(struct mm_struct *mm);
+void *spu_get_profile_private_kref(struct spu_context *ctx);
 
 /* system callbacks from the SPU */
 struct spu_syscall_block {
@@ -207,19 +248,14 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 
 /* syscalls implemented in spufs */
 struct file;
-extern struct spufs_calls {
-	asmlinkage long (*create_thread)(const char __user *name,
+struct spufs_calls {
+	long (*create_thread)(const char __user *name,
 			unsigned int flags, mode_t mode, struct file *neighbor);
-	asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
+	long (*spu_run)(struct file *filp, __u32 __user *unpc,
 				__u32 __user *ustatus);
-	struct module *owner;
-} spufs_calls;
-
-/* coredump calls implemented in spufs */
-struct spu_coredump_calls {
-	asmlinkage int (*arch_notes_size)(void);
-	asmlinkage void (*arch_write_notes)(struct file *file);
+	int (*coredump_extra_notes_size)(void);
+	int (*coredump_extra_notes_write)(struct file *file, loff_t *foffset);
 	struct module *owner;
 };
@@ -243,21 +279,8 @@ struct spu_coredump_calls {
 
 #define SPU_CREATE_FLAG_ALL		0x003f /* mask of all valid flags */
 
-#ifdef CONFIG_SPU_FS_MODULE
 int register_spu_syscalls(struct spufs_calls *calls);
 void unregister_spu_syscalls(struct spufs_calls *calls);
-#else
-static inline int register_spu_syscalls(struct spufs_calls *calls)
-{
-	return 0;
-}
-static inline void unregister_spu_syscalls(struct spufs_calls *calls)
-{
-}
-#endif /* MODULE */
-
-int register_arch_coredump_calls(struct spu_coredump_calls *calls);
-void unregister_arch_coredump_calls(struct spu_coredump_calls *calls);
 
 int spu_add_sysdev_attr(struct sysdev_attribute *attr);
 void spu_remove_sysdev_attr(struct sysdev_attribute *attr);
@@ -409,6 +432,7 @@ struct spu_priv2 {
 #define MFC_CNTL_RESUME_DMA_QUEUE		(0ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE		(1ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK		(1ull << 0)
+#define MFC_CNTL_SUSPEND_MASK			(1ull << 4)
 #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION	(0ull << 8)
 #define MFC_CNTL_SUSPEND_IN_PROGRESS		(1ull << 8)
 #define MFC_CNTL_SUSPEND_COMPLETE		(3ull << 8)
@@ -477,6 +501,7 @@ struct spu_priv1 {
 #define MFC_STATE1_PROBLEM_STATE_MASK		0x08ull
 #define MFC_STATE1_RELOCATE_MASK		0x10ull
 #define MFC_STATE1_MASTER_RUN_CONTROL_MASK	0x20ull
+#define MFC_STATE1_TABLE_SEARCH_MASK		0x40ull
 	u64 mfc_lpid_RW;					/* 0x008 */
 	u64 spu_idr_RW;						/* 0x010 */
 	u64 mfc_vr_RO;						/* 0x018 */
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h
index 90df203..01a3b93 100644
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -50,6 +50,12 @@
 #define SPU_STOPPED_STATUS_P_I       8
 #define SPU_STOPPED_STATUS_R         9
 
+/*
+ * Definitions for software decrementer status flag.
+ */
+#define SPU_DECR_STATUS_RUNNING 0x1
+#define SPU_DECR_STATUS_WRAPPED 0x2
+
 #ifndef __ASSEMBLY__
 /**
  * spu_reg128 - generic 128-bit register definition.
@@ -63,7 +69,7 @@ struct spu_reg128 {
  * @gprs: Array of saved registers.
  * @fpcr: Saved floating point status control register.
  * @decr: Saved decrementer value.
- * @decr_status: Indicates decrementer run status.
+ * @decr_status: Indicates software decrementer status flags.
  * @ppu_mb: Saved PPU mailbox data.
  * @ppuint_mb: Saved PPU interrupting mailbox data.
  * @tag_mask: Saved tag group mask.
diff --git a/include/asm-powerpc/spu_priv1.h b/include/asm-powerpc/spu_priv1.h
index 83239b1..5663789 100644
--- a/include/asm-powerpc/spu_priv1.h
+++ b/include/asm-powerpc/spu_priv1.h
@@ -38,7 +38,7 @@ struct spu_priv1_ops {
 	u64 (*mfc_dar_get) (struct spu *spu);
 	u64 (*mfc_dsisr_get) (struct spu *spu);
 	void (*mfc_dsisr_set) (struct spu *spu, u64 dsisr);
-	void (*mfc_sdr_set) (struct spu *spu, u64 sdr/* Obsolete */);
+	void (*mfc_sdr_set) (struct spu *spu, u64 sdr /* unused */);
 	void (*mfc_sr1_set) (struct spu *spu, u64 sr1);
 	u64 (*mfc_sr1_get) (struct spu *spu);
 	void (*mfc_tclass_id_set) (struct spu *spu, u64 tclass_id);
@@ -178,6 +178,7 @@ struct spu_management_ops {
 	int (*enumerate_spus)(int (*fn)(void *data));
 	int (*create_spu)(struct spu *spu, void *data);
 	int (*destroy_spu)(struct spu *spu);
+	int (*init_affinity)(void);
 };
 
 extern const struct spu_management_ops* spu_management_ops;
@@ -200,6 +201,12 @@ spu_destroy_spu (struct spu *spu)
 	return spu_management_ops->destroy_spu(spu);
 }
 
+static inline int
+spu_init_affinity (void)
+{
+	return spu_management_ops->init_affinity();
+}
+
 /*
  * The declarations folowing are put here for convenience
  * and only intended to be used by the platform setup code.
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 9c9efb3..d89ce5b 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -355,13 +355,6 @@ typedef struct elf64_shdr {
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 
 #ifndef __ASSEMBLY__
-#ifndef ARCH_HAVE_EXTRA_ELF_NOTES
-static inline int arch_notes_size(void) { return 0; }
-static inline void arch_write_notes(struct file *file) { }
-
-#define ELF_CORE_EXTRA_NOTES_SIZE arch_notes_size()
-#define ELF_CORE_WRITE_EXTRA_NOTES arch_write_notes(file)
-#endif /* ARCH_HAVE_EXTRA_ELF_NOTES */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
@@ -393,7 +386,16 @@ extern Elf64_Dyn _DYNAMIC [];
 
 #endif
 
-#endif /* __ASSEMBLY__ */
+/* Optional callbacks to write extra ELF notes. */
+#ifndef ARCH_HAVE_EXTRA_ELF_NOTES
+static inline int elf_coredump_extra_notes_size(void) { return 0; }
+static inline int elf_coredump_extra_notes_write(struct file *file,
+			loff_t *foffset) { return 0; }
+#else
+extern int elf_coredump_extra_notes_size(void);
+extern int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+#endif
+#endif /* __ASSEMBLY__ */
 
 #endif /* _LINUX_ELF_H */
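One consequence of the spu_priv1.h change worth noting: spu_init_affinity() dereferences the new init_affinity hook unconditionally, so every platform's spu_management_ops must now provide one, even if it is a no-op. A sketch for a hypothetical platform with no SPU affinity information (all example_* names are placeholders, not symbols from this patch):

    #include <asm/spu.h>
    #include <asm/spu_priv1.h>

    /* Placeholder callbacks; a real platform discovers and wires up its
     * SPEs here (cf. spu_manage.c). */
    static int example_enumerate_spus(int (*fn)(void *data)) { return 0; }
    static int example_create_spu(struct spu *spu, void *data) { return 0; }
    static int example_destroy_spu(struct spu *spu) { return 0; }

    /* No affinity information on this platform: set up nothing and
     * report success, which satisfies spu_init_affinity(). */
    static int example_init_affinity(void)
    {
    	return 0;
    }

    const struct spu_management_ops example_spu_management_ops = {
    	.enumerate_spus	= example_enumerate_spus,
    	.create_spu	= example_create_spu,
    	.destroy_spu	= example_destroy_spu,
    	.init_affinity	= example_init_affinity,
    };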