From: Doug Ledford <dledford@redhat.com>
Subject: [Patch RHEL5.1] Repost: Update OFED code to 1.2
Date: Wed, 20 Jun 2007 02:44:35 -0400
Bugzilla: 225581
Message-Id: <1182321875.5625.98.camel@firewall.xsintricity.com>
Changelog: [openib] Update OFED code to 1.2


This is for bugzilla 225581.  It updates the Infiniband support to OFED
1.2 (rc5 this time instead of rc4).  As per Jeff Garzik's request, this
is an incremental patch.  However, as I pointed out in my emails, even
the incremental patch is rather large.  As before, I've left iSER untouched.
Also, as per Don's request, it's intended to just be added to the end of
the patches in the spec file, not inserted in the middle.
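(As a rough illustration of that spec file change -- assuming the usual RPM
conventions rather than the exact layout of the RHEL5 kernel spec, and using
a purely hypothetical patch number and file name -- appending the patch at
the end would look like:

    # declared after the last existing PatchNNNN: line
    Patch99999: linux-2.6.18-ofed-1.2-update.patch

    # and applied last in %prep, after the existing %patch lines
    %patch99999 -p1
)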

-- 
Doug Ledford <dledford@redhat.com>
              GPG KeyID: CFBFF194
              http://people.redhat.com/dledford

Infiniband specific RPMs available at
              http://people.redhat.com/dledford/Infiniband

--- linux-2.6.18.noarch/drivers/infiniband/core/addr.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/addr.c
@@ -47,6 +47,7 @@ struct addr_req {
 	struct sockaddr src_addr;
 	struct sockaddr dst_addr;
 	struct rdma_dev_addr *addr;
+	struct rdma_addr_client *client;
 	void *context;
 	void (*callback)(int status, struct sockaddr *src_addr,
 			 struct rdma_dev_addr *addr, void *context);
@@ -61,12 +62,35 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     unsigned char *dst_dev_addr)
+void rdma_addr_register_client(struct rdma_addr_client *client)
+{
+	atomic_set(&client->refcount, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_register_client);
+
+static inline void put_client(struct rdma_addr_client *client)
+{
+	if (atomic_dec_and_test(&client->refcount))
+		complete(&client->comp);
+}
+
+void rdma_addr_unregister_client(struct rdma_addr_client *client)
+{
+	put_client(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(rdma_addr_unregister_client);
+
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     const unsigned char *dst_dev_addr)
 {
 	switch (dev->type) {
 	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = IB_NODE_CA;
+		dev_addr->dev_type = RDMA_NODE_IB_CA;
+		break;
+	case ARPHRD_ETHER:
+		dev_addr->dev_type = RDMA_NODE_RNIC;
 		break;
 	default:
 		return -EADDRNOTAVAIL;
@@ -78,18 +102,19 @@ static int copy_addr(struct rdma_dev_add
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
 	struct net_device *dev;
-	u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 	int ret;
 
 	dev = ip_dev_find(ip);
 	if (!dev)
 		return -EADDRNOTAVAIL;
 
-	ret = copy_addr(dev_addr, dev, NULL);
+	ret = rdma_copy_addr(dev_addr, dev, NULL);
 	dev_put(dev);
 	return ret;
 }
@@ -114,7 +139,7 @@ static void queue_req(struct addr_req *r
 
 	mutex_lock(&lock);
 	list_for_each_entry_reverse(temp_req, &req_list, list) {
-		if (time_after(req->timeout, temp_req->timeout))
+		if (time_after_eq(req->timeout, temp_req->timeout))
 			break;
 	}
 
@@ -161,7 +186,7 @@ static int addr_resolve_remote(struct so
 
 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
-		copy_addr(addr, rt->idev->dev, NULL);
+		rdma_copy_addr(addr, rt->idev->dev, NULL);
 		goto put;
 	}
 
@@ -181,7 +206,7 @@ static int addr_resolve_remote(struct so
 		src_in->sin_addr.s_addr = rt->rt_src;
 	}
 
-	ret = copy_addr(addr, neigh->dev, neigh->ha);
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
 put:
@@ -200,19 +225,17 @@ static void process_req(void *data)
 
 	mutex_lock(&lock);
 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
-		if (req->status) {
+		if (req->status == -ENODATA) {
 			src_in = (struct sockaddr_in *) &req->src_addr;
 			dst_in = (struct sockaddr_in *) &req->dst_addr;
 			req->status = addr_resolve_remote(src_in, dst_in,
 							  req->addr);
+			if (req->status && time_after_eq(jiffies, req->timeout))
+				req->status = -ETIMEDOUT;
+			else if (req->status == -ENODATA)
+				continue;
 		}
-		if (req->status && time_after(jiffies, req->timeout))
-			req->status = -ETIMEDOUT;
-		else if (req->status == -ENODATA)
-			continue;
-
-		list_del(&req->list);
-		list_add_tail(&req->list, &done_list);
+		list_move_tail(&req->list, &done_list);
 	}
 
 	if (!list_empty(&req_list)) {
@@ -225,6 +248,7 @@ static void process_req(void *data)
 		list_del(&req->list);
 		req->callback(req->status, &req->src_addr, req->addr,
 			      req->context);
+		put_client(req->client);
 		kfree(req);
 	}
 }
@@ -235,7 +259,7 @@ static int addr_resolve_local(struct soc
 {
 	struct net_device *dev;
 	u32 src_ip = src_in->sin_addr.s_addr;
-	u32 dst_ip = dst_in->sin_addr.s_addr;
+	__be32 dst_ip = dst_in->sin_addr.s_addr;
 	int ret;
 
 	dev = ip_dev_find(dst_ip);
@@ -245,7 +269,7 @@ static int addr_resolve_local(struct soc
 	if (ZERONET(src_ip)) {
 		src_in->sin_family = dst_in->sin_family;
 		src_in->sin_addr.s_addr = dst_ip;
-		ret = copy_addr(addr, dev, dev->dev_addr);
+		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
 	} else if (LOOPBACK(src_ip)) {
 		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
 		if (!ret)
@@ -260,7 +284,8 @@ static int addr_resolve_local(struct soc
 	return ret;
 }
 
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct rdma_addr_client *client,
+		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
@@ -281,6 +306,8 @@ int rdma_resolve_ip(struct sockaddr *src
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
+	req->client = client;
+	atomic_inc(&client->refcount);
 
 	src_in = (struct sockaddr_in *) &req->src_addr;
 	dst_in = (struct sockaddr_in *) &req->dst_addr;
@@ -301,6 +328,7 @@ int rdma_resolve_ip(struct sockaddr *src
 		break;
 	default:
 		ret = req->status;
+		atomic_dec(&client->refcount);
 		kfree(req);
 		break;
 	}
@@ -317,8 +345,7 @@ void rdma_addr_cancel(struct rdma_dev_ad
 		if (req->addr == addr) {
 			req->status = -ECANCELED;
 			req->timeout = jiffies;
-			list_del(&req->list);
-			list_add(&req->list, &req_list);
+			list_move(&req->list, &req_list);
 			set_timeout(req->timeout);
 			break;
 		}
@@ -327,14 +354,13 @@ void rdma_addr_cancel(struct rdma_dev_ad
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
-	if (event == NETEVENT_NEIGH_UPDATE) {  
+	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;
 
-		if (neigh->dev->type == ARPHRD_INFINIBAND &&
-		    (neigh->nud_state & NUD_VALID)) {
+		if (neigh->nud_state & NUD_VALID) {
 			set_timeout(jiffies);
 		}
 	}
--- linux-2.6.18.noarch/drivers/infiniband/core/cache.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/cache.c
@@ -62,12 +62,13 @@ struct ib_update_work {
 
 static inline int start_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
--- linux-2.6.18.noarch/drivers/infiniband/core/cma.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/cma.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,6 +44,8 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/ib_local_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -53,10 +56,7 @@ module_param_named(tavor_quirk, tavor_qu
 MODULE_PARM_DESC(tavor_quirk, "Tavor performance quirk: limit MTU to 1K if > 0");
 
 #define CMA_CM_RESPONSE_TIMEOUT 20
-
-static int CMA_MAX_CM_RETRIES = 15;
-module_param_named(max_cm_retries, CMA_MAX_CM_RETRIES, int, 0644);
-MODULE_PARM_DESC(max_cm_retries, "How many times to retry a connection attempt");
+#define CMA_MAX_CM_RETRIES 15
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
@@ -67,12 +67,17 @@ static struct ib_client cma_client = {
 	.remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
+static struct rdma_addr_client addr_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
 static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
+static DEFINE_IDR(udp_ps);
+static DEFINE_IDR(ipoib_ps);
+static int next_port;
 
 struct cma_device {
 	struct list_head	list;
@@ -117,6 +122,7 @@ struct rdma_id_private {
 	struct list_head	list;
 	struct list_head	listen_list;
 	struct cma_device	*cma_dev;
+	struct list_head	mc_list;
 
 	enum cma_state		state;
 	spinlock_t		lock;
@@ -131,18 +137,27 @@ struct rdma_id_private {
 	int			query_id;
 	union {
 		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
 	} cm_id;
 
-	union {
-		struct ib_cm_req_opt *req;
-	} options;
-
 	u32			seq_num;
+	u32			qkey;
 	u32			qp_num;
-	enum ib_qp_type		qp_type;
 	u8			srq;
 };
 
+struct cma_multicast {
+	struct rdma_id_private *id_priv;
+	union {
+		struct ib_sa_multicast *ib;
+	} multicast;
+	struct list_head	list;
+	void			*context;
+	struct sockaddr		addr;
+	u8			pad[sizeof(struct sockaddr_in6) -
+				    sizeof(struct sockaddr)];
+};
+
 struct cma_work {
 	struct work_struct	work;
 	struct rdma_id_private	*id;
@@ -248,6 +263,11 @@ static inline void sdp_set_ip_ver(struct
 	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
 }
 
+static inline int cma_is_ud_ps(enum rdma_port_space ps)
+{
+	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
+}
+
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
@@ -270,36 +290,62 @@ static void cma_detach_from_dev(struct r
 	id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_set_qkey(struct ib_device *device, u8 port_num,
+			enum rdma_port_space ps,
+			struct rdma_dev_addr *dev_addr, u32 *qkey)
+{
+	struct ib_sa_mcmember_rec rec;
+	int ret = 0;
+
+	switch (ps) {
+	case RDMA_PS_UDP:
+		*qkey = RDMA_UDP_QKEY;
+		break;
+	case RDMA_PS_IPOIB:
+		ib_addr_get_mgid(dev_addr, &rec.mgid);
+		ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
+		*qkey = be32_to_cpu(rec.qkey);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
-
+	switch (rdma_node_get_transport(dev_addr->dev_type)) {
+	case RDMA_TRANSPORT_IB:
+		ib_addr_get_sgid(dev_addr, &gid);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		iw_addr_get_sgid(dev_addr, &gid);
+		break;
+	default:
+		return -ENODEV;
+	}
 
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
 		if (!ret) {
-			cma_attach_to_dev(id_priv, cma_dev);
+			ret = cma_set_qkey(cma_dev->device,
+					   id_priv->id.port_num,
+					   id_priv->id.ps, dev_addr,
+					   &id_priv->qkey);
+			if (!ret)
+				cma_attach_to_dev(id_priv, cma_dev);
 			break;
 		}
 	}
 	return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-	switch (id_priv->id.route.addr.dev_addr.dev_type) {
-	case IB_NODE_CA:
-		return cma_acquire_ib_dev(id_priv);
-	default:
-		return -ENODEV;
-	}
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -331,31 +377,50 @@ struct rdma_cm_id *rdma_create_id(rdma_c
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
 	INIT_LIST_HEAD(&id_priv->listen_list);
-	INIT_LIST_HEAD(&id_priv->list);
+	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 
 	return &id_priv->id;
 }
 EXPORT_SYMBOL(rdma_create_id);
 
-static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
 	struct ib_qp_attr qp_attr;
-	struct rdma_dev_addr *dev_addr;
-	int ret;
+	int qp_attr_mask, ret;
 
-	dev_addr = &id_priv->id.route.addr.dev_addr;
-	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
-				  ib_addr_get_pkey(dev_addr),
-				  &qp_attr.pkey_index);
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 	if (ret)
 		return ret;
 
+	qp_attr.qp_state = IB_QPS_RTS;
+	qp_attr.sq_psn = 0;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+
+	return ret;
+}
+
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
 	qp_attr.qp_state = IB_QPS_INIT;
-	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
-	qp_attr.port_num = id_priv->id.port_num;
-	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
-					  IB_QP_PKEY_INDEX | IB_QP_PORT);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 }
 
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -373,21 +438,15 @@ int rdma_create_qp(struct rdma_cm_id *id
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		ret = cma_init_ib_qp(id_priv, qp);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
-
+	if (cma_is_ud_ps(id_priv->id.ps))
+		ret = cma_init_ud_qp(id_priv, qp);
+	else
+		ret = cma_init_conn_qp(id_priv, qp);
 	if (ret)
 		goto err;
 
 	id->qp = qp;
 	id_priv->qp_num = qp->qp_num;
-	id_priv->qp_type = qp->qp_type;
 	id_priv->srq = (qp->srq != NULL);
 	return 0;
 err:
@@ -455,20 +514,56 @@ static int cma_modify_qp_err(struct rdma
 	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
 }
 
+static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	int ret;
+
+	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+				  ib_addr_get_pkey(dev_addr),
+				  &qp_attr->pkey_index);
+	if (ret)
+		return ret;
+
+	qp_attr->port_num = id_priv->id.port_num;
+	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+	if (cma_is_ud_ps(id_priv->id.ps)) {
+		qp_attr->qkey = id_priv->qkey;
+		*qp_attr_mask |= IB_QP_QKEY;
+	} else {
+		qp_attr->qp_access_flags = 0;
+		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
+	}
+	return 0;
+}
+
 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask)
 {
 	struct rdma_id_private *id_priv;
-	int ret;
+	int ret = 0;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
-		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
-					 qp_attr_mask);
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
+		else
+			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+						 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		if (!id_priv->cm_id.iw) {
+			qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		} else
+			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+						 qp_attr_mask);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -501,9 +596,17 @@ static inline int cma_any_addr(struct so
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
 
+static inline __be16 cma_port(struct sockaddr *addr)
+{
+	if (addr->sa_family == AF_INET)
+		return ((struct sockaddr_in *) addr)->sin_port;
+	else
+		return ((struct sockaddr_in6 *) addr)->sin6_port;
+}
+
 static inline int cma_any_port(struct sockaddr *addr)
 {
-	return !((struct sockaddr_in *) addr)->sin_port;
+	return !cma_port(addr);
 }
 
 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
@@ -585,24 +688,10 @@ static inline int cma_user_data_offset(e
 	}
 }
 
-static int cma_notify_user(struct rdma_id_private *id_priv,
-			   enum rdma_cm_event_type type, int status,
-			   void *data, u8 data_len)
-{
-	struct rdma_cm_event event;
-
-	event.event = type;
-	event.status = status;
-	event.private_data = data;
-	event.private_data_len = data_len;
-
-	return id_priv->id.event_handler(&id_priv->id, &event);
-}
-
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -622,11 +711,15 @@ static void cma_destroy_listen(struct rd
 	cma_exch(id_priv, CMA_DESTROYING);
 
 	if (id_priv->cma_dev) {
-		switch (id_priv->id.device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@@ -691,6 +784,19 @@ static void cma_release_port(struct rdma
 	mutex_unlock(&lock);
 }
 
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+	struct cma_multicast *mc;
+
+	while (!list_empty(&id_priv->mc_list)) {
+		mc = container_of(id_priv->mc_list.next,
+				  struct cma_multicast, list);
+		list_del(&mc->list);
+		ib_sa_free_multicast(mc->multicast.ib);
+		kfree(mc);
+	}
+}
+
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
@@ -703,15 +809,20 @@ void rdma_destroy_id(struct rdma_cm_id *
 	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
 		mutex_unlock(&lock);
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
-	  	mutex_lock(&lock);
+		cma_leave_mc_groups(id_priv);
+		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
 	}
 	mutex_unlock(&lock);
@@ -721,7 +832,6 @@ void rdma_destroy_id(struct rdma_cm_id *
 	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv->id.route.path_rec);
-	kfree(id_priv->options.req);
 	kfree(id_priv);
 }
 EXPORT_SYMBOL(rdma_destroy_id);
@@ -760,46 +870,61 @@ static int cma_verify_rep(struct rdma_id
 	return 0;
 }
 
+static void cma_set_rep_event_data(struct rdma_cm_event *event,
+				   struct ib_cm_rep_event_param *rep_data,
+				   void *private_data)
+{
+	event->param.conn.private_data = private_data;
+	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
+	event->param.conn.responder_resources = rep_data->responder_resources;
+	event->param.conn.initiator_depth = rep_data->initiator_depth;
+	event->param.conn.flow_control = rep_data->flow_control;
+	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
+	event->param.conn.srq = rep_data->srq;
+	event->param.conn.qp_num = rep_data->remote_qpn;
+}
+
 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv = cm_id->context;
-	enum rdma_cm_event_type event;
-	u8 private_data_len = 0;
-	int ret = 0, status = 0;
+	struct rdma_cm_event event;
+	int ret = 0;
 
 	atomic_inc(&id_priv->dev_remove);
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		goto out;
 
+	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
 	case IB_CM_REQ_ERROR:
 	case IB_CM_REP_ERROR:
-		event = RDMA_CM_EVENT_UNREACHABLE;
-		status = -ETIMEDOUT;
+		event.event = RDMA_CM_EVENT_UNREACHABLE;
+		event.status = -ETIMEDOUT;
 		break;
 	case IB_CM_REP_RECEIVED:
-		status = cma_verify_rep(id_priv, ib_event->private_data);
-		if (status)
-			event = RDMA_CM_EVENT_CONNECT_ERROR;
+		event.status = cma_verify_rep(id_priv, ib_event->private_data);
+		if (event.status)
+			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
 		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
-			status = cma_rep_recv(id_priv);
-			event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
-					 RDMA_CM_EVENT_ESTABLISHED;
+			event.status = cma_rep_recv(id_priv);
+			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
+						     RDMA_CM_EVENT_ESTABLISHED;
 		} else
-			event = RDMA_CM_EVENT_CONNECT_RESPONSE;
-		private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
+			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
+				       ib_event->private_data);
 		break;
 	case IB_CM_RTU_RECEIVED:
 	case IB_CM_USER_ESTABLISHED:
-		event = RDMA_CM_EVENT_ESTABLISHED;
+		event.event = RDMA_CM_EVENT_ESTABLISHED;
 		break;
 	case IB_CM_DREQ_ERROR:
-		status = -ETIMEDOUT; /* fall through */
+		event.status = -ETIMEDOUT; /* fall through */
 	case IB_CM_DREQ_RECEIVED:
 	case IB_CM_DREP_RECEIVED:
 		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
 			goto out;
-		event = RDMA_CM_EVENT_DISCONNECTED;
+		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
 	case IB_CM_TIMEWAIT_EXIT:
 	case IB_CM_MRA_RECEIVED:
@@ -807,9 +932,10 @@ static int cma_ib_handler(struct ib_cm_i
 		goto out;
 	case IB_CM_REJ_RECEIVED:
 		cma_modify_qp_err(&id_priv->id);
-		status = ib_event->param.rej_rcvd.reason;
-		event = RDMA_CM_EVENT_REJECTED;
-		private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
+		event.status = ib_event->param.rej_rcvd.reason;
+		event.event = RDMA_CM_EVENT_REJECTED;
+		event.param.conn.private_data = ib_event->private_data;
+		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
 		break;
 	default:
 		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
@@ -817,8 +943,7 @@ static int cma_ib_handler(struct ib_cm_i
 		goto out;
 	}
 
-	ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
-			      private_data_len);
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
@@ -832,8 +957,8 @@ out:
 	return ret;
 }
 
-static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
-					  struct ib_cm_event *ib_event)
+static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
+					       struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
@@ -842,23 +967,25 @@ static struct rdma_id_private *cma_new_i
 	__u16 port;
 	u8 ip_ver;
 
+	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+			     &ip_ver, &port, &src, &dst))
+		goto err;
+
 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
 			    listen_id->ps);
 	if (IS_ERR(id))
-		return NULL;
+		goto err;
+
+	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+			  ip_ver, port, src, dst);
 
 	rt = &id->route;
 	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
-	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
+	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
+			       GFP_KERNEL);
 	if (!rt->path_rec)
-		goto err;
-
-	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
-			     &ip_ver, &port, &src, &dst))
-		goto err;
+		goto destroy_id;
 
-	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
-			  ip_ver, port, src, dst);
 	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
 	if (rt->num_paths == 2)
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
@@ -866,7 +993,45 @@ static struct rdma_id_private *cma_new_i
 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	id_priv->state = CMA_CONNECT;
+	return id_priv;
+
+destroy_id:
+	rdma_destroy_id(id);
+err:
+	return NULL;
+}
+
+static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
+					      struct ib_cm_event *ib_event)
+{
+	struct rdma_id_private *id_priv;
+	struct rdma_cm_id *id;
+	union cma_ip_addr *src, *dst;
+	__u16 port;
+	u8 ip_ver;
+	int ret;
+
+	id = rdma_create_id(listen_id->event_handler, listen_id->context,
+			    listen_id->ps);
+	if (IS_ERR(id))
+		return NULL;
+
+
+	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
+			     &ip_ver, &port, &src, &dst))
+		goto err;
+
+	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
+			  ip_ver, port, src, dst);
+
+	ret = rdma_translate_ip(&id->route.addr.src_addr,
+				&id->route.addr.dev_addr);
+	if (ret)
+		goto err;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -876,9 +1041,25 @@ err:
 	return NULL;
 }
 
+static void cma_set_req_event_data(struct rdma_cm_event *event,
+				   struct ib_cm_req_event_param *req_data,
+				   void *private_data, int offset)
+{
+	event->param.conn.private_data = private_data + offset;
+	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
+	event->param.conn.responder_resources = req_data->responder_resources;
+	event->param.conn.initiator_depth = req_data->initiator_depth;
+	event->param.conn.flow_control = req_data->flow_control;
+	event->param.conn.retry_count = req_data->retry_count;
+	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
+	event->param.conn.srq = req_data->srq;
+	event->param.conn.qp_num = req_data->remote_qpn;
+}
+
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *listen_id, *conn_id;
+	struct rdma_cm_event event;
 	int offset, ret;
 
 	listen_id = cm_id->context;
@@ -888,7 +1069,19 @@ static int cma_req_handler(struct ib_cm_
 		goto out;
 	}
 
-	conn_id = cma_new_id(&listen_id->id, ib_event);
+	memset(&event, 0, sizeof event);
+	offset = cma_user_data_offset(listen_id->id.ps);
+	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+	if (cma_is_ud_ps(listen_id->id.ps)) {
+		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
+		event.param.ud.private_data = ib_event->private_data + offset;
+		event.param.ud.private_data_len =
+				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
+	} else {
+		conn_id = cma_new_conn_id(&listen_id->id, ib_event);
+		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
+				       ib_event->private_data, offset);
+	}
 	if (!conn_id) {
 		ret = -ENOMEM;
 		goto out;
@@ -896,31 +1089,27 @@ static int cma_req_handler(struct ib_cm_
 
 	atomic_inc(&conn_id->dev_remove);
 	mutex_lock(&lock);
-	ret = cma_acquire_ib_dev(conn_id);
+	ret = cma_acquire_dev(conn_id);
 	mutex_unlock(&lock);
-	if (ret) {
-		ret = -ENODEV;
-		cma_exch(conn_id, CMA_DESTROYING);
-		cma_release_remove(conn_id);
-		rdma_destroy_id(&conn_id->id);
-		goto out;
-	}
+	if (ret)
+		goto release_conn_id;
 
 	conn_id->cm_id.ib = cm_id;
 	cm_id->context = conn_id;
 	cm_id->cm_handler = cma_ib_handler;
 
-	offset = cma_user_data_offset(listen_id->id.ps);
-	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
-			      ib_event->private_data + offset,
-			      IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
-	if (ret) {
-		/* Destroy the CM ID by returning a non-zero value. */
-		conn_id->cm_id.ib = NULL;
-		cma_exch(conn_id, CMA_DESTROYING);
-		cma_release_remove(conn_id);
-		rdma_destroy_id(&conn_id->id);
-	}
+	ret = conn_id->id.event_handler(&conn_id->id, &event);
+	if (!ret)
+		goto out;
+
+	/* Destroy the CM ID by returning a non-zero value. */
+	conn_id->cm_id.ib = NULL;
+
+release_conn_id:
+	cma_exch(conn_id, CMA_DESTROYING);
+	cma_release_remove(conn_id);
+	rdma_destroy_id(&conn_id->id);
+
 out:
 	cma_release_remove(listen_id);
 	return ret;
@@ -928,8 +1117,7 @@ out:
 
 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
 {
-	return cpu_to_be64(((u64)ps << 16) +
-	       be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
+	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
 }
 
 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
@@ -982,61 +1170,235 @@ static void cma_set_compare_data(enum rd
 	}
 }
 
-static int cma_ib_listen(struct rdma_id_private *id_priv)
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 {
-	struct ib_cm_compare_data compare_data;
-	struct sockaddr *addr;
-	__be64 svc_id;
-	int ret;
+	struct rdma_id_private *id_priv = iw_id->context;
+	struct rdma_cm_event event;
+	struct sockaddr_in *sin;
+	int ret = 0;
 
-	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
-					    id_priv);
-	if (IS_ERR(id_priv->cm_id.ib))
-		return PTR_ERR(id_priv->cm_id.ib);
+	memset(&event, 0, sizeof event);
+	atomic_inc(&id_priv->dev_remove);
 
-	addr = &id_priv->id.route.addr.src_addr;
-	svc_id = cma_get_service_id(id_priv->id.ps, addr);
-	if (cma_any_addr(addr))
-		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
-	else {
-		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
-		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CLOSE:
+		event.event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		*sin = iw_event->local_addr;
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		*sin = iw_event->remote_addr;
+		switch (iw_event->status) {
+		case 0:
+			event.event = RDMA_CM_EVENT_ESTABLISHED;
+			break;
+		case -ECONNRESET:
+		case -ECONNREFUSED:
+			event.event = RDMA_CM_EVENT_REJECTED;
+			break;
+		case -ETIMEDOUT:
+			event.event = RDMA_CM_EVENT_UNREACHABLE;
+			break;
+		default:
+			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
+			break;
+		}
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event.event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	default:
+		BUG_ON(1);
 	}
 
+	event.status = iw_event->status;
+	event.param.conn.private_data = iw_event->private_data;
+	event.param.conn.private_data_len = iw_event->private_data_len;
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
 	if (ret) {
-		ib_destroy_cm_id(id_priv->cm_id.ib);
-		id_priv->cm_id.ib = NULL;
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.iw = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
 	}
 
+	cma_release_remove(id_priv);
 	return ret;
 }
 
-static int cma_listen_handler(struct rdma_cm_id *id,
-			      struct rdma_cm_event *event)
-{
-	struct rdma_id_private *id_priv = id->context;
-
-	id->context = id_priv->id.context;
-	id->event_handler = id_priv->id.event_handler;
-	return id_priv->id.event_handler(id, event);
-}
-
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
-			      struct cma_device *cma_dev)
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+			       struct iw_cm_event *iw_event)
 {
-	struct rdma_id_private *dev_id_priv;
-	struct rdma_cm_id *id;
+	struct rdma_cm_id *new_cm_id;
+	struct rdma_id_private *listen_id, *conn_id;
+	struct sockaddr_in *sin;
+	struct net_device *dev = NULL;
+	struct rdma_cm_event event;
 	int ret;
+	struct ib_device_attr attr;
 
-	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
-	if (IS_ERR(id))
-		return;
-
-	dev_id_priv = container_of(id, struct rdma_id_private, id);
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
 
-	dev_id_priv->state = CMA_ADDR_BOUND;
-	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
-	       ip_addr_size(&id_priv->id.route.addr.src_addr));
+	/* Create a new RDMA id for the new IW CM ID */
+	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+				   listen_id->id.context,
+				   RDMA_PS_TCP);
+	if (!new_cm_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+	atomic_inc(&conn_id->dev_remove);
+	conn_id->state = CMA_CONNECT;
+
+	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+	if (!dev) {
+		ret = -EADDRNOTAVAIL;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	conn_id->cm_id.iw = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_iw_handler;
+
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	*sin = iw_event->local_addr;
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	*sin = iw_event->remote_addr;
+
+	ret = ib_query_device(conn_id->id.device, &attr);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	
+	memset(&event, 0, sizeof event);
+	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+	event.param.conn.private_data = iw_event->private_data;
+	event.param.conn.private_data_len = iw_event->private_data_len;
+	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;	
+	event.param.conn.responder_resources = attr.max_qp_rd_atom;
+	ret = conn_id->id.event_handler(&conn_id->id, &event);
+	if (ret) {
+		/* User wants to destroy the CM ID */
+		conn_id->cm_id.iw = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+
+out:
+	if (dev)
+		dev_put(dev);
+	cma_release_remove(listen_id);
+	return ret;
+}
+
+static int cma_ib_listen(struct rdma_id_private *id_priv)
+{
+	struct ib_cm_compare_data compare_data;
+	struct sockaddr *addr;
+	__be64 svc_id;
+	int ret;
+
+	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.ib))
+		return PTR_ERR(id_priv->cm_id.ib);
+
+	addr = &id_priv->id.route.addr.src_addr;
+	svc_id = cma_get_service_id(id_priv->id.ps, addr);
+	if (cma_any_addr(addr))
+		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
+	else {
+		cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
+		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
+	}
+
+	if (ret) {
+		ib_destroy_cm_id(id_priv->cm_id.ib);
+		id_priv->cm_id.ib = NULL;
+	}
+
+	return ret;
+}
+
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+	int ret;
+	struct sockaddr_in *sin;
+
+	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+					    iw_conn_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.iw))
+		return PTR_ERR(id_priv->cm_id.iw);
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	id_priv->cm_id.iw->local_addr = *sin;
+
+	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+	if (ret) {
+		iw_destroy_cm_id(id_priv->cm_id.iw);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return ret;
+}
+
+static int cma_listen_handler(struct rdma_cm_id *id,
+			      struct rdma_cm_event *event)
+{
+	struct rdma_id_private *id_priv = id->context;
+
+	id->context = id_priv->id.context;
+	id->event_handler = id_priv->id.event_handler;
+	return id_priv->id.event_handler(id, event);
+}
+
+static void cma_listen_on_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	struct rdma_id_private *dev_id_priv;
+	struct rdma_cm_id *id;
+	int ret;
+
+	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
+	if (IS_ERR(id))
+		return;
+
+	dev_id_priv = container_of(id, struct rdma_id_private, id);
+
+	dev_id_priv->state = CMA_ADDR_BOUND;
+	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
+	       ip_addr_size(&id_priv->id.route.addr.src_addr));
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1087,12 +1449,17 @@ int rdma_listen(struct rdma_cm_id *id, i
 
 	id_priv->backlog = backlog;
 	if (id->device) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			ret = cma_ib_listen(id_priv);
 			if (ret)
 				goto err;
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
 		default:
 			ret = -ENOSYS;
 			goto err;
@@ -1123,6 +1490,7 @@ static void cma_query_handler(int status
 		work->old_state = CMA_ROUTE_QUERY;
 		work->new_state = CMA_ADDR_RESOLVED;
 		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+		work->event.status = status;
 	}
 
 	queue_work(cma_wq, &work->work);
@@ -1139,25 +1507,27 @@ static int cma_query_ib_route(struct rdm
 	ib_addr_get_dgid(addr, &path_rec.dgid);
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
 	path_rec.numb_path = 1;
+	path_rec.reversible = 1;
 
 	if (tavor_quirk) {
 		path_rec.mtu_selector = IB_SA_LT;
 		path_rec.mtu = IB_MTU_2048;
 	}
 
-	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
 				id_priv->id.port_num, &path_rec,
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
-				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
+				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
+				IB_SA_PATH_REC_REVERSIBLE,
 				timeout_ms, GFP_KERNEL,
 				cma_query_handler, work, &id_priv->query);
 
 	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
 
-static void cma_work_handler(void *data)
+static void cma_work_handler(void *_work)
 {
-	struct cma_work *work = data;
+	struct cma_work *work = _work;
 	struct rdma_id_private *id_priv = work->id;
 	int destroy = 0;
 
@@ -1180,6 +1550,7 @@ out:
 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 {
 	struct rdma_route *route = &id_priv->id.route;
+	struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_work *work;
 	int ret;
 
@@ -1188,7 +1559,7 @@ static int cma_resolve_ib_route(struct r
 		return -ENOMEM;
 
 	work->id = id_priv;
-	INIT_WORK(&work->work, cma_work_handler, work);
+	INIT_WORK(&work->work, cma_work_handler, (void *)work);
 	work->old_state = CMA_ROUTE_QUERY;
 	work->new_state = CMA_ROUTE_RESOLVED;
 	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
@@ -1199,10 +1570,20 @@ static int cma_resolve_ib_route(struct r
 		goto err1;
 	}
 
-	ret = cma_query_ib_route(id_priv, timeout_ms, work);
-	if (ret)
-		goto err2;
-
+	ib_addr_get_sgid(addr, &route->path_rec->sgid);
+	ib_addr_get_dgid(addr, &route->path_rec->dgid);
+	ret = ib_get_path_rec(id_priv->id.device, id_priv->id.port_num,
+			      &route->path_rec->sgid, &route->path_rec->dgid,
+			      ib_addr_get_pkey(addr), route->path_rec);
+	if (!ret) {
+		route->num_paths = 1;
+		queue_work(cma_wq, &work->work);
+	} else {
+		if (ret == -ENODATA)
+			ret = cma_query_ib_route(id_priv, timeout_ms, work);
+		if (ret)
+			goto err2;
+	}
 	return 0;
 err2:
 	kfree(route->path_rec);
@@ -1236,64 +1617,22 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
-static inline u8 cma_get_ib_remote_timeout(struct rdma_id_private *id_priv)
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 {
-	return	id_priv->options.req ?
-		id_priv->options.req->remote_cm_response_timeout :
-		CMA_CM_RESPONSE_TIMEOUT;
-}
-
-static inline u8 cma_get_ib_local_timeout(struct rdma_id_private *id_priv)
-{
-	return	id_priv->options.req ?
-		id_priv->options.req->local_cm_response_timeout :
-		CMA_CM_RESPONSE_TIMEOUT;
-}
-
-static inline u8 cma_get_ib_cm_retries(struct rdma_id_private *id_priv)
-{
-	return	id_priv->options.req ?
-		id_priv->options.req->max_cm_retries : CMA_MAX_CM_RETRIES;
-}
-
-int rdma_get_ib_req_info(struct rdma_cm_id *id, struct ib_cm_req_opt *info)
-{
-	struct rdma_id_private *id_priv;
-
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_ROUTE_RESOLVED))
-		return -EINVAL;
-
-	info->remote_cm_response_timeout = cma_get_ib_remote_timeout(id_priv);
-	info->local_cm_response_timeout = cma_get_ib_local_timeout(id_priv);
-	info->max_cm_retries = cma_get_ib_cm_retries(id_priv);
-	return 0;
-}
-EXPORT_SYMBOL(rdma_get_ib_req_info);
-
-int rdma_set_ib_req_info(struct rdma_cm_id *id, struct ib_cm_req_opt *info)
-{
-	struct rdma_id_private *id_priv;
-
-	if (info->remote_cm_response_timeout > 0x1F ||
-	    info->local_cm_response_timeout > 0x1F ||
-	    info->max_cm_retries > 0xF)
-		return -EINVAL;
-
-	id_priv = container_of(id, struct rdma_id_private, id);
-	if (!cma_comp(id_priv, CMA_ROUTE_RESOLVED))
-		return -EINVAL;
+	struct cma_work *work;
 
-	if (!id_priv->options.req) {
-		id_priv->options.req = kmalloc(sizeof *info, GFP_KERNEL);
-		if (!id_priv->options.req)
-			return -ENOMEM;
-	}
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
 
-	*id_priv->options.req = *info;
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	queue_work(cma_wq, &work->work);
 	return 0;
 }
-EXPORT_SYMBOL(rdma_set_ib_req_info);
 
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
@@ -1305,10 +1644,13 @@ int rdma_resolve_route(struct rdma_cm_id
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1334,19 +1676,18 @@ static int cma_bind_loopback(struct rdma
 	u8 p;
 
 	mutex_lock(&lock);
+	if (list_empty(&dev_list)) {
+		ret = -ENODEV;
+		goto out;
+	}
 	list_for_each_entry(cma_dev, &dev_list, list)
 		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
-			if (!ib_query_port (cma_dev->device, p, &port_attr) &&
+			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
 			    port_attr.state == IB_PORT_ACTIVE)
 				goto port_found;
 
-	if (!list_empty(&dev_list)) {
-		p = 1;
-		cma_dev = list_entry(dev_list.next, struct cma_device, list);
-	} else {
-		ret = -ENODEV;
-		goto out;
-	}
+	p = 1;
+	cma_dev = list_entry(dev_list.next, struct cma_device, list);
 
 port_found:
 	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
@@ -1370,8 +1711,9 @@ static void addr_handler(int status, str
 			 struct rdma_dev_addr *dev_addr, void *context)
 {
 	struct rdma_id_private *id_priv = context;
-	enum rdma_cm_event_type event;
+	struct rdma_cm_event event;
 
+	memset(&event, 0, sizeof event);
 	atomic_inc(&id_priv->dev_remove);
 
 	/*
@@ -1391,14 +1733,15 @@ static void addr_handler(int status, str
 	if (status) {
 		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
 			goto out;
-		event = RDMA_CM_EVENT_ADDR_ERROR;
+		event.event = RDMA_CM_EVENT_ADDR_ERROR;
+		event.status = status;
 	} else {
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
 		       ip_addr_size(src_addr));
-		event = RDMA_CM_EVENT_ADDR_RESOLVED;
+		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	}
 
-	if (cma_notify_user(id_priv, event, status, NULL, 0)) {
+	if (id_priv->id.event_handler(&id_priv->id, &event)) {
 		cma_exch(id_priv, CMA_DESTROYING);
 		cma_release_remove(id_priv);
 		cma_deref_id(id_priv);
@@ -1479,8 +1822,8 @@ int rdma_resolve_addr(struct rdma_cm_id 
 	if (cma_any_addr(dst_addr))
 		ret = cma_resolve_loopback(id_priv);
 	else
-		ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
-				      &id->route.addr.dev_addr,
+		ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
+				      dst_addr, &id->route.addr.dev_addr,
 				      timeout_ms, addr_handler, id_priv);
 	if (ret)
 		goto err;
@@ -1508,33 +1851,74 @@ static int cma_alloc_port(struct idr *ps
 			  unsigned short snum)
 {
 	struct rdma_bind_list *bind_list;
-	int port, start, ret;
+	int port, ret;
 
 	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
 	if (!bind_list)
 		return -ENOMEM;
 
-	start = snum ? snum : sysctl_local_port_range[0];
+	do {
+		ret = idr_get_new_above(ps, bind_list, snum, &port);
+	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+	if (ret)
+		goto err1;
+
+	if (port != snum) {
+		ret = -EADDRNOTAVAIL;
+		goto err2;
+	}
 
+	bind_list->ps = ps;
+	bind_list->port = (unsigned short) port;
+	cma_bind_port(bind_list, id_priv);
+	return 0;
+err2:
+	idr_remove(ps, port);
+err1:
+	kfree(bind_list);
+	return ret;
+}
+
+static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+	struct rdma_bind_list *bind_list;
+	int port, ret;
+
+	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+	if (!bind_list)
+		return -ENOMEM;
+
+retry:
 	do {
-		ret = idr_get_new_above(ps, bind_list, start, &port);
+		ret = idr_get_new_above(ps, bind_list, next_port, &port);
 	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
 
 	if (ret)
-		goto err;
+		goto err1;
 
-	if ((snum && port != snum) ||
-	    (!snum && port > sysctl_local_port_range[1])) {
-		idr_remove(ps, port);
+	if (port > sysctl_local_port_range[1]) {
+		if (next_port != sysctl_local_port_range[0]) {
+			idr_remove(ps, port);
+			next_port = sysctl_local_port_range[0];
+			goto retry;
+		}
 		ret = -EADDRNOTAVAIL;
-		goto err;
+		goto err2;
 	}
 
+	if (port == sysctl_local_port_range[1])
+		next_port = sysctl_local_port_range[0];
+	else
+		next_port = port + 1;
+
 	bind_list->ps = ps;
 	bind_list->port = (unsigned short) port;
 	cma_bind_port(bind_list, id_priv);
 	return 0;
-err:
+err2:
+	idr_remove(ps, port);
+err1:
 	kfree(bind_list);
 	return ret;
 }
@@ -1566,7 +1950,7 @@ static int cma_use_port(struct idr *ps, 
 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
 			return -EADDRNOTAVAIL;
-		
+
 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
 			return -EADDRINUSE;
@@ -1588,13 +1972,19 @@ static int cma_get_port(struct rdma_id_p
 	case RDMA_PS_TCP:
 		ps = &tcp_ps;
 		break;
+	case RDMA_PS_UDP:
+		ps = &udp_ps;
+		break;
+	case RDMA_PS_IPOIB:
+		ps = &ipoib_ps;
+		break;
 	default:
 		return -EPROTONOSUPPORT;
 	}
 
 	mutex_lock(&lock);
 	if (cma_any_port(&id_priv->id.route.addr.src_addr))
-		ret = cma_alloc_port(ps, id_priv, 0);
+		ret = cma_alloc_any_port(ps, id_priv);
 	else
 		ret = cma_use_port(ps, id_priv);
 	mutex_unlock(&lock);
@@ -1616,22 +2006,29 @@ int rdma_bind_addr(struct rdma_cm_id *id
 
 	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-		if (!ret) {
-			mutex_lock(&lock);
-			ret = cma_acquire_dev(id_priv);
-			mutex_unlock(&lock);
-		}
 		if (ret)
-			goto err;
+			goto err1;
+
+		mutex_lock(&lock);
+		ret = cma_acquire_dev(id_priv);
+		mutex_unlock(&lock);
+		if (ret)
+			goto err1;
 	}
 
 	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
 	ret = cma_get_port(id_priv);
 	if (ret)
-		goto err;
+		goto err2;
 
 	return 0;
-err:
+err2:
+	if (!cma_any_addr(addr)) {
+		mutex_lock(&lock);
+		cma_detach_from_dev(id_priv);
+		mutex_unlock(&lock);
+	}
+err1:
 	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
 	return ret;
 }
@@ -1669,6 +2066,110 @@ static int cma_format_hdr(void *hdr, enu
 	return 0;
 }
 
+static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
+				struct ib_cm_event *ib_event)
+{
+	struct rdma_id_private *id_priv = cm_id->context;
+	struct rdma_cm_event event;
+	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
+	int ret = 0;
+
+	memset(&event, 0, sizeof event);
+	atomic_inc(&id_priv->dev_remove);
+	if (!cma_comp(id_priv, CMA_CONNECT))
+		goto out;
+
+	switch (ib_event->event) {
+	case IB_CM_SIDR_REQ_ERROR:
+		event.event = RDMA_CM_EVENT_UNREACHABLE;
+		event.status = -ETIMEDOUT;
+		break;
+	case IB_CM_SIDR_REP_RECEIVED:
+		event.param.ud.private_data = ib_event->private_data;
+		event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
+		if (rep->status != IB_SIDR_SUCCESS) {
+			event.event = RDMA_CM_EVENT_UNREACHABLE;
+			event.status = ib_event->param.sidr_rep_rcvd.status;
+			break;
+		}
+		if (id_priv->qkey != rep->qkey) {
+			event.event = RDMA_CM_EVENT_UNREACHABLE;
+			event.status = -EINVAL;
+			break;
+		}
+		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
+				     id_priv->id.route.path_rec,
+				     &event.param.ud.ah_attr);
+		event.param.ud.qp_num = rep->qpn;
+		event.param.ud.qkey = rep->qkey;
+		event.event = RDMA_CM_EVENT_ESTABLISHED;
+		event.status = 0;
+		break;
+	default:
+		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
+		       ib_event->event);
+		goto out;
+	}
+
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.ib = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+out:
+	cma_release_remove(id_priv);
+	return ret;
+}
+
+static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
+			      struct rdma_conn_param *conn_param)
+{
+	struct ib_cm_sidr_req_param req;
+	struct rdma_route *route;
+	int ret;
+
+	req.private_data_len = sizeof(struct cma_hdr) +
+			       conn_param->private_data_len;
+	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+	if (!req.private_data)
+		return -ENOMEM;
+
+	if (conn_param->private_data && conn_param->private_data_len)
+		memcpy((void *) req.private_data + sizeof(struct cma_hdr),
+		       conn_param->private_data, conn_param->private_data_len);
+
+	route = &id_priv->id.route;
+	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
+	if (ret)
+		goto out;
+
+	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
+					    cma_sidr_rep_handler, id_priv);
+	if (IS_ERR(id_priv->cm_id.ib)) {
+		ret = PTR_ERR(id_priv->cm_id.ib);
+		goto out;
+	}
+
+	req.path = route->path_rec;
+	req.service_id = cma_get_service_id(id_priv->id.ps,
+					    &route->addr.dst_addr);
+	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
+	req.max_cm_retries = CMA_MAX_CM_RETRIES;
+
+	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
+	if (ret) {
+		ib_destroy_cm_id(id_priv->cm_id.ib);
+		id_priv->cm_id.ib = NULL;
+	}
+out:
+	kfree(req.private_data);
+	return ret;
+}
+
 static int cma_connect_ib(struct rdma_id_private *id_priv,
 			  struct rdma_conn_param *conn_param)
 {
@@ -1708,16 +2209,16 @@ static int cma_connect_ib(struct rdma_id
 	req.service_id = cma_get_service_id(id_priv->id.ps,
 					    &route->addr.dst_addr);
 	req.qp_num = id_priv->qp_num;
-	req.qp_type = id_priv->qp_type;
+	req.qp_type = IB_QPT_RC;
 	req.starting_psn = id_priv->seq_num;
 	req.responder_resources = conn_param->responder_resources;
 	req.initiator_depth = conn_param->initiator_depth;
 	req.flow_control = conn_param->flow_control;
 	req.retry_count = conn_param->retry_count;
 	req.rnr_retry_count = conn_param->rnr_retry_count;
-	req.remote_cm_response_timeout = cma_get_ib_remote_timeout(id_priv);
-	req.local_cm_response_timeout = cma_get_ib_local_timeout(id_priv);
-	req.max_cm_retries = cma_get_ib_cm_retries(id_priv);
+	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
+	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
+	req.max_cm_retries = CMA_MAX_CM_RETRIES;
 	req.srq = id_priv->srq ? 1 : 0;
 
 	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
@@ -1731,6 +2232,49 @@ out:
 	return ret;
 }
 
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_id *cm_id;
+	struct sockaddr_in* sin;
+	int ret;
+	struct iw_cm_conn_param iw_param;
+
+	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id)) {
+		ret = PTR_ERR(cm_id);
+		goto out;
+	}
+
+	id_priv->cm_id.iw = cm_id;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+	cm_id->local_addr = *sin;
+
+	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+	cm_id->remote_addr = *sin;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		goto out;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+	ret = iw_cm_connect(cm_id, &iw_param);
+out:
+	if (ret && !IS_ERR(cm_id)) {
+		iw_destroy_cm_id(cm_id);
+		id_priv->cm_id.iw = NULL;
+	}
+	return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1742,13 +2286,18 @@ int rdma_connect(struct rdma_cm_id *id, 
 
 	if (!id->qp) {
 		id_priv->qp_num = conn_param->qp_num;
-		id_priv->qp_type = conn_param->qp_type;
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		ret = cma_connect_ib(id_priv, conn_param);
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		if (cma_is_ud_ps(id->ps))
+			ret = cma_resolve_ib_udp(id_priv, conn_param);
+		else
+			ret = cma_connect_ib(id_priv, conn_param);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_connect_iw(id_priv, conn_param);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -1795,7 +2344,7 @@ static int cma_accept_ib(struct rdma_id_
 	rep.private_data_len = conn_param->private_data_len;
 	rep.responder_resources = conn_param->responder_resources;
 	rep.initiator_depth = conn_param->initiator_depth;
-	rep.target_ack_delay = cma_get_ib_local_timeout(id_priv);
+	rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
 	rep.failover_accepted = 0;
 	rep.flow_control = conn_param->flow_control;
 	rep.rnr_retry_count = conn_param->rnr_retry_count;
@@ -1806,6 +2355,46 @@ out:
 	return ret;
 }
 
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+		  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_conn_param iw_param;
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp) {
+		iw_param.qpn = id_priv->qp_num;
+	} else
+		iw_param.qpn = conn_param->qp_num;
+
+	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
+static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
+			     enum ib_cm_sidr_status status,
+			     const void *private_data, int private_data_len)
+{
+	struct ib_cm_sidr_rep_param rep;
+
+	memset(&rep, 0, sizeof rep);
+	rep.status = status;
+	if (status == IB_SIDR_SUCCESS) {
+		rep.qp_num = id_priv->qp_num;
+		rep.qkey = id_priv->qkey;
+	}
+	rep.private_data = private_data;
+	rep.private_data_len = private_data_len;
+
+	return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1817,17 +2406,23 @@ int rdma_accept(struct rdma_cm_id *id, s
 
 	if (!id->qp && conn_param) {
 		id_priv->qp_num = conn_param->qp_num;
-		id_priv->qp_type = conn_param->qp_type;
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		if (conn_param)
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		if (cma_is_ud_ps(id->ps))
+			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+						conn_param->private_data,
+						conn_param->private_data_len);
+		else if (conn_param)
 			ret = cma_accept_ib(id_priv, conn_param);
 		else
 			ret = cma_rep_recv(id_priv);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_accept_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1844,7 +2439,7 @@ reject:
 }
 EXPORT_SYMBOL(rdma_accept);
 
-int rdma_establish(struct rdma_cm_id *id)
+int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
@@ -1854,8 +2449,8 @@ int rdma_establish(struct rdma_cm_id *id
 		return -EINVAL;
 
 	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		ret = ib_cm_establish(id_priv->cm_id.ib);
+	case RDMA_NODE_IB_CA:
+		ret = ib_cm_notify(id_priv->cm_id.ib, event);
 		break;
 	default:
 		ret = 0;
@@ -1863,7 +2458,7 @@ int rdma_establish(struct rdma_cm_id *id
 	}
 	return ret;
 }
-EXPORT_SYMBOL(rdma_establish);
+EXPORT_SYMBOL(rdma_notify);
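[Editor's illustration: with ib_cm_establish() folded into ib_cm_notify(), a consumer now forwards the relevant QP async events through rdma_notify(). A hedged sketch of such a QP event handler follows; the handler name and use of the cm_id as qp_context are assumptions.]

/* Hypothetical QP async event handler owned by a ULP using the RDMA CM. */
static void example_qp_event_handler(struct ib_event *event, void *context)
{
	struct rdma_cm_id *id = context;

	switch (event->event) {
	case IB_EVENT_COMM_EST:		/* data arrived before the RTU */
	case IB_EVENT_PATH_MIG:		/* hardware completed path migration */
		/* rdma_notify() maps these to cm_establish()/cm_migrate() */
		rdma_notify(id, event->event);
		break;
	default:
		break;
	}
}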
 
 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 		u8 private_data_len)
@@ -1875,11 +2470,19 @@ int rdma_reject(struct rdma_cm_id *id, c
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		return -EINVAL;
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		ret = ib_send_cm_rej(id_priv->cm_id.ib,
-				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
-				     private_data, private_data_len);
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		if (cma_is_ud_ps(id->ps))
+			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
+						private_data, private_data_len);
+		else
+			ret = ib_send_cm_rej(id_priv->cm_id.ib,
+					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
+					     0, private_data, private_data_len);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_reject(id_priv->cm_id.iw,
+				   private_data, private_data_len);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -1899,17 +2502,20 @@ int rdma_disconnect(struct rdma_cm_id *i
 	    !cma_comp(id_priv, CMA_DISCONNECT))
 		return -EINVAL;
 
-	ret = cma_modify_qp_err(id);
-	if (ret)
-		goto out;
-
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_modify_qp_err(id);
+		if (ret)
+			goto out;
 		/* Initiate or respond to a disconnect. */
 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 out:
@@ -1917,6 +2523,178 @@ out:
 }
 EXPORT_SYMBOL(rdma_disconnect);
 
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc = multicast->context;
+	struct rdma_cm_event event;
+	int ret;
+
+	id_priv = mc->id_priv;
+	atomic_inc(&id_priv->dev_remove);
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		goto out;
+
+	if (!status && id_priv->id.qp)
+		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+					 multicast->rec.mlid);
+
+	memset(&event, 0, sizeof event);
+	event.status = status;
+	event.param.ud.private_data = mc->context;
+	if (!status) {
+		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+		ib_init_ah_from_mcmember(id_priv->id.device,
+					 id_priv->id.port_num, &multicast->rec,
+					 &event.param.ud.ah_attr);
+		event.param.ud.qp_num = 0xFFFFFF;
+		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+	} else
+		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	if (ret) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return 0;
+	}
+out:
+	cma_release_remove(id_priv);
+	return 0;
+}
+
+static void cma_set_mgid(struct rdma_id_private *id_priv,
+			 struct sockaddr *addr, union ib_gid *mgid)
+{
+	unsigned char mc_map[MAX_ADDR_LEN];
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
+
+	if (cma_any_addr(addr)) {
+		memset(mgid, 0, sizeof *mgid);
+	} else if ((addr->sa_family == AF_INET6) &&
+		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+								 0xFF10A01B)) {
+		/* IPv6 address is an SA assigned MGID. */
+		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else {
+		ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+		if (id_priv->id.ps == RDMA_PS_UDP)
+			mc_map[7] = 0x01;	/* Use RDMA CM signature */
+		mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+		mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+		*mgid = *(union ib_gid *) (mc_map + 4);
+	}
+}
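[Editor's illustration: to make the IPv4 mapping above concrete, this stand-alone user-space program approximates what ip_ib_mc_map() plus the overrides in cma_set_mgid() produce, assuming the usual FF12:401B::/32 link-local IPoIB layout. It is a sketch, not kernel code.]

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Approximate ip_ib_mc_map() + the cma_set_mgid() signature/P_Key overrides. */
static void example_ipv4_to_mgid(uint32_t group_be, uint16_t pkey,
				 int rdma_cm_udp, uint8_t mgid[16])
{
	uint32_t group = ntohl(group_be) & 0x0fffffff;	/* low 28 bits */

	memset(mgid, 0, 16);
	mgid[0] = 0xff;				/* multicast */
	mgid[1] = 0x12;				/* transient, link-local scope */
	mgid[2] = 0x40;				/* IPv4 signature ... */
	mgid[3] = rdma_cm_udp ? 0x01 : 0x1b;	/* ... or RDMA CM signature */
	mgid[4] = pkey >> 8;			/* mc_map[8] above */
	mgid[5] = pkey & 0xff;			/* mc_map[9] above */
	mgid[12] = (group >> 24) & 0x0f;
	mgid[13] = (group >> 16) & 0xff;
	mgid[14] = (group >> 8) & 0xff;
	mgid[15] = group & 0xff;
}

int main(void)
{
	uint8_t mgid[16];
	int i;

	example_ipv4_to_mgid(inet_addr("224.1.1.1"), 0xffff, 1, mgid);
	for (i = 0; i < 16; i++)
		printf("%02x%s", mgid[i], (i & 1) && i < 15 ? ":" : "");
	printf("\n");	/* ff12:4001:ffff:0000:0000:0000:0001:0101 */
	return 0;
}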
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+				 struct cma_multicast *mc)
+{
+	struct ib_sa_mcmember_rec rec;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	ib_sa_comp_mask comp_mask;
+	int ret;
+
+	ib_addr_get_mgid(dev_addr, &rec.mgid);
+	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+				     &rec.mgid, &rec);
+	if (ret)
+		return ret;
+
+	cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
+	if (id_priv->id.ps == RDMA_PS_UDP)
+		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+	ib_addr_get_sgid(dev_addr, &rec.port_gid);
+	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+	rec.join_state = 1;
+
+	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
+		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+						id_priv->id.port_num, &rec,
+						comp_mask, GFP_KERNEL,
+						cma_ib_mc_handler, mc);
+	if (IS_ERR(mc->multicast.ib))
+		return PTR_ERR(mc->multicast.ib);
+
+	return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			void *context)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		return -EINVAL;
+
+	mc = kmalloc(sizeof *mc, GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	memcpy(&mc->addr, addr, ip_addr_size(addr));
+	mc->context = context;
+	mc->id_priv = id_priv;
+
+	spin_lock(&id_priv->lock);
+	list_add(&mc->list, &id_priv->mc_list);
+	spin_unlock(&id_priv->lock);
+
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_join_ib_multicast(id_priv, mc);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	if (ret) {
+		spin_lock_irq(&id_priv->lock);
+		list_del(&mc->list);
+		spin_unlock_irq(&id_priv->lock);
+		kfree(mc);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	spin_lock_irq(&id_priv->lock);
+	list_for_each_entry(mc, &id_priv->mc_list, list) {
+		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+			list_del(&mc->list);
+			spin_unlock_irq(&id_priv->lock);
+
+			if (id->qp)
+				ib_detach_mcast(id->qp,
+						&mc->multicast.ib->rec.mgid,
+						mc->multicast.ib->rec.mlid);
+			ib_sa_free_multicast(mc->multicast.ib);
+			kfree(mc);
+			return;
+		}
+	}
+	spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
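[Editor's illustration: a usage sketch for the new multicast API. A UD consumer would typically join from its RDMA CM event handler and build an address handle from the join event; everything named example_* below is hypothetical and assumed to be set up elsewhere.]

static struct sockaddr example_mcast_sockaddr;	/* assumed: filled elsewhere */
static struct ib_pd *example_pd;		/* assumed: allocated elsewhere */

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct ib_ah *ah;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		return rdma_join_multicast(id, &example_mcast_sockaddr, NULL);
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		/* The UD QP was already attached by cma_ib_mc_handler(). */
		ah = ib_create_ah(example_pd, &event->param.ud.ah_attr);
		/* sends target event->param.ud.qp_num (0xffffff) and qkey */
		return IS_ERR(ah) ? PTR_ERR(ah) : 0;
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		return event->status;
	default:
		return 0;
	}
}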
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -1928,8 +2706,6 @@ static void cma_add_one(struct ib_device
 
 	cma_dev->device = device;
 	cma_dev->node_guid = device->node_guid;
-	if (!cma_dev->node_guid)
-		goto err;
 
 	init_completion(&cma_dev->comp);
 	atomic_set(&cma_dev->refcount, 1);
@@ -1941,13 +2717,11 @@ static void cma_add_one(struct ib_device
 	list_for_each_entry(id_priv, &listen_any_list, list)
 		cma_listen_on_dev(id_priv, cma_dev);
 	mutex_unlock(&lock);
-	return;
-err:
-	kfree(cma_dev);
 }
 
 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 {
+	struct rdma_cm_event event;
 	enum cma_state state;
 
 	/* Record that we want to remove the device */
@@ -1962,18 +2736,16 @@ static int cma_remove_id_dev(struct rdma
 	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
 		return 0;
 
-	return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
-			       0, NULL, 0);
+	memset(&event, 0, sizeof event);
+	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
+	return id_priv->id.event_handler(&id_priv->id, &event);
 }
 
 static void cma_process_remove(struct cma_device *cma_dev)
 {
-	struct list_head remove_list;
 	struct rdma_id_private *id_priv;
 	int ret;
 
-	INIT_LIST_HEAD(&remove_list);
-
 	mutex_lock(&lock);
 	while (!list_empty(&cma_dev->id_list)) {
 		id_priv = list_entry(cma_dev->id_list.next,
@@ -1984,8 +2756,7 @@ static void cma_process_remove(struct cm
 			continue;
 		}
 
-		list_del(&id_priv->list);
-		list_add_tail(&id_priv->list, &remove_list);
+		list_del_init(&id_priv->list);
 		atomic_inc(&id_priv->refcount);
 		mutex_unlock(&lock);
 
@@ -2022,16 +2793,25 @@ static int cma_init(void)
 {
 	int ret;
 
+	get_random_bytes(&next_port, sizeof next_port);
+	next_port = ((unsigned int) next_port % 
+		    (sysctl_local_port_range[1] - sysctl_local_port_range[0])) +
+		    sysctl_local_port_range[0];
 	cma_wq = create_singlethread_workqueue("rdma_cm_wq");
 	if (!cma_wq)
 		return -ENOMEM;
 
+	ib_sa_register_client(&sa_client);
+	rdma_addr_register_client(&addr_client);
+
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
 	return 0;
 
 err:
+	rdma_addr_unregister_client(&addr_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	return ret;
 }
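[Editor's illustration: the start-port selection added to cma_init() is a modulo reduction of a random value into the local port range. A quick user-space check of the arithmetic, assuming the usual 32768..61000 defaults for ip_local_port_range:]

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const unsigned int lo = 32768, hi = 61000;	/* assumed defaults */
	unsigned int next_port, r;
	int i;

	for (i = 0; i < 5; i++) {
		r = (unsigned int) rand();
		next_port = (r % (hi - lo)) + lo;	/* same reduction as cma_init() */
		printf("%u -> %u\n", r, next_port);	/* always in [32768, 61000) */
	}
	return 0;
}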
@@ -2039,9 +2819,13 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	rdma_addr_unregister_client(&addr_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
+	idr_destroy(&udp_ps);
+	idr_destroy(&ipoib_ps);
 }
 
 module_init(cma_init);
--- linux-2.6.18.noarch/drivers/infiniband/core/cm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/cm.c
@@ -54,12 +54,16 @@ MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
-static int mra_timeout_limit = 30000;
-
-module_param(mra_timeout_limit, int, 0444);
-MODULE_PARM_DESC(mra_timeout_limit,
-                 "Limit the MRA timeout according to this value if != 0");
+#define PFX    "ib_cm: "
 
+/*
+ * Limit CM message timeouts to something reasonable:
+ * 8 seconds per message, with up to 15 retries
+ */
+static int max_timeout = 21;
+module_param(max_timeout, int, 0644);
+MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
+                             "(default=21, or ~8 seconds)");
 
 static void cm_add_one(struct ib_device *device);
 static void cm_remove_one(struct ib_device *device);
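[Editor's illustration: a worked check of the new max_timeout default. IBA time is 4.096 us * 2^t; cm_convert_to_ms() in this file rounds that to 1 << (t - 8) ms, so t = 21 gives roughly 8.6 s exact or 8192 ms approximated, and with up to 15 retries a single message can take on the order of two minutes. Stand-alone sketch, not code from the patch:]

#include <stdio.h>

/* same approximation used by cm_convert_to_ms() in this file */
static int convert_to_ms(int iba_time)
{
	return 1 << (iba_time - 8 > 0 ? iba_time - 8 : 0);
}

int main(void)
{
	int t = 21;	/* max_timeout default */

	printf("exact:  %.3f s\n", 4.096e-6 * (1UL << t));	/* ~8.590 s */
	printf("approx: %d ms\n", convert_to_ms(t));		/* 8192 ms */
	return 0;
}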
@@ -154,12 +158,12 @@ struct cm_id_private {
 	__be32 rq_psn;
 	int timeout_ms;
 	enum ib_mtu path_mtu;
+	__be16 pkey;
 	u8 private_data_len;
 	u8 max_cm_retries;
 	u8 peer_to_peer;
 	u8 responder_resources;
 	u8 initiator_depth;
-	u8 local_ack_timeout;
 	u8 retry_count;
 	u8 rnr_retry_count;
 	u8 service_timeout;
@@ -188,7 +192,7 @@ static int cm_alloc_msg(struct cm_id_pri
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);
 
-	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
@@ -247,11 +251,10 @@ static void * cm_copy_private_data(const
 	if (!private_data || !private_data_len)
 		return NULL;
 
-	data = kmalloc(private_data_len, GFP_KERNEL);
+	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
 	if (!data)
 		return ERR_PTR(-ENOMEM);
 
-	memcpy(data, private_data, private_data_len);
 	return data;
 }
 
@@ -316,7 +319,9 @@ static int cm_alloc_id(struct cm_id_priv
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
 		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
-					next_id++, &id);
+					next_id, &id);
+		if (!ret)
+			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
 
@@ -698,7 +703,7 @@ static void cm_enter_timewait(struct cm_
 	 * timewait before notifying the user that we've exited timewait.
 	 */
 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
-	wait_time = cm_convert_to_ms(cm_id_priv->local_ack_timeout);
+	wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1);
 	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
 			   msecs_to_jiffies(wait_time));
 	cm_id_priv->timewait_info = NULL;
@@ -898,11 +903,23 @@ static void cm_format_req(struct cm_req_
 	cm_req_set_init_depth(req_msg, param->initiator_depth);
 	cm_req_set_remote_resp_timeout(req_msg,
 				       param->remote_cm_response_timeout);
+	if (param->remote_cm_response_timeout > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
+		       "%d, decreasing\n", param->remote_cm_response_timeout,
+		       max_timeout);
+		cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
+	}
 	cm_req_set_qp_type(req_msg, param->qp_type);
 	cm_req_set_flow_ctrl(req_msg, param->flow_control);
 	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
 	cm_req_set_local_resp_timeout(req_msg,
 				      param->local_cm_response_timeout);
+	if (param->local_cm_response_timeout > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
+		       "%d, decreasing\n", param->local_cm_response_timeout,
+		       max_timeout);
+		cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
+	}
 	cm_req_set_retry_count(req_msg, param->retry_count);
 	req_msg->pkey = param->primary_path->pkey;
 	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
@@ -1012,11 +1029,17 @@ int ib_send_cm_req(struct ib_cm_id *cm_i
 				    param->primary_path->packet_life_time) * 2 +
 				 cm_convert_to_ms(
 				    param->remote_cm_response_timeout);
+	if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
+		       cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
 	cm_id_priv->max_cm_retries = param->max_cm_retries;
 	cm_id_priv->initiator_depth = param->initiator_depth;
 	cm_id_priv->responder_resources = param->responder_resources;
 	cm_id_priv->retry_count = param->retry_count;
 	cm_id_priv->path_mtu = param->primary_path->mtu;
+	cm_id_priv->pkey = param->primary_path->pkey;
 	cm_id_priv->qp_type = param->qp_type;
 
 	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
@@ -1031,8 +1054,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_i
 
 	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
 	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
-	cm_id_priv->local_ack_timeout =
-				cm_req_get_primary_local_ack_timeout(req_msg);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
@@ -1307,26 +1328,29 @@ static struct cm_id_private * cm_match_r
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
-	/* Check for duplicate REQ and stale connections. */
+	/* Check for possible duplicate REQ. */
 	spin_lock_irqsave(&cm.lock, flags);
 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
-	if (!timewait_info)
-		timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
-
 	if (timewait_info) {
 		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
 					   timewait_info->work.remote_id);
-		cm_cleanup_timewait(cm_id_priv->timewait_info);
 		spin_unlock_irqrestore(&cm.lock, flags);
 		if (cur_cm_id_priv) {
 			cm_dup_req_handler(work, cur_cm_id_priv);
 			cm_deref_id(cur_cm_id_priv);
-		} else
-			cm_issue_rej(work->port, work->mad_recv_wc,
-				     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
-				     NULL, 0);
-		listen_cm_id_priv = NULL;
-		goto out;
+		}
+		return NULL;
+	}
+
+	/* Check for stale connections. */
+	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+	if (timewait_info) {
+		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
+			     NULL, 0);
+		return NULL;
 	}
 
 	/* Find matching listen request. */
@@ -1372,7 +1396,7 @@ static int cm_req_handler(struct cm_work
 							    id.local_id);
 	if (IS_ERR(cm_id_priv->timewait_info)) {
 		ret = PTR_ERR(cm_id_priv->timewait_info);
-		goto error1;
+		goto destroy;
 	}
 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1381,7 +1405,8 @@ static int cm_req_handler(struct cm_work
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
 		ret = -EINVAL;
-		goto error2;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
 	}
 
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1391,24 +1416,40 @@ static int cm_req_handler(struct cm_work
 
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-	if (ret)
-		goto error3;
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
 	if (req_msg->alt_local_lid) {
 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-		if (ret)
-			goto error3;
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
 					cm_req_get_local_resp_timeout(req_msg));
+	if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
+		       "decreasing used timeout_ms\n",
+		       cm_req_get_local_resp_timeout(req_msg), max_timeout);
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
+
 	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
 	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
 	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
 	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
 	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
+	cm_id_priv->pkey = req_msg->pkey;
 	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
-	cm_id_priv->local_ack_timeout =
-				cm_req_get_primary_local_ack_timeout(req_msg);
 	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
 	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
 	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
@@ -1418,12 +1459,11 @@ static int cm_req_handler(struct cm_work
 	cm_deref_id(listen_cm_id_priv);
 	return 0;
 
-error3:	atomic_dec(&cm_id_priv->refcount);
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
-	cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:	kfree(cm_id_priv->timewait_info);
-	cm_id_priv->timewait_info = NULL;
-error1:	ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+	ib_destroy_cm_id(cm_id);
 	return ret;
 }
 
@@ -1713,7 +1753,7 @@ static int cm_establish_handler(struct c
 	unsigned long flags;
 	int ret;
 
-	/* See comment in ib_cm_establish about lookup. */
+	/* See comment in cm_establish about lookup. */
 	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
 	if (!cm_id_priv)
 		return -EINVAL;
@@ -2306,9 +2346,12 @@ static int cm_mra_handler(struct cm_work
 					cm_mra_get_service_timeout(mra_msg);
 	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
 		  cm_convert_to_ms(cm_id_priv->av.packet_life_time);
-
-	if (mra_timeout_limit && timeout > mra_timeout_limit)
-		timeout = mra_timeout_limit;
+	if (timeout > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
+		       "decreasing used timeout_ms\n", timeout,
+		       cm_convert_to_ms(max_timeout));
+		timeout = cm_convert_to_ms(max_timeout);
+	}
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->id.state) {
@@ -2402,11 +2445,16 @@ int ib_send_cm_lap(struct ib_cm_id *cm_i
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id->state != IB_CM_ESTABLISHED ||
-	    cm_id->lap_state != IB_CM_LAP_IDLE) {
+	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
+	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+	if (ret)
+		goto out;
+
 	ret = cm_alloc_msg(cm_id_priv, &msg);
 	if (ret)
 		goto out;
@@ -2431,7 +2479,8 @@ out:	spin_unlock_irqrestore(&cm_id_priv-
 }
 EXPORT_SYMBOL(ib_send_cm_lap);
 
-static void cm_format_path_from_lap(struct ib_sa_path_rec *path,
+static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
+				    struct ib_sa_path_rec *path,
 				    struct cm_lap_msg *lap_msg)
 {
 	memset(path, 0, sizeof *path);
@@ -2443,10 +2492,10 @@ static void cm_format_path_from_lap(stru
 	path->hop_limit = lap_msg->alt_hop_limit;
 	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
 	path->reversible = 1;
-	/* pkey is same as in REQ */
+	path->pkey = cm_id_priv->pkey;
 	path->sl = cm_lap_get_sl(lap_msg);
 	path->mtu_selector = IB_SA_EQ;
-	/* mtu is same as in REQ */
+	path->mtu = cm_id_priv->path_mtu;
 	path->rate_selector = IB_SA_EQ;
 	path->rate = cm_lap_get_packet_rate(lap_msg);
 	path->packet_life_time_selector = IB_SA_EQ;
@@ -2472,7 +2521,7 @@ static int cm_lap_handler(struct cm_work
 
 	param = &work->cm_event.param.lap_rcvd;
 	param->alternate_path = &work->path[0];
-	cm_format_path_from_lap(param->alternate_path, lap_msg);
+	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
 	work->cm_event.private_data = &lap_msg->private_data;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
@@ -2480,6 +2529,7 @@ static int cm_lap_handler(struct cm_work
 		goto unlock;
 
 	switch (cm_id_priv->id.lap_state) {
+	case IB_CM_LAP_UNINIT:
 	case IB_CM_LAP_IDLE:
 		break;
 	case IB_CM_MRA_LAP_SENT:
@@ -2502,6 +2552,10 @@ static int cm_lap_handler(struct cm_work
 
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
+	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
+				&cm_id_priv->av);
+	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -2701,6 +2755,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id 
 	cm_id->service_id = param->service_id;
 	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
 	cm_id_priv->timeout_ms = param->timeout_ms;
+	if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
+		       "decreasing used timeout_ms\n", param->timeout_ms,
+		       cm_convert_to_ms(max_timeout));
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
 	cm_id_priv->max_cm_retries = param->max_cm_retries;
 	ret = cm_alloc_msg(cm_id_priv, &msg);
 	if (ret)
@@ -3040,7 +3100,7 @@ static void cm_work_handler(void *data)
 		cm_free_work(work);
 }
 
-int ib_cm_establish(struct ib_cm_id *cm_id)
+static int cm_establish(struct ib_cm_id *cm_id)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_work *work;
@@ -3088,7 +3148,44 @@ int ib_cm_establish(struct ib_cm_id *cm_
 out:
 	return ret;
 }
-EXPORT_SYMBOL(ib_cm_establish);
+
+static int cm_migrate(struct ib_cm_id *cm_id)
+{
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state == IB_CM_ESTABLISHED &&
+	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
+	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
+		cm_id->lap_state = IB_CM_LAP_IDLE;
+		cm_id_priv->av = cm_id_priv->alt_av;
+	} else
+		ret = -EINVAL;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
+{
+	int ret;
+
+	switch (event) {
+	case IB_EVENT_COMM_EST:
+		ret = cm_establish(cm_id);
+		break;
+	case IB_EVENT_PATH_MIG:
+		ret = cm_migrate(cm_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_cm_notify);
 
 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_recv_wc *mad_recv_wc)
@@ -3173,10 +3270,10 @@ static int cm_init_qp_init_attr(struct c
 	case IB_CM_ESTABLISHED:
 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
 				IB_QP_PKEY_INDEX | IB_QP_PORT;
-		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
-					   IB_ACCESS_REMOTE_WRITE;
+		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
@@ -3221,6 +3318,9 @@ static int cm_init_qp_rtr_attr(struct cm
 		if (cm_id_priv->alt_av.ah_attr.dlid) {
 			*qp_attr_mask |= IB_QP_ALT_PATH;
 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+			qp_attr->alt_timeout =
+					cm_id_priv->alt_av.packet_life_time + 1;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
 		}
 		ret = 0;
@@ -3251,19 +3351,31 @@ static int cm_init_qp_rts_attr(struct cm
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
 	case IB_CM_ESTABLISHED:
-		*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
-		qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
-		if (cm_id_priv->qp_type == IB_QPT_RC) {
-			*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
-					 IB_QP_RNR_RETRY |
-					 IB_QP_MAX_QP_RD_ATOMIC;
-			qp_attr->timeout = cm_id_priv->local_ack_timeout;
-			qp_attr->retry_cnt = cm_id_priv->retry_count;
-			qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
-			qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
-		}
-		if (cm_id_priv->alt_av.ah_attr.dlid) {
-			*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
+		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
+			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
+			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
+			if (cm_id_priv->qp_type == IB_QPT_RC) {
+				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+						 IB_QP_RNR_RETRY |
+						 IB_QP_MAX_QP_RD_ATOMIC;
+				qp_attr->timeout =
+					cm_id_priv->av.packet_life_time + 1;
+				qp_attr->retry_cnt = cm_id_priv->retry_count;
+				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+				qp_attr->max_rd_atomic =
+					cm_id_priv->initiator_depth;
+			}
+			if (cm_id_priv->alt_av.ah_attr.dlid) {
+				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
+				qp_attr->path_mig_state = IB_MIG_REARM;
+			}
+		} else {
+			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
+			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+			qp_attr->alt_timeout =
+				cm_id_priv->alt_av.packet_life_time + 1;
+			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
 			qp_attr->path_mig_state = IB_MIG_REARM;
 		}
 		ret = 0;
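[Editor's illustration: the RTS changes above reach consumers only through ib_cm_init_qp_attr(). A hedged sketch of the usual transition pattern follows; the helper name is hypothetical and error paths are kept minimal.]

/* Hypothetical helper: move a QP to the given state using CM-derived attrs. */
static int example_modify_qp(struct ib_cm_id *cm_id, struct ib_qp *qp,
			     enum ib_qp_state state)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = state;		/* IB_QPS_INIT, _RTR or _RTS */
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	/*
	 * After a received LAP, the RTS attributes above now carry only
	 * IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE (REARM).
	 */
	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}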
@@ -3317,6 +3429,9 @@ static void cm_add_one(struct ib_device 
 	int ret;
 	u8 i;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
--- linux-2.6.18.noarch/drivers/infiniband/core/cm.c.orig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/cm.c.orig
@@ -0,0 +1,3561 @@
+/*
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include "cm_msgs.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+#define PFX    "ib_cm: "
+
+/*
+ * Limit CM message timeouts to something reasonable:
+ * 8 seconds per message, with up to 15 retries
+ */
+static int max_timeout = 21;
+module_param(max_timeout, int, 0644);
+MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
+                             "(default=21, or ~8 seconds)");
+
+static void cm_add_one(struct ib_device *device);
+static void cm_remove_one(struct ib_device *device);
+
+static struct ib_client cm_client = {
+	.name   = "cm",
+	.add    = cm_add_one,
+	.remove = cm_remove_one
+};
+
+static struct ib_cm {
+	spinlock_t lock;
+	struct list_head device_list;
+	rwlock_t device_lock;
+	struct rb_root listen_service_table;
+	u64 listen_service_id;
+	/* struct rb_root peer_service_table; todo: fix peer to peer */
+	struct rb_root remote_qp_table;
+	struct rb_root remote_id_table;
+	struct rb_root remote_sidr_table;
+	struct idr local_id_table;
+	__be32 random_id_operand;
+	struct list_head timewait_list;
+	struct workqueue_struct *wq;
+} cm;
+
+struct cm_port {
+	struct cm_device *cm_dev;
+	struct ib_mad_agent *mad_agent;
+	u8 port_num;
+};
+
+struct cm_device {
+	struct list_head list;
+	struct ib_device *device;
+	__be64 ca_guid;
+	struct cm_port port[0];
+};
+
+struct cm_av {
+	struct cm_port *port;
+	union ib_gid dgid;
+	struct ib_ah_attr ah_attr;
+	u16 pkey_index;
+	u8 packet_life_time;
+};
+
+struct cm_work {
+	struct delayed_work work;
+	struct list_head list;
+	struct cm_port *port;
+	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
+	__be32 local_id;			/* Established / timewait */
+	__be32 remote_id;
+	struct ib_cm_event cm_event;
+	struct ib_sa_path_rec path[0];
+};
+
+struct cm_timewait_info {
+	struct cm_work work;			/* Must be first. */
+	struct list_head list;
+	struct rb_node remote_qp_node;
+	struct rb_node remote_id_node;
+	__be64 remote_ca_guid;
+	__be32 remote_qpn;
+	u8 inserted_remote_qp;
+	u8 inserted_remote_id;
+};
+
+struct cm_id_private {
+	struct ib_cm_id	id;
+
+	struct rb_node service_node;
+	struct rb_node sidr_id_node;
+	spinlock_t lock;	/* Do not acquire inside cm.lock */
+	struct completion comp;
+	atomic_t refcount;
+
+	struct ib_mad_send_buf *msg;
+	struct cm_timewait_info *timewait_info;
+	/* todo: use alternate port on send failure */
+	struct cm_av av;
+	struct cm_av alt_av;
+	struct ib_cm_compare_data *compare_data;
+
+	void *private_data;
+	__be64 tid;
+	__be32 local_qpn;
+	__be32 remote_qpn;
+	enum ib_qp_type qp_type;
+	__be32 sq_psn;
+	__be32 rq_psn;
+	int timeout_ms;
+	enum ib_mtu path_mtu;
+	__be16 pkey;
+	u8 private_data_len;
+	u8 max_cm_retries;
+	u8 peer_to_peer;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 retry_count;
+	u8 rnr_retry_count;
+	u8 service_timeout;
+
+	struct list_head work_list;
+	atomic_t work_count;
+};
+
+static void cm_work_handler(struct work_struct *work);
+
+static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
+{
+	if (atomic_dec_and_test(&cm_id_priv->refcount))
+		complete(&cm_id_priv->comp);
+}
+
+static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
+			struct ib_mad_send_buf **msg)
+{
+	struct ib_mad_agent *mad_agent;
+	struct ib_mad_send_buf *m;
+	struct ib_ah *ah;
+
+	mad_agent = cm_id_priv->av.port->mad_agent;
+	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
+	if (IS_ERR(ah))
+		return PTR_ERR(ah);
+
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
+			       cm_id_priv->av.pkey_index,
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+			       GFP_ATOMIC);
+	if (IS_ERR(m)) {
+		ib_destroy_ah(ah);
+		return PTR_ERR(m);
+	}
+
+	/* Timeout set by caller if response is expected. */
+	m->ah = ah;
+	m->retries = cm_id_priv->max_cm_retries;
+
+	atomic_inc(&cm_id_priv->refcount);
+	m->context[0] = cm_id_priv;
+	*msg = m;
+	return 0;
+}
+
+static int cm_alloc_response_msg(struct cm_port *port,
+				 struct ib_mad_recv_wc *mad_recv_wc,
+				 struct ib_mad_send_buf **msg)
+{
+	struct ib_mad_send_buf *m;
+	struct ib_ah *ah;
+
+	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
+				  mad_recv_wc->recv_buf.grh, port->port_num);
+	if (IS_ERR(ah))
+		return PTR_ERR(ah);
+
+	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+			       GFP_ATOMIC);
+	if (IS_ERR(m)) {
+		ib_destroy_ah(ah);
+		return PTR_ERR(m);
+	}
+	m->ah = ah;
+	*msg = m;
+	return 0;
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+	ib_destroy_ah(msg->ah);
+	if (msg->context[0])
+		cm_deref_id(msg->context[0]);
+	ib_free_send_mad(msg);
+}
+
+static void * cm_copy_private_data(const void *private_data,
+				   u8 private_data_len)
+{
+	void *data;
+
+	if (!private_data || !private_data_len)
+		return NULL;
+
+	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	return data;
+}
+
+static void cm_set_private_data(struct cm_id_private *cm_id_priv,
+				 void *private_data, u8 private_data_len)
+{
+	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
+		kfree(cm_id_priv->private_data);
+
+	cm_id_priv->private_data = private_data;
+	cm_id_priv->private_data_len = private_data_len;
+}
+
+static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+				    struct ib_grh *grh, struct cm_av *av)
+{
+	av->port = port;
+	av->pkey_index = wc->pkey_index;
+	ib_init_ah_from_wc(port->cm_dev->device, port->port_num, wc,
+			   grh, &av->ah_attr);
+}
+
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+{
+	struct cm_device *cm_dev;
+	struct cm_port *port = NULL;
+	unsigned long flags;
+	int ret;
+	u8 p;
+
+	read_lock_irqsave(&cm.device_lock, flags);
+	list_for_each_entry(cm_dev, &cm.device_list, list) {
+		if (!ib_find_cached_gid(cm_dev->device, &path->sgid,
+					&p, NULL)) {
+			port = &cm_dev->port[p-1];
+			break;
+		}
+	}
+	read_unlock_irqrestore(&cm.device_lock, flags);
+
+	if (!port)
+		return -EINVAL;
+
+	ret = ib_find_cached_pkey(cm_dev->device, port->port_num,
+				  be16_to_cpu(path->pkey), &av->pkey_index);
+	if (ret)
+		return ret;
+
+	av->port = port;
+	ib_init_ah_from_path(cm_dev->device, port->port_num, path,
+			     &av->ah_attr);
+	av->packet_life_time = path->packet_life_time;
+	return 0;
+}
+
+static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+{
+	unsigned long flags;
+	int ret, id;
+	static int next_id;
+
+	do {
+		spin_lock_irqsave(&cm.lock, flags);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+					next_id, &id);
+		if (!ret)
+			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
+		spin_unlock_irqrestore(&cm.lock, flags);
+	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
+	return ret;
+}
+
+static void cm_free_id(__be32 local_id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	idr_remove(&cm.local_id_table,
+		   (__force int) (local_id ^ cm.random_id_operand));
+	spin_unlock_irqrestore(&cm.lock, flags);
+}
+
+static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+{
+	struct cm_id_private *cm_id_priv;
+
+	cm_id_priv = idr_find(&cm.local_id_table,
+			      (__force int) (local_id ^ cm.random_id_operand));
+	if (cm_id_priv) {
+		if (cm_id_priv->id.remote_id == remote_id)
+			atomic_inc(&cm_id_priv->refcount);
+		else
+			cm_id_priv = NULL;
+	}
+
+	return cm_id_priv;
+}
+
+static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
+{
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	cm_id_priv = cm_get_id(local_id, remote_id);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	return cm_id_priv;
+}
+
+static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+{
+	int i;
+
+	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
+		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
+					     ((unsigned long *) mask)[i];
+}
+
+static int cm_compare_data(struct ib_cm_compare_data *src_data,
+			   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+	u8 dst[IB_CM_COMPARE_SIZE];
+
+	if (!src_data || !dst_data)
+		return 0;
+
+	cm_mask_copy(src, src_data->data, dst_data->mask);
+	cm_mask_copy(dst, dst_data->data, src_data->mask);
+	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+}
+
+static int cm_compare_private_data(u8 *private_data,
+				   struct ib_cm_compare_data *dst_data)
+{
+	u8 src[IB_CM_COMPARE_SIZE];
+
+	if (!dst_data)
+		return 0;
+
+	cm_mask_copy(src, private_data, dst_data->mask);
+	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+}
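[Editor's illustration: the masked private-data comparison used for listen matching, mirrored in a small user-space sketch. The real IB_CM_COMPARE_SIZE is 64 bytes; 8 is used here only to keep the example short, and the byte-wise loop stands in for the kernel's unsigned-long copy.]

#include <stdio.h>
#include <string.h>

#define EXAMPLE_COMPARE_SIZE 8	/* IB_CM_COMPARE_SIZE is 64 upstream */

static void mask_copy(unsigned char *dst, const unsigned char *src,
		      const unsigned char *mask)
{
	int i;

	for (i = 0; i < EXAMPLE_COMPARE_SIZE; i++)
		dst[i] = src[i] & mask[i];
}

int main(void)
{
	/* listener only cares about the first two bytes of private data */
	unsigned char want[EXAMPLE_COMPARE_SIZE] = { 0xab, 0xcd };	/* stored masked */
	unsigned char mask[EXAMPLE_COMPARE_SIZE] = { 0xff, 0xff };
	unsigned char rcvd[EXAMPLE_COMPARE_SIZE] = { 0xab, 0xcd, 0x11, 0x22 };
	unsigned char tmp[EXAMPLE_COMPARE_SIZE];

	mask_copy(tmp, rcvd, mask);
	printf("match: %s\n",
	       memcmp(tmp, want, EXAMPLE_COMPARE_SIZE) ? "no" : "yes");
	return 0;
}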
+
+static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+{
+	struct rb_node **link = &cm.listen_service_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_id_private *cur_cm_id_priv;
+	__be64 service_id = cm_id_priv->id.service_id;
+	__be64 service_mask = cm_id_priv->id.service_mask;
+	int data_cmp;
+
+	while (*link) {
+		parent = *link;
+		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+					  service_node);
+		data_cmp = cm_compare_data(cm_id_priv->compare_data,
+					   cur_cm_id_priv->compare_data);
+		if ((cur_cm_id_priv->id.service_mask & service_id) ==
+		    (service_mask & cur_cm_id_priv->id.service_id) &&
+		    (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
+		    !data_cmp)
+			return cur_cm_id_priv;
+
+		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+			link = &(*link)->rb_left;
+		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+			link = &(*link)->rb_right;
+		else if (service_id < cur_cm_id_priv->id.service_id)
+			link = &(*link)->rb_left;
+		else if (service_id > cur_cm_id_priv->id.service_id)
+			link = &(*link)->rb_right;
+		else if (data_cmp < 0)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+	rb_link_node(&cm_id_priv->service_node, parent, link);
+	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
+	return NULL;
+}
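[Editor's illustration: the wildcard listen lookup reduces to a masked equality test because the service_id is stored pre-masked at listen time. A one-line numeric check with arbitrary values:]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* listen registered with service_id 0x1000000000000500, mask ~0xff */
	uint64_t mask = ~0xffULL;
	uint64_t listen_id = 0x1000000000000500ULL & mask;	/* stored masked */
	uint64_t req_id = 0x10000000000005abULL;		/* incoming REQ */

	/* same test as cm_find_listen(): masked request equals stored id */
	printf("%s\n", (mask & req_id) == listen_id ? "match" : "no match");
	return 0;
}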
+
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+					     __be64 service_id,
+					     u8 *private_data)
+{
+	struct rb_node *node = cm.listen_service_table.rb_node;
+	struct cm_id_private *cm_id_priv;
+	int data_cmp;
+
+	while (node) {
+		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
+		data_cmp = cm_compare_private_data(private_data,
+						   cm_id_priv->compare_data);
+		if ((cm_id_priv->id.service_mask & service_id) ==
+		     cm_id_priv->id.service_id &&
+		    (cm_id_priv->id.device == device) && !data_cmp)
+			return cm_id_priv;
+
+		if (device < cm_id_priv->id.device)
+			node = node->rb_left;
+		else if (device > cm_id_priv->id.device)
+			node = node->rb_right;
+		else if (service_id < cm_id_priv->id.service_id)
+			node = node->rb_left;
+		else if (service_id > cm_id_priv->id.service_id)
+			node = node->rb_right;
+		else if (data_cmp < 0)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+	return NULL;
+}
+
+static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
+						     *timewait_info)
+{
+	struct rb_node **link = &cm.remote_id_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_timewait_info *cur_timewait_info;
+	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
+	__be32 remote_id = timewait_info->work.remote_id;
+
+	while (*link) {
+		parent = *link;
+		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+					     remote_id_node);
+		if (remote_id < cur_timewait_info->work.remote_id)
+			link = &(*link)->rb_left;
+		else if (remote_id > cur_timewait_info->work.remote_id)
+			link = &(*link)->rb_right;
+		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_left;
+		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_right;
+		else
+			return cur_timewait_info;
+	}
+	timewait_info->inserted_remote_id = 1;
+	rb_link_node(&timewait_info->remote_id_node, parent, link);
+	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
+	return NULL;
+}
+
+static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
+						   __be32 remote_id)
+{
+	struct rb_node *node = cm.remote_id_table.rb_node;
+	struct cm_timewait_info *timewait_info;
+
+	while (node) {
+		timewait_info = rb_entry(node, struct cm_timewait_info,
+					 remote_id_node);
+		if (remote_id < timewait_info->work.remote_id)
+			node = node->rb_left;
+		else if (remote_id > timewait_info->work.remote_id)
+			node = node->rb_right;
+		else if (remote_ca_guid < timewait_info->remote_ca_guid)
+			node = node->rb_left;
+		else if (remote_ca_guid > timewait_info->remote_ca_guid)
+			node = node->rb_right;
+		else
+			return timewait_info;
+	}
+	return NULL;
+}
+
+static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
+						      *timewait_info)
+{
+	struct rb_node **link = &cm.remote_qp_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_timewait_info *cur_timewait_info;
+	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
+	__be32 remote_qpn = timewait_info->remote_qpn;
+
+	while (*link) {
+		parent = *link;
+		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
+					     remote_qp_node);
+		if (remote_qpn < cur_timewait_info->remote_qpn)
+			link = &(*link)->rb_left;
+		else if (remote_qpn > cur_timewait_info->remote_qpn)
+			link = &(*link)->rb_right;
+		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_left;
+		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+			link = &(*link)->rb_right;
+		else
+			return cur_timewait_info;
+	}
+	timewait_info->inserted_remote_qp = 1;
+	rb_link_node(&timewait_info->remote_qp_node, parent, link);
+	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+	return NULL;
+}
+
+static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
+						    *cm_id_priv)
+{
+	struct rb_node **link = &cm.remote_sidr_table.rb_node;
+	struct rb_node *parent = NULL;
+	struct cm_id_private *cur_cm_id_priv;
+	union ib_gid *port_gid = &cm_id_priv->av.dgid;
+	__be32 remote_id = cm_id_priv->id.remote_id;
+
+	while (*link) {
+		parent = *link;
+		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
+					  sidr_id_node);
+		if (remote_id < cur_cm_id_priv->id.remote_id)
+			link = &(*link)->rb_left;
+		else if (remote_id > cur_cm_id_priv->id.remote_id)
+			link = &(*link)->rb_right;
+		else {
+			int cmp;
+			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
+				     sizeof *port_gid);
+			if (cmp < 0)
+				link = &(*link)->rb_left;
+			else if (cmp > 0)
+				link = &(*link)->rb_right;
+			else
+				return cur_cm_id_priv;
+		}
+	}
+	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
+	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+	return NULL;
+}
+
+static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
+			       enum ib_cm_sidr_status status)
+{
+	struct ib_cm_sidr_rep_param param;
+
+	memset(&param, 0, sizeof param);
+	param.status = status;
+	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
+				 void *context)
+{
+	struct cm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	cm_id_priv->id.state = IB_CM_IDLE;
+	cm_id_priv->id.device = device;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	cm_id_priv->id.remote_cm_qpn = 1;
+	ret = cm_alloc_id(cm_id_priv);
+	if (ret)
+		goto error;
+
+	spin_lock_init(&cm_id_priv->lock);
+	init_completion(&cm_id_priv->comp);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	atomic_set(&cm_id_priv->work_count, -1);
+	atomic_set(&cm_id_priv->refcount, 1);
+	return &cm_id_priv->id;
+
+error:
+	kfree(cm_id_priv);
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ib_create_cm_id);
+
+static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+{
+	struct cm_work *work;
+
+	if (list_empty(&cm_id_priv->work_list))
+		return NULL;
+
+	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
+	list_del(&work->list);
+	return work;
+}
+
+static void cm_free_work(struct cm_work *work)
+{
+	if (work->mad_recv_wc)
+		ib_free_recv_mad(work->mad_recv_wc);
+	kfree(work);
+}
+
+static inline int cm_convert_to_ms(int iba_time)
+{
+	/* approximate conversion to ms from 4.096us x 2^iba_time */
+	return 1 << max(iba_time - 8, 0);
+}
+
+static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+{
+	if (timewait_info->inserted_remote_id) {
+		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
+		timewait_info->inserted_remote_id = 0;
+	}
+
+	if (timewait_info->inserted_remote_qp) {
+		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
+		timewait_info->inserted_remote_qp = 0;
+	}
+}
+
+static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+{
+	struct cm_timewait_info *timewait_info;
+
+	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
+	if (!timewait_info)
+		return ERR_PTR(-ENOMEM);
+
+	timewait_info->work.local_id = local_id;
+	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
+	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
+	return timewait_info;
+}
+
+static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
+{
+	int wait_time;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	/*
+	 * The cm_id could be destroyed by the user before we exit timewait.
+	 * To protect against this, we search for the cm_id after exiting
+	 * timewait before notifying the user that we've exited timewait.
+	 */
+	cm_id_priv->id.state = IB_CM_TIMEWAIT;
+	wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1);
+	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
+			   msecs_to_jiffies(wait_time));
+	cm_id_priv->timewait_info = NULL;
+}
+
+static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
+{
+	unsigned long flags;
+
+	cm_id_priv->id.state = IB_CM_IDLE;
+	if (cm_id_priv->timewait_info) {
+		spin_lock_irqsave(&cm.lock, flags);
+		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		kfree(cm_id_priv->timewait_info);
+		cm_id_priv->timewait_info = NULL;
+	}
+}
+
+static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_work *work;
+	unsigned long flags;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+retest:
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id->state) {
+	case IB_CM_LISTEN:
+		cm_id->state = IB_CM_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_lock_irqsave(&cm.lock, flags);
+		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		break;
+	case IB_CM_SIDR_REQ_SENT:
+		cm_id->state = IB_CM_IDLE;
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	case IB_CM_SIDR_REQ_RCVD:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
+		break;
+	case IB_CM_REQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+			       &cm_id_priv->av.port->cm_dev->ca_guid,
+			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+			       NULL, 0);
+		break;
+	case IB_CM_REQ_RCVD:
+		if (err == -ENOMEM) {
+			/* Do not reject to allow future retries. */
+			cm_reset_to_idle(cm_id_priv);
+			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		} else {
+			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+				       NULL, 0, NULL, 0);
+		}
+		break;
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		/* Fall through */
+	case IB_CM_MRA_REQ_SENT:
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+			       NULL, 0, NULL, 0);
+		break;
+	case IB_CM_ESTABLISHED:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_dreq(cm_id, NULL, 0);
+		goto retest;
+	case IB_CM_DREQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		cm_enter_timewait(cm_id_priv);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	case IB_CM_DREQ_RCVD:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_drep(cm_id, NULL, 0);
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		break;
+	}
+
+	cm_free_id(cm_id->local_id);
+	cm_deref_id(cm_id_priv);
+	wait_for_completion(&cm_id_priv->comp);
+	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
+		cm_free_work(work);
+	kfree(cm_id_priv->compare_data);
+	kfree(cm_id_priv->private_data);
+	kfree(cm_id_priv);
+}
+
+void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+{
+	cm_destroy_id(cm_id, 0);
+}
+EXPORT_SYMBOL(ib_destroy_cm_id);
+
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+		 struct ib_cm_compare_data *compare_data)
+{
+	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	service_mask = service_mask ? service_mask :
+		       __constant_cpu_to_be64(~0ULL);
+	service_id &= service_mask;
+	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	if (cm_id->state != IB_CM_IDLE)
+		return -EINVAL;
+
+	if (compare_data) {
+		cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
+						   GFP_KERNEL);
+		if (!cm_id_priv->compare_data)
+			return -ENOMEM;
+		cm_mask_copy(cm_id_priv->compare_data->data,
+			     compare_data->data, compare_data->mask);
+		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
+		       IB_CM_COMPARE_SIZE);
+	}
+
+	cm_id->state = IB_CM_LISTEN;
+
+	spin_lock_irqsave(&cm.lock, flags);
+	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
+		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
+		cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
+	} else {
+		cm_id->service_id = service_id;
+		cm_id->service_mask = service_mask;
+	}
+	cur_cm_id_priv = cm_insert_listen(cm_id_priv);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	if (cur_cm_id_priv) {
+		cm_id->state = IB_CM_IDLE;
+		kfree(cm_id_priv->compare_data);
+		cm_id_priv->compare_data = NULL;
+		ret = -EBUSY;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_cm_listen);
+
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
+			  enum cm_msg_sequence msg_seq)
+{
+	u64 hi_tid, low_tid;
+
+	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
+			  (msg_seq << 30));
+	return cpu_to_be64(hi_tid | low_tid);
+}
+
+static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
+			      __be16 attr_id, __be64 tid)
+{
+	hdr->base_version  = IB_MGMT_BASE_VERSION;
+	hdr->mgmt_class	   = IB_MGMT_CLASS_CM;
+	hdr->class_version = IB_CM_CLASS_VERSION;
+	hdr->method	   = IB_MGMT_METHOD_SEND;
+	hdr->attr_id	   = attr_id;
+	hdr->tid	   = tid;
+}
+
+static void cm_format_req(struct cm_req_msg *req_msg,
+			  struct cm_id_private *cm_id_priv,
+			  struct ib_cm_req_param *param)
+{
+	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+
+	req_msg->local_comm_id = cm_id_priv->id.local_id;
+	req_msg->service_id = param->service_id;
+	req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
+	cm_req_set_resp_res(req_msg, param->responder_resources);
+	cm_req_set_init_depth(req_msg, param->initiator_depth);
+	cm_req_set_remote_resp_timeout(req_msg,
+				       param->remote_cm_response_timeout);
+	if (param->remote_cm_response_timeout > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
+		       "%d, decreasing\n", param->remote_cm_response_timeout,
+		       max_timeout);
+		cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
+	}
+	cm_req_set_qp_type(req_msg, param->qp_type);
+	cm_req_set_flow_ctrl(req_msg, param->flow_control);
+	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
+	cm_req_set_local_resp_timeout(req_msg,
+				      param->local_cm_response_timeout);
+	if (param->local_cm_response_timeout > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
+		       "%d, decreasing\n", param->local_cm_response_timeout,
+		       max_timeout);
+		cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
+	}
+	cm_req_set_retry_count(req_msg, param->retry_count);
+	req_msg->pkey = param->primary_path->pkey;
+	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
+	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+	cm_req_set_srq(req_msg, param->srq);
+
+	req_msg->primary_local_lid = param->primary_path->slid;
+	req_msg->primary_remote_lid = param->primary_path->dlid;
+	req_msg->primary_local_gid = param->primary_path->sgid;
+	req_msg->primary_remote_gid = param->primary_path->dgid;
+	cm_req_set_primary_flow_label(req_msg, param->primary_path->flow_label);
+	cm_req_set_primary_packet_rate(req_msg, param->primary_path->rate);
+	req_msg->primary_traffic_class = param->primary_path->traffic_class;
+	req_msg->primary_hop_limit = param->primary_path->hop_limit;
+	cm_req_set_primary_sl(req_msg, param->primary_path->sl);
+	cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
+	cm_req_set_primary_local_ack_timeout(req_msg,
+		min(31, param->primary_path->packet_life_time + 1));
+
+	if (param->alternate_path) {
+		req_msg->alt_local_lid = param->alternate_path->slid;
+		req_msg->alt_remote_lid = param->alternate_path->dlid;
+		req_msg->alt_local_gid = param->alternate_path->sgid;
+		req_msg->alt_remote_gid = param->alternate_path->dgid;
+		cm_req_set_alt_flow_label(req_msg,
+					  param->alternate_path->flow_label);
+		cm_req_set_alt_packet_rate(req_msg, param->alternate_path->rate);
+		req_msg->alt_traffic_class = param->alternate_path->traffic_class;
+		req_msg->alt_hop_limit = param->alternate_path->hop_limit;
+		cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
+		cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
+		cm_req_set_alt_local_ack_timeout(req_msg,
+			min(31, param->alternate_path->packet_life_time + 1));
+	}
+
+	if (param->private_data && param->private_data_len)
+		memcpy(req_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+static int cm_validate_req_param(struct ib_cm_req_param *param)
+{
+	/* peer-to-peer not supported */
+	if (param->peer_to_peer)
+		return -EINVAL;
+
+	if (!param->primary_path)
+		return -EINVAL;
+
+	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+		return -EINVAL;
+
+	if (param->private_data &&
+	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	if (param->alternate_path &&
+	    (param->alternate_path->pkey != param->primary_path->pkey ||
+	     param->alternate_path->mtu != param->primary_path->mtu))
+		return -EINVAL;
+
+	return 0;
+}
+
+int ib_send_cm_req(struct ib_cm_id *cm_id,
+		   struct ib_cm_req_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_req_msg *req_msg;
+	unsigned long flags;
+	int ret;
+
+	ret = cm_validate_req_param(param);
+	if (ret)
+		return ret;
+
+	/* Verify that we're not in timewait. */
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_IDLE) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto out;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+							    id.local_id);
+	if (IS_ERR(cm_id_priv->timewait_info)) {
+		ret = PTR_ERR(cm_id_priv->timewait_info);
+		goto out;
+	}
+
+	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+	if (ret)
+		goto error1;
+	if (param->alternate_path) {
+		ret = cm_init_av_by_path(param->alternate_path,
+					 &cm_id_priv->alt_av);
+		if (ret)
+			goto error1;
+	}
+	cm_id->service_id = param->service_id;
+	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
+	cm_id_priv->timeout_ms = cm_convert_to_ms(
+				    param->primary_path->packet_life_time) * 2 +
+				 cm_convert_to_ms(
+				    param->remote_cm_response_timeout);
+	if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
+		       cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
+	cm_id_priv->max_cm_retries = param->max_cm_retries;
+	cm_id_priv->initiator_depth = param->initiator_depth;
+	cm_id_priv->responder_resources = param->responder_resources;
+	cm_id_priv->retry_count = param->retry_count;
+	cm_id_priv->path_mtu = param->primary_path->mtu;
+	cm_id_priv->pkey = param->primary_path->pkey;
+	cm_id_priv->qp_type = param->qp_type;
+
+	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
+	if (ret)
+		goto error1;
+
+	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+	cm_format_req(req_msg, cm_id_priv, param);
+	cm_id_priv->tid = req_msg->hdr.tid;
+	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
+	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
+
+	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
+	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto error2;
+	}
+	BUG_ON(cm_id->state != IB_CM_IDLE);
+	cm_id->state = IB_CM_REQ_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
+
+error2:	cm_free_msg(cm_id_priv->msg);
+error1:	kfree(cm_id_priv->timewait_info);
+out:	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_req);
+
+static int cm_issue_rej(struct cm_port *port,
+			struct ib_mad_recv_wc *mad_recv_wc,
+			enum ib_cm_rej_reason reason,
+			enum cm_msg_response msg_rejected,
+			void *ari, u8 ari_length)
+{
+	struct ib_mad_send_buf *msg = NULL;
+	struct cm_rej_msg *rej_msg, *rcv_msg;
+	int ret;
+
+	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+	if (ret)
+		return ret;
+
+	/* We just need common CM header information.  Cast to any message. */
+	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
+	rej_msg = (struct cm_rej_msg *) msg->mad;
+
+	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
+	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
+	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
+	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
+	rej_msg->reason = cpu_to_be16(reason);
+
+	if (ari && ari_length) {
+		cm_rej_set_reject_info_len(rej_msg, ari_length);
+		memcpy(rej_msg->ari, ari, ari_length);
+	}
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		cm_free_msg(msg);
+
+	return ret;
+}
+
+static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
+				    __be32 local_qpn, __be32 remote_qpn)
+{
+	return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
+		((local_ca_guid == remote_ca_guid) &&
+		 (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
+}
+
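+/*
+ * Convert the path information carried in a received REQ into path records
+ * seen from our (passive) side: the sender's local LID/GID become our
+ * destination, and its remote LID/GID become our source.
+ */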
+static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
+					    struct ib_sa_path_rec *primary_path,
+					    struct ib_sa_path_rec *alt_path)
+{
+	memset(primary_path, 0, sizeof *primary_path);
+	primary_path->dgid = req_msg->primary_local_gid;
+	primary_path->sgid = req_msg->primary_remote_gid;
+	primary_path->dlid = req_msg->primary_local_lid;
+	primary_path->slid = req_msg->primary_remote_lid;
+	primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
+	primary_path->hop_limit = req_msg->primary_hop_limit;
+	primary_path->traffic_class = req_msg->primary_traffic_class;
+	primary_path->reversible = 1;
+	primary_path->pkey = req_msg->pkey;
+	primary_path->sl = cm_req_get_primary_sl(req_msg);
+	primary_path->mtu_selector = IB_SA_EQ;
+	primary_path->mtu = cm_req_get_path_mtu(req_msg);
+	primary_path->rate_selector = IB_SA_EQ;
+	primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+	primary_path->packet_life_time_selector = IB_SA_EQ;
+	primary_path->packet_life_time =
+		cm_req_get_primary_local_ack_timeout(req_msg);
+	primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
+
+	if (req_msg->alt_local_lid) {
+		memset(alt_path, 0, sizeof *alt_path);
+		alt_path->dgid = req_msg->alt_local_gid;
+		alt_path->sgid = req_msg->alt_remote_gid;
+		alt_path->dlid = req_msg->alt_local_lid;
+		alt_path->slid = req_msg->alt_remote_lid;
+		alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
+		alt_path->hop_limit = req_msg->alt_hop_limit;
+		alt_path->traffic_class = req_msg->alt_traffic_class;
+		alt_path->reversible = 1;
+		alt_path->pkey = req_msg->pkey;
+		alt_path->sl = cm_req_get_alt_sl(req_msg);
+		alt_path->mtu_selector = IB_SA_EQ;
+		alt_path->mtu = cm_req_get_path_mtu(req_msg);
+		alt_path->rate_selector = IB_SA_EQ;
+		alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+		alt_path->packet_life_time_selector = IB_SA_EQ;
+		alt_path->packet_life_time =
+			cm_req_get_alt_local_ack_timeout(req_msg);
+		alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
+	}
+}
+
+static void cm_format_req_event(struct cm_work *work,
+				struct cm_id_private *cm_id_priv,
+				struct ib_cm_id *listen_id)
+{
+	struct cm_req_msg *req_msg;
+	struct ib_cm_req_event_param *param;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.req_rcvd;
+	param->listen_id = listen_id;
+	param->port = cm_id_priv->av.port->port_num;
+	param->primary_path = &work->path[0];
+	if (req_msg->alt_local_lid)
+		param->alternate_path = &work->path[1];
+	else
+		param->alternate_path = NULL;
+	param->remote_ca_guid = req_msg->local_ca_guid;
+	param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
+	param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+	param->qp_type = cm_req_get_qp_type(req_msg);
+	param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
+	param->responder_resources = cm_req_get_init_depth(req_msg);
+	param->initiator_depth = cm_req_get_resp_res(req_msg);
+	param->local_cm_response_timeout =
+					cm_req_get_remote_resp_timeout(req_msg);
+	param->flow_control = cm_req_get_flow_ctrl(req_msg);
+	param->remote_cm_response_timeout =
+					cm_req_get_local_resp_timeout(req_msg);
+	param->retry_count = cm_req_get_retry_count(req_msg);
+	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+	param->srq = cm_req_get_srq(req_msg);
+	work->cm_event.private_data = &req_msg->private_data;
+}
+
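+/*
+ * Deliver the current event to the consumer, then drain any work queued
+ * for this cm_id while its callback ran.  A non-zero return from the
+ * callback destroys the cm_id.
+ */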
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+			    struct cm_work *work)
+{
+	unsigned long flags;
+	int ret;
+
+	/* We will typically only have the current event to report. */
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+	cm_free_work(work);
+
+	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		work = cm_dequeue_work(cm_id_priv);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(!work);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
+						&work->cm_event);
+		cm_free_work(work);
+	}
+	cm_deref_id(cm_id_priv);
+	if (ret)
+		cm_destroy_id(&cm_id_priv->id, ret);
+}
+
+static void cm_format_mra(struct cm_mra_msg *mra_msg,
+			  struct cm_id_private *cm_id_priv,
+			  enum cm_msg_response msg_mraed, u8 service_timeout,
+			  const void *private_data, u8 private_data_len)
+{
+	cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
+	cm_mra_set_msg_mraed(mra_msg, msg_mraed);
+	mra_msg->local_comm_id = cm_id_priv->id.local_id;
+	mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_mra_set_service_timeout(mra_msg, service_timeout);
+
+	if (private_data && private_data_len)
+		memcpy(mra_msg->private_data, private_data, private_data_len);
+}
+
+static void cm_format_rej(struct cm_rej_msg *rej_msg,
+			  struct cm_id_private *cm_id_priv,
+			  enum ib_cm_rej_reason reason,
+			  void *ari,
+			  u8 ari_length,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
+	rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+	switch(cm_id_priv->id.state) {
+	case IB_CM_REQ_RCVD:
+		rej_msg->local_comm_id = 0;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		break;
+	case IB_CM_MRA_REQ_SENT:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+		break;
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+		break;
+	default:
+		rej_msg->local_comm_id = cm_id_priv->id.local_id;
+		cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+		break;
+	}
+
+	rej_msg->reason = cpu_to_be16(reason);
+	if (ari && ari_length) {
+		cm_rej_set_reject_info_len(rej_msg, ari_length);
+		memcpy(rej_msg->ari, ari, ari_length);
+	}
+
+	if (private_data && private_data_len)
+		memcpy(rej_msg->private_data, private_data, private_data_len);
+}
+
+static void cm_dup_req_handler(struct cm_work *work,
+			       struct cm_id_private *cm_id_priv)
+{
+	struct ib_mad_send_buf *msg = NULL;
+	unsigned long flags;
+	int ret;
+
+	/* Quick state check to discard duplicate REQs. */
+	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+		return;
+
+	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+	if (ret)
+		return;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_MRA_REQ_SENT:
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+		break;
+	case IB_CM_TIMEWAIT:
+		cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
+			      IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+		break;
+	default:
+		goto unlock;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		goto free;
+	return;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+free:	cm_free_msg(msg);
+}
+
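+/*
+ * Match an incoming REQ against the listen table.  Duplicate REQs are
+ * answered from the existing connection state, stale connections and
+ * unknown service IDs are rejected, and on success the listening cm_id
+ * is returned with a reference held.
+ */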
+static struct cm_id_private * cm_match_req(struct cm_work *work,
+					   struct cm_id_private *cm_id_priv)
+{
+	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
+	struct cm_timewait_info *timewait_info;
+	struct cm_req_msg *req_msg;
+	unsigned long flags;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+	/* Check for possible duplicate REQ. */
+	spin_lock_irqsave(&cm.lock, flags);
+	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
+	if (timewait_info) {
+		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+					   timewait_info->work.remote_id);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		if (cur_cm_id_priv) {
+			cm_dup_req_handler(work, cur_cm_id_priv);
+			cm_deref_id(cur_cm_id_priv);
+		}
+		return NULL;
+	}
+
+	/* Check for stale connections. */
+	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+	if (timewait_info) {
+		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
+			     NULL, 0);
+		return NULL;
+	}
+
+	/* Find matching listen request. */
+	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+					   req_msg->service_id,
+					   req_msg->private_data);
+	if (!listen_cm_id_priv) {
+		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
+			     NULL, 0);
+		goto out;
+	}
+	atomic_inc(&listen_cm_id_priv->refcount);
+	atomic_inc(&cm_id_priv->refcount);
+	cm_id_priv->id.state = IB_CM_REQ_RCVD;
+	atomic_inc(&cm_id_priv->work_count);
+	spin_unlock_irqrestore(&cm.lock, flags);
+out:
+	return listen_cm_id_priv;
+}
+
+static int cm_req_handler(struct cm_work *work)
+{
+	struct ib_cm_id *cm_id;
+	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
+	struct cm_req_msg *req_msg;
+	int ret;
+
+	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
+
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	cm_id_priv->id.remote_id = req_msg->local_comm_id;
+	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
+				&cm_id_priv->av);
+	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
+							    id.local_id);
+	if (IS_ERR(cm_id_priv->timewait_info)) {
+		ret = PTR_ERR(cm_id_priv->timewait_info);
+		goto destroy;
+	}
+	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
+	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
+	cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+
+	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
+	if (!listen_cm_id_priv) {
+		ret = -EINVAL;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
+	}
+
+	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+	cm_id_priv->id.context = listen_cm_id_priv->id.context;
+	cm_id_priv->id.service_id = req_msg->service_id;
+	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
+
+	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
+	if (req_msg->alt_local_lid) {
+		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
+	}
+	cm_id_priv->tid = req_msg->hdr.tid;
+	cm_id_priv->timeout_ms = cm_convert_to_ms(
+					cm_req_get_local_resp_timeout(req_msg));
+	if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
+		printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
+		       "decreasing used timeout_ms\n",
+		       cm_req_get_local_resp_timeout(req_msg), max_timeout);
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
+
+	cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
+	cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
+	cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
+	cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
+	cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
+	cm_id_priv->pkey = req_msg->pkey;
+	cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
+	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
+	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
+	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+	cm_process_work(cm_id_priv, work);
+	cm_deref_id(listen_cm_id_priv);
+	return 0;
+
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
+	cm_deref_id(listen_cm_id_priv);
+destroy:
+	ib_destroy_cm_id(cm_id);
+	return ret;
+}
+
+static void cm_format_rep(struct cm_rep_msg *rep_msg,
+			  struct cm_id_private *cm_id_priv,
+			  struct ib_cm_rep_param *param)
+{
+	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+	rep_msg->local_comm_id = cm_id_priv->id.local_id;
+	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
+	rep_msg->resp_resources = param->responder_resources;
+	rep_msg->initiator_depth = param->initiator_depth;
+	cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay);
+	cm_rep_set_failover(rep_msg, param->failover_accepted);
+	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
+	cm_rep_set_srq(rep_msg, param->srq);
+	rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+
+	if (param->private_data && param->private_data_len)
+		memcpy(rep_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+int ib_send_cm_rep(struct ib_cm_id *cm_id,
+		   struct ib_cm_rep_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	struct cm_rep_msg *rep_msg;
+	unsigned long flags;
+	int ret;
+
+	if (param->private_data &&
+	    param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_REQ_RCVD &&
+	    cm_id->state != IB_CM_MRA_REQ_SENT) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	rep_msg = (struct cm_rep_msg *) msg->mad;
+	cm_format_rep(rep_msg, cm_id_priv, param);
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+	cm_id->state = IB_CM_REP_SENT;
+	cm_id_priv->msg = msg;
+	cm_id_priv->initiator_depth = param->initiator_depth;
+	cm_id_priv->responder_resources = param->responder_resources;
+	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rep);
+
+static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
+			  struct cm_id_private *cm_id_priv,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
+	rtu_msg->local_comm_id = cm_id_priv->id.local_id;
+	rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+	if (private_data && private_data_len)
+		memcpy(rtu_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_rtu(struct ib_cm_id *cm_id,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	void *data;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	data = cm_copy_private_data(private_data, private_data_len);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_REP_RCVD &&
+	    cm_id->state != IB_CM_MRA_REP_SENT) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto error;
+
+	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+		      private_data, private_data_len);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		kfree(data);
+		return ret;
+	}
+
+	cm_id->state = IB_CM_ESTABLISHED;
+	cm_set_private_data(cm_id_priv, data, private_data_len);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
+
+error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	kfree(data);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rtu);
+
+static void cm_format_rep_event(struct cm_work *work)
+{
+	struct cm_rep_msg *rep_msg;
+	struct ib_cm_rep_event_param *param;
+
+	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.rep_rcvd;
+	param->remote_ca_guid = rep_msg->local_ca_guid;
+	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
+	param->responder_resources = rep_msg->initiator_depth;
+	param->initiator_depth = rep_msg->resp_resources;
+	param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+	param->failover_accepted = cm_rep_get_failover(rep_msg);
+	param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
+	param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+	param->srq = cm_rep_get_srq(rep_msg);
+	work->cm_event.private_data = &rep_msg->private_data;
+}
+
+static void cm_dup_rep_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rep_msg *rep_msg;
+	struct ib_mad_send_buf *msg = NULL;
+	unsigned long flags;
+	int ret;
+
+	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
+				   rep_msg->local_comm_id);
+	if (!cm_id_priv)
+		return;
+
+	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+	if (ret)
+		goto deref;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
+		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+	else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+	else
+		goto unlock;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		goto free;
+	goto deref;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+free:	cm_free_msg(msg);
+deref:	cm_deref_id(cm_id_priv);
+}
+
+static int cm_rep_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rep_msg *rep_msg;
+	unsigned long flags;
+	int ret;
+
+	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+	if (!cm_id_priv) {
+		cm_dup_rep_handler(work);
+		return -EINVAL;
+	}
+
+	cm_format_rep_event(work);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
+	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+
+	spin_lock(&cm.lock);
+	/* Check for duplicate REP. */
+	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto error;
+	}
+	/* Check for a stale connection. */
+	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
+			 &cm.remote_id_table);
+		cm_id_priv->timewait_info->inserted_remote_id = 0;
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_issue_rej(work->port, work->mad_recv_wc,
+			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
+			     NULL, 0);
+		ret = -EINVAL;
+		goto error;
+	}
+	spin_unlock(&cm.lock);
+
+	cm_id_priv->id.state = IB_CM_REP_RCVD;
+	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+	cm_id_priv->initiator_depth = rep_msg->resp_resources;
+	cm_id_priv->responder_resources = rep_msg->initiator_depth;
+	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
+	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+
+	/* todo: handle peer_to_peer */
+
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
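+	/*
+	 * If no other event is being processed for this cm_id, handle the
+	 * REP directly; otherwise queue it so events reach the consumer in
+	 * order.
+	 */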
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+
+error:
+	cm_deref_id(cm_id_priv);
+	return ret;
+}
+
+static int cm_establish_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	/* See comment in cm_establish about lookup. */
+	cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto out;
+	}
+
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static int cm_rtu_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rtu_msg *rtu_msg;
+	unsigned long flags;
+	int ret;
+
+	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
+				   rtu_msg->local_comm_id);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	work->cm_event.private_data = &rtu_msg->private_data;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
+	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto out;
+	}
+	cm_id_priv->id.state = IB_CM_ESTABLISHED;
+
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
+			  struct cm_id_private *cm_id_priv,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
+			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
+	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
+	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+
+	if (private_data && private_data_len)
+		memcpy(dreq_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id,
+		    const void *private_data,
+		    u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_ESTABLISHED) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret) {
+		cm_enter_timewait(cm_id_priv);
+		goto out;
+	}
+
+	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
+		       private_data, private_data_len);
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		cm_enter_timewait(cm_id_priv);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+	cm_id->state = IB_CM_DREQ_SENT;
+	cm_id_priv->msg = msg;
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_dreq);
+
+static void cm_format_drep(struct cm_drep_msg *drep_msg,
+			  struct cm_id_private *cm_id_priv,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
+	drep_msg->local_comm_id = cm_id_priv->id.local_id;
+	drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+
+	if (private_data && private_data_len)
+		memcpy(drep_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_drep(struct ib_cm_id *cm_id,
+		    const void *private_data,
+		    u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	void *data;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	data = cm_copy_private_data(private_data, private_data_len);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_DREQ_RCVD) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		kfree(data);
+		return -EINVAL;
+	}
+
+	cm_set_private_data(cm_id_priv, data, private_data_len);
+	cm_enter_timewait(cm_id_priv);
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
+		       private_data, private_data_len);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_drep);
+
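+/* Answer a DREQ that no longer matches any connection with a bare DREP. */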
+static int cm_issue_drep(struct cm_port *port,
+			 struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_mad_send_buf *msg = NULL;
+	struct cm_dreq_msg *dreq_msg;
+	struct cm_drep_msg *drep_msg;
+	int ret;
+
+	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+	if (ret)
+		return ret;
+
+	dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
+	drep_msg = (struct cm_drep_msg *) msg->mad;
+
+	cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
+	drep_msg->remote_comm_id = dreq_msg->local_comm_id;
+	drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		cm_free_msg(msg);
+
+	return ret;
+}
+
+static int cm_dreq_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_dreq_msg *dreq_msg;
+	struct ib_mad_send_buf *msg = NULL;
+	unsigned long flags;
+	int ret;
+
+	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
+				   dreq_msg->local_comm_id);
+	if (!cm_id_priv) {
+		cm_issue_drep(work->port, work->mad_recv_wc);
+		return -EINVAL;
+	}
+
+	work->cm_event.private_data = &dreq_msg->private_data;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+		goto unlock;
+
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REP_SENT:
+	case IB_CM_DREQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		break;
+	case IB_CM_ESTABLISHED:
+	case IB_CM_MRA_REP_RCVD:
+		break;
+	case IB_CM_TIMEWAIT:
+		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+			goto unlock;
+
+		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
+			       cm_id_priv->private_data,
+			       cm_id_priv->private_data_len);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+		if (ib_post_send_mad(msg, NULL))
+			cm_free_msg(msg);
+		goto deref;
+	default:
+		goto unlock;
+	}
+	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
+	cm_id_priv->tid = dreq_msg->hdr.tid;
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+deref:	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static int cm_drep_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_drep_msg *drep_msg;
+	unsigned long flags;
+	int ret;
+
+	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
+				   drep_msg->local_comm_id);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	work->cm_event.private_data = &drep_msg->private_data;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
+	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto out;
+	}
+	cm_enter_timewait(cm_id_priv);
+
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id,
+		   enum ib_cm_rej_reason reason,
+		   void *ari,
+		   u8 ari_length,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
+	    (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id->state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+		ret = cm_alloc_msg(cm_id_priv, &msg);
+		if (!ret)
+			cm_format_rej((struct cm_rej_msg *) msg->mad,
+				      cm_id_priv, reason, ari, ari_length,
+				      private_data, private_data_len);
+
+		cm_reset_to_idle(cm_id_priv);
+		break;
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		ret = cm_alloc_msg(cm_id_priv, &msg);
+		if (!ret)
+			cm_format_rej((struct cm_rej_msg *) msg->mad,
+				      cm_id_priv, reason, ari, ari_length,
+				      private_data, private_data_len);
+
+		cm_enter_timewait(cm_id_priv);
+		break;
+	default:
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (ret)
+		goto out;
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret)
+		cm_free_msg(msg);
+
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_rej);
+
+static void cm_format_rej_event(struct cm_work *work)
+{
+	struct cm_rej_msg *rej_msg;
+	struct ib_cm_rej_event_param *param;
+
+	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.rej_rcvd;
+	param->ari = rej_msg->ari;
+	param->ari_length = cm_rej_get_reject_info_len(rej_msg);
+	param->reason = __be16_to_cpu(rej_msg->reason);
+	work->cm_event.private_data = &rej_msg->private_data;
+}
+
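+/*
+ * Locate the cm_id a received REJ refers to.  A timeout reject is matched
+ * through the timewait table using the CA GUID carried in the ARI; other
+ * rejects are matched by communication ID.
+ */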
+static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+{
+	struct cm_timewait_info *timewait_info;
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+	__be32 remote_id;
+
+	remote_id = rej_msg->local_comm_id;
+
+	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
+		spin_lock_irqsave(&cm.lock, flags);
+		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
+						  remote_id);
+		if (!timewait_info) {
+			spin_unlock_irqrestore(&cm.lock, flags);
+			return NULL;
+		}
+		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+				      (timewait_info->work.local_id ^
+				       cm.random_id_operand));
+		if (cm_id_priv) {
+			if (cm_id_priv->id.remote_id == remote_id)
+				atomic_inc(&cm_id_priv->refcount);
+			else
+				cm_id_priv = NULL;
+		}
+		spin_unlock_irqrestore(&cm.lock, flags);
+	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
+		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+	else
+		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+
+	return cm_id_priv;
+}
+
+static int cm_rej_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_rej_msg *rej_msg;
+	unsigned long flags;
+	int ret;
+
+	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_rejected_id(rej_msg);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	cm_format_rej_event(work);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		/* fall through */
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+		if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+			cm_enter_timewait(cm_id_priv);
+		else
+			cm_reset_to_idle(cm_id_priv);
+		break;
+	case IB_CM_DREQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		/* fall through */
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+	case IB_CM_ESTABLISHED:
+		cm_enter_timewait(cm_id_priv);
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+int ib_send_cm_mra(struct ib_cm_id *cm_id,
+		   u8 service_timeout,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	void *data;
+	unsigned long flags;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	data = cm_copy_private_data(private_data, private_data_len);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch(cm_id_priv->id.state) {
+	case IB_CM_REQ_RCVD:
+		ret = cm_alloc_msg(cm_id_priv, &msg);
+		if (ret)
+			goto error1;
+
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REQ, service_timeout,
+			      private_data, private_data_len);
+		ret = ib_post_send_mad(msg, NULL);
+		if (ret)
+			goto error2;
+		cm_id->state = IB_CM_MRA_REQ_SENT;
+		break;
+	case IB_CM_REP_RCVD:
+		ret = cm_alloc_msg(cm_id_priv, &msg);
+		if (ret)
+			goto error1;
+
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_REP, service_timeout,
+			      private_data, private_data_len);
+		ret = ib_post_send_mad(msg, NULL);
+		if (ret)
+			goto error2;
+		cm_id->state = IB_CM_MRA_REP_SENT;
+		break;
+	case IB_CM_ESTABLISHED:
+		ret = cm_alloc_msg(cm_id_priv, &msg);
+		if (ret)
+			goto error1;
+
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_OTHER, service_timeout,
+			      private_data, private_data_len);
+		ret = ib_post_send_mad(msg, NULL);
+		if (ret)
+			goto error2;
+		cm_id->lap_state = IB_CM_MRA_LAP_SENT;
+		break;
+	default:
+		ret = -EINVAL;
+		goto error1;
+	}
+	cm_id_priv->service_timeout = service_timeout;
+	cm_set_private_data(cm_id_priv, data, private_data_len);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return 0;
+
+error1:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	kfree(data);
+	return ret;
+
+error2:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	kfree(data);
+	cm_free_msg(msg);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_mra);
+
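+/*
+ * An MRA of a REQ arrives before the remote communication ID is known,
+ * so it is matched on the local ID alone; MRAs of a REP or LAP carry
+ * both IDs.
+ */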
+static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+{
+	switch (cm_mra_get_msg_mraed(mra_msg)) {
+	case CM_MSG_RESPONSE_REQ:
+		return cm_acquire_id(mra_msg->remote_comm_id, 0);
+	case CM_MSG_RESPONSE_REP:
+	case CM_MSG_RESPONSE_OTHER:
+		return cm_acquire_id(mra_msg->remote_comm_id,
+				     mra_msg->local_comm_id);
+	default:
+		return NULL;
+	}
+}
+
+static int cm_mra_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_mra_msg *mra_msg;
+	unsigned long flags;
+	int timeout, ret;
+
+	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_mraed_id(mra_msg);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	work->cm_event.private_data = &mra_msg->private_data;
+	work->cm_event.param.mra_rcvd.service_timeout =
+					cm_mra_get_service_timeout(mra_msg);
+	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+		  cm_convert_to_ms(cm_id_priv->av.packet_life_time);
+	if (timeout > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
+		       "decreasing used timeout_ms\n", timeout,
+		       cm_convert_to_ms(max_timeout));
+		timeout = cm_convert_to_ms(max_timeout);
+	}
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
+		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
+				  cm_id_priv->msg, timeout))
+			goto out;
+		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
+		break;
+	case IB_CM_REP_SENT:
+		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
+		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
+				  cm_id_priv->msg, timeout))
+			goto out;
+		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
+		break;
+	case IB_CM_ESTABLISHED:
+		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
+		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
+				  cm_id_priv->msg, timeout))
+			goto out;
+		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
+		break;
+	default:
+		goto out;
+	}
+
+	cm_id_priv->msg->context[1] = (void *) (unsigned long)
+				      cm_id_priv->id.state;
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static void cm_format_lap(struct cm_lap_msg *lap_msg,
+			  struct cm_id_private *cm_id_priv,
+			  struct ib_sa_path_rec *alternate_path,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
+			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
+	lap_msg->local_comm_id = cm_id_priv->id.local_id;
+	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
+	/* todo: need remote CM response timeout */
+	cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
+	lap_msg->alt_local_lid = alternate_path->slid;
+	lap_msg->alt_remote_lid = alternate_path->dlid;
+	lap_msg->alt_local_gid = alternate_path->sgid;
+	lap_msg->alt_remote_gid = alternate_path->dgid;
+	cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
+	cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
+	lap_msg->alt_hop_limit = alternate_path->hop_limit;
+	cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
+	cm_lap_set_sl(lap_msg, alternate_path->sl);
+	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
+	cm_lap_set_local_ack_timeout(lap_msg,
+		min(31, alternate_path->packet_life_time + 1));
+
+	if (private_data && private_data_len)
+		memcpy(lap_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_lap(struct ib_cm_id *cm_id,
+		   struct ib_sa_path_rec *alternate_path,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_ESTABLISHED ||
+	    (cm_id->lap_state != IB_CM_LAP_UNINIT &&
+	     cm_id->lap_state != IB_CM_LAP_IDLE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+	if (ret)
+		goto out;
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
+		      alternate_path, private_data, private_data_len);
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+	cm_id->lap_state = IB_CM_LAP_SENT;
+	cm_id_priv->msg = msg;
+
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_lap);
+
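+/*
+ * Build the proposed alternate path from a received LAP, swapping the
+ * sender's local and remote addresses into our destination and source.
+ */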
+static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
+				    struct ib_sa_path_rec *path,
+				    struct cm_lap_msg *lap_msg)
+{
+	memset(path, 0, sizeof *path);
+	path->dgid = lap_msg->alt_local_gid;
+	path->sgid = lap_msg->alt_remote_gid;
+	path->dlid = lap_msg->alt_local_lid;
+	path->slid = lap_msg->alt_remote_lid;
+	path->flow_label = cm_lap_get_flow_label(lap_msg);
+	path->hop_limit = lap_msg->alt_hop_limit;
+	path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+	path->reversible = 1;
+	path->pkey = cm_id_priv->pkey;
+	path->sl = cm_lap_get_sl(lap_msg);
+	path->mtu_selector = IB_SA_EQ;
+	path->mtu = cm_id_priv->path_mtu;
+	path->rate_selector = IB_SA_EQ;
+	path->rate = cm_lap_get_packet_rate(lap_msg);
+	path->packet_life_time_selector = IB_SA_EQ;
+	path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+	path->packet_life_time -= (path->packet_life_time > 0);
+}
+
+static int cm_lap_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_lap_msg *lap_msg;
+	struct ib_cm_lap_event_param *param;
+	struct ib_mad_send_buf *msg = NULL;
+	unsigned long flags;
+	int ret;
+
+	/* todo: verify LAP request and send reject APR if invalid. */
+	lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
+				   lap_msg->local_comm_id);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	param = &work->cm_event.param.lap_rcvd;
+	param->alternate_path = &work->path[0];
+	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
+	work->cm_event.private_data = &lap_msg->private_data;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
+		goto unlock;
+
+	switch (cm_id_priv->id.lap_state) {
+	case IB_CM_LAP_UNINIT:
+	case IB_CM_LAP_IDLE:
+		break;
+	case IB_CM_MRA_LAP_SENT:
+		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+			goto unlock;
+
+		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
+			      CM_MSG_RESPONSE_OTHER,
+			      cm_id_priv->service_timeout,
+			      cm_id_priv->private_data,
+			      cm_id_priv->private_data_len);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+		if (ib_post_send_mad(msg, NULL))
+			cm_free_msg(msg);
+		goto deref;
+	default:
+		goto unlock;
+	}
+
+	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
+	cm_id_priv->tid = lap_msg->hdr.tid;
+	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
+				&cm_id_priv->av);
+	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+
+unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+deref:	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static void cm_format_apr(struct cm_apr_msg *apr_msg,
+			  struct cm_id_private *cm_id_priv,
+			  enum ib_cm_apr_status status,
+			  void *info,
+			  u8 info_length,
+			  const void *private_data,
+			  u8 private_data_len)
+{
+	cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
+	apr_msg->local_comm_id = cm_id_priv->id.local_id;
+	apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
+	apr_msg->ap_status = (u8) status;
+
+	if (info && info_length) {
+		apr_msg->info_length = info_length;
+		memcpy(apr_msg->info, info, info_length);
+	}
+
+	if (private_data && private_data_len)
+		memcpy(apr_msg->private_data, private_data, private_data_len);
+}
+
+int ib_send_cm_apr(struct ib_cm_id *cm_id,
+		   enum ib_cm_apr_status status,
+		   void *info,
+		   u8 info_length,
+		   const void *private_data,
+		   u8 private_data_len)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
+	    (info && info_length > IB_CM_APR_INFO_LENGTH))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_ESTABLISHED ||
+	    (cm_id->lap_state != IB_CM_LAP_RCVD &&
+	     cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
+		      info, info_length, private_data, private_data_len);
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+
+	cm_id->lap_state = IB_CM_LAP_IDLE;
+out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_apr);
+
+static int cm_apr_handler(struct cm_work *work)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_apr_msg *apr_msg;
+	unsigned long flags;
+	int ret;
+
+	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
+				   apr_msg->local_comm_id);
+	if (!cm_id_priv)
+		return -EINVAL; /* Unmatched reply. */
+
+	work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
+	work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
+	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
+	work->cm_event.private_data = &apr_msg->private_data;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
+	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
+	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto out;
+	}
+	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	cm_id_priv->msg = NULL;
+
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
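+/*
+ * The timewait period has expired: remove the entry from the timewait
+ * list and, if the connection is still in TIMEWAIT, return it to IDLE
+ * and report the transition to the consumer.
+ */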
+static int cm_timewait_handler(struct cm_work *work)
+{
+	struct cm_timewait_info *timewait_info;
+	struct cm_id_private *cm_id_priv;
+	int ret;
+
+	timewait_info = (struct cm_timewait_info *)work;
+	spin_lock_irq(&cm.lock);
+	list_del(&timewait_info->list);
+	spin_unlock_irq(&cm.lock);
+
+	cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
+				   timewait_info->work.remote_id);
+	if (!cm_id_priv)
+		return -EINVAL;
+
+	spin_lock_irq(&cm_id_priv->lock);
+	if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
+	    cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
+		spin_unlock_irq(&cm_id_priv->lock);
+		goto out;
+	}
+	cm_id_priv->id.state = IB_CM_IDLE;
+	ret = atomic_inc_and_test(&cm_id_priv->work_count);
+	if (!ret)
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+	spin_unlock_irq(&cm_id_priv->lock);
+
+	if (ret)
+		cm_process_work(cm_id_priv, work);
+	else
+		cm_deref_id(cm_id_priv);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
+			       struct cm_id_private *cm_id_priv,
+			       struct ib_cm_sidr_req_param *param)
+{
+	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
+			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
+	sidr_req_msg->request_id = cm_id_priv->id.local_id;
+	sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
+	sidr_req_msg->service_id = param->service_id;
+
+	if (param->private_data && param->private_data_len)
+		memcpy(sidr_req_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
+			struct ib_cm_sidr_req_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if (!param->path || (param->private_data &&
+	     param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+	if (ret)
+		goto out;
+
+	cm_id->service_id = param->service_id;
+	cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
+	cm_id_priv->timeout_ms = param->timeout_ms;
+	if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+		printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
+		       "decreasing used timeout_ms\n", param->timeout_ms,
+		       cm_convert_to_ms(max_timeout));
+		cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+	}
+	cm_id_priv->max_cm_retries = param->max_cm_retries;
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto out;
+
+	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
+			   param);
+	msg->timeout_ms = cm_id_priv->timeout_ms;
+	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state == IB_CM_IDLE)
+		ret = ib_post_send_mad(msg, NULL);
+	else
+		ret = -EINVAL;
+
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		goto out;
+	}
+	cm_id->state = IB_CM_SIDR_REQ_SENT;
+	cm_id_priv->msg = msg;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_sidr_req);
+
+static void cm_format_sidr_req_event(struct cm_work *work,
+				     struct ib_cm_id *listen_id)
+{
+	struct cm_sidr_req_msg *sidr_req_msg;
+	struct ib_cm_sidr_req_event_param *param;
+
+	sidr_req_msg = (struct cm_sidr_req_msg *)
+				work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.sidr_req_rcvd;
+	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+	param->listen_id = listen_id;
+	param->port = work->port->port_num;
+	work->cm_event.private_data = &sidr_req_msg->private_data;
+}
+
+static int cm_sidr_req_handler(struct cm_work *work)
+{
+	struct ib_cm_id *cm_id;
+	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+	struct cm_sidr_req_msg *sidr_req_msg;
+	struct ib_wc *wc;
+	unsigned long flags;
+
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+
+	/* Record SGID/SLID and request ID for lookup. */
+	sidr_req_msg = (struct cm_sidr_req_msg *)
+				work->mad_recv_wc->recv_buf.mad;
+	wc = work->mad_recv_wc->wc;
+	cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
+	cm_id_priv->av.dgid.global.interface_id = 0;
+	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+				work->mad_recv_wc->recv_buf.grh,
+				&cm_id_priv->av);
+	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
+	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
+	cm_id_priv->tid = sidr_req_msg->hdr.tid;
+	atomic_inc(&cm_id_priv->work_count);
+
+	spin_lock_irqsave(&cm.lock, flags);
+	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+	if (cur_cm_id_priv) {
+		spin_unlock_irqrestore(&cm.lock, flags);
+		goto out; /* Duplicate message. */
+	}
+	cur_cm_id_priv = cm_find_listen(cm_id->device,
+					sidr_req_msg->service_id,
+					sidr_req_msg->private_data);
+	if (!cur_cm_id_priv) {
+		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+		spin_unlock_irqrestore(&cm.lock, flags);
+		/* todo: reply with no match */
+		goto out; /* No match. */
+	}
+	atomic_inc(&cur_cm_id_priv->refcount);
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
+	cm_id_priv->id.context = cur_cm_id_priv->id.context;
+	cm_id_priv->id.service_id = sidr_req_msg->service_id;
+	cm_id_priv->id.service_mask = __constant_cpu_to_be64(~0ULL);
+
+	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
+	cm_process_work(cm_id_priv, work);
+	cm_deref_id(cur_cm_id_priv);
+	return 0;
+out:
+	ib_destroy_cm_id(&cm_id_priv->id);
+	return -EINVAL;
+}
+
+static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
+			       struct cm_id_private *cm_id_priv,
+			       struct ib_cm_sidr_rep_param *param)
+{
+	cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+			  cm_id_priv->tid);
+	sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
+	sidr_rep_msg->status = param->status;
+	cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
+	sidr_rep_msg->service_id = cm_id_priv->id.service_id;
+	sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+
+	if (param->info && param->info_length)
+		memcpy(sidr_rep_msg->info, param->info, param->info_length);
+
+	if (param->private_data && param->private_data_len)
+		memcpy(sidr_rep_msg->private_data, param->private_data,
+		       param->private_data_len);
+}
+
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+			struct ib_cm_sidr_rep_param *param)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_send_buf *msg;
+	unsigned long flags;
+	int ret;
+
+	if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
+	    (param->private_data &&
+	     param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	ret = cm_alloc_msg(cm_id_priv, &msg);
+	if (ret)
+		goto error;
+
+	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
+			   param);
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		cm_free_msg(msg);
+		return ret;
+	}
+	cm_id->state = IB_CM_IDLE;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	spin_lock_irqsave(&cm.lock, flags);
+	rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+	spin_unlock_irqrestore(&cm.lock, flags);
+	return 0;
+
+error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(ib_send_cm_sidr_rep);
+
+static void cm_format_sidr_rep_event(struct cm_work *work)
+{
+	struct cm_sidr_rep_msg *sidr_rep_msg;
+	struct ib_cm_sidr_rep_event_param *param;
+
+	sidr_rep_msg = (struct cm_sidr_rep_msg *)
+				work->mad_recv_wc->recv_buf.mad;
+	param = &work->cm_event.param.sidr_rep_rcvd;
+	param->status = sidr_rep_msg->status;
+	param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
+	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
+	param->info = &sidr_rep_msg->info;
+	param->info_len = sidr_rep_msg->info_length;
+	work->cm_event.private_data = &sidr_rep_msg->private_data;
+}
+
+static int cm_sidr_rep_handler(struct cm_work *work)
+{
+	struct cm_sidr_rep_msg *sidr_rep_msg;
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+
+	sidr_rep_msg = (struct cm_sidr_rep_msg *)
+				work->mad_recv_wc->recv_buf.mad;
+	cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+	if (!cm_id_priv)
+		return -EINVAL; /* Unmatched reply. */
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		goto out;
+	}
+	cm_id_priv->id.state = IB_CM_IDLE;
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	cm_format_sidr_rep_event(work);
+	cm_process_work(cm_id_priv, work);
+	return 0;
+out:
+	cm_deref_id(cm_id_priv);
+	return -EINVAL;
+}
+
+static void cm_process_send_error(struct ib_mad_send_buf *msg,
+				  enum ib_wc_status wc_status)
+{
+	struct cm_id_private *cm_id_priv;
+	struct ib_cm_event cm_event;
+	enum ib_cm_state state;
+	unsigned long flags;
+	int ret;
+
+	memset(&cm_event, 0, sizeof cm_event);
+	cm_id_priv = msg->context[0];
+
+	/* Discard old sends or ones without a response. */
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	state = (enum ib_cm_state) (unsigned long) msg->context[1];
+	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
+		goto discard;
+
+	switch (state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+		cm_reset_to_idle(cm_id_priv);
+		cm_event.event = IB_CM_REQ_ERROR;
+		break;
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		cm_reset_to_idle(cm_id_priv);
+		cm_event.event = IB_CM_REP_ERROR;
+		break;
+	case IB_CM_DREQ_SENT:
+		cm_enter_timewait(cm_id_priv);
+		cm_event.event = IB_CM_DREQ_ERROR;
+		break;
+	case IB_CM_SIDR_REQ_SENT:
+		cm_id_priv->id.state = IB_CM_IDLE;
+		cm_event.event = IB_CM_SIDR_REQ_ERROR;
+		break;
+	default:
+		goto discard;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	cm_event.param.send_status = wc_status;
+
+	/* No other events can occur on the cm_id at this point. */
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
+	cm_free_msg(msg);
+	if (ret)
+		ib_destroy_cm_id(&cm_id_priv->id);
+	return;
+discard:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	cm_free_msg(msg);
+}
+
+static void cm_send_handler(struct ib_mad_agent *mad_agent,
+			    struct ib_mad_send_wc *mad_send_wc)
+{
+	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+
+	switch (mad_send_wc->status) {
+	case IB_WC_SUCCESS:
+	case IB_WC_WR_FLUSH_ERR:
+		cm_free_msg(msg);
+		break;
+	default:
+		if (msg->context[0] && msg->context[1])
+			cm_process_send_error(msg, mad_send_wc->status);
+		else
+			cm_free_msg(msg);
+		break;
+	}
+}
+
+static void cm_work_handler(struct work_struct *_work)
+{
+	struct cm_work *work = container_of(_work, struct cm_work, work.work);
+	int ret;
+
+	switch (work->cm_event.event) {
+	case IB_CM_REQ_RECEIVED:
+		ret = cm_req_handler(work);
+		break;
+	case IB_CM_MRA_RECEIVED:
+		ret = cm_mra_handler(work);
+		break;
+	case IB_CM_REJ_RECEIVED:
+		ret = cm_rej_handler(work);
+		break;
+	case IB_CM_REP_RECEIVED:
+		ret = cm_rep_handler(work);
+		break;
+	case IB_CM_RTU_RECEIVED:
+		ret = cm_rtu_handler(work);
+		break;
+	case IB_CM_USER_ESTABLISHED:
+		ret = cm_establish_handler(work);
+		break;
+	case IB_CM_DREQ_RECEIVED:
+		ret = cm_dreq_handler(work);
+		break;
+	case IB_CM_DREP_RECEIVED:
+		ret = cm_drep_handler(work);
+		break;
+	case IB_CM_SIDR_REQ_RECEIVED:
+		ret = cm_sidr_req_handler(work);
+		break;
+	case IB_CM_SIDR_REP_RECEIVED:
+		ret = cm_sidr_rep_handler(work);
+		break;
+	case IB_CM_LAP_RECEIVED:
+		ret = cm_lap_handler(work);
+		break;
+	case IB_CM_APR_RECEIVED:
+		ret = cm_apr_handler(work);
+		break;
+	case IB_CM_TIMEWAIT_EXIT:
+		ret = cm_timewait_handler(work);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	if (ret)
+		cm_free_work(work);
+}
+
+static int cm_establish(struct ib_cm_id *cm_id)
+{
+	struct cm_id_private *cm_id_priv;
+	struct cm_work *work;
+	unsigned long flags;
+	int ret = 0;
+
+	work = kmalloc(sizeof *work, GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id->state)
+	{
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+		cm_id->state = IB_CM_ESTABLISHED;
+		break;
+	case IB_CM_ESTABLISHED:
+		ret = -EISCONN;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (ret) {
+		kfree(work);
+		goto out;
+	}
+
+	/*
+	 * The CM worker thread may try to destroy the cm_id before it
+	 * can execute this work item.  To prevent potential deadlock,
+	 * we need to find the cm_id once we're in the context of the
+	 * worker thread, rather than holding a reference on it.
+	 */
+	INIT_DELAYED_WORK(&work->work, cm_work_handler);
+	work->local_id = cm_id->local_id;
+	work->remote_id = cm_id->remote_id;
+	work->mad_recv_wc = NULL;
+	work->cm_event.event = IB_CM_USER_ESTABLISHED;
+	queue_delayed_work(cm.wq, &work->work, 0);
+out:
+	return ret;
+}
+
+static int cm_migrate(struct ib_cm_id *cm_id)
+{
+	struct cm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state == IB_CM_ESTABLISHED &&
+	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
+	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
+		cm_id->lap_state = IB_CM_LAP_IDLE;
+		cm_id_priv->av = cm_id_priv->alt_av;
+	} else
+		ret = -EINVAL;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
+{
+	int ret;
+
+	switch (event) {
+	case IB_EVENT_COMM_EST:
+		ret = cm_establish(cm_id);
+		break;
+	case IB_EVENT_PATH_MIG:
+		ret = cm_migrate(cm_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_cm_notify);
+
+static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+			    struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct cm_work *work;
+	enum ib_cm_event_type event;
+	int paths = 0;
+
+	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
+	case CM_REQ_ATTR_ID:
+		paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
+						    alt_local_lid != 0);
+		event = IB_CM_REQ_RECEIVED;
+		break;
+	case CM_MRA_ATTR_ID:
+		event = IB_CM_MRA_RECEIVED;
+		break;
+	case CM_REJ_ATTR_ID:
+		event = IB_CM_REJ_RECEIVED;
+		break;
+	case CM_REP_ATTR_ID:
+		event = IB_CM_REP_RECEIVED;
+		break;
+	case CM_RTU_ATTR_ID:
+		event = IB_CM_RTU_RECEIVED;
+		break;
+	case CM_DREQ_ATTR_ID:
+		event = IB_CM_DREQ_RECEIVED;
+		break;
+	case CM_DREP_ATTR_ID:
+		event = IB_CM_DREP_RECEIVED;
+		break;
+	case CM_SIDR_REQ_ATTR_ID:
+		event = IB_CM_SIDR_REQ_RECEIVED;
+		break;
+	case CM_SIDR_REP_ATTR_ID:
+		event = IB_CM_SIDR_REP_RECEIVED;
+		break;
+	case CM_LAP_ATTR_ID:
+		paths = 1;
+		event = IB_CM_LAP_RECEIVED;
+		break;
+	case CM_APR_ATTR_ID:
+		event = IB_CM_APR_RECEIVED;
+		break;
+	default:
+		ib_free_recv_mad(mad_recv_wc);
+		return;
+	}
+
+	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
+		       GFP_KERNEL);
+	if (!work) {
+		ib_free_recv_mad(mad_recv_wc);
+		return;
+	}
+
+	INIT_DELAYED_WORK(&work->work, cm_work_handler);
+	work->cm_event.event = event;
+	work->mad_recv_wc = mad_recv_wc;
+	work->port = (struct cm_port *)mad_agent->context;
+	queue_delayed_work(cm.wq, &work->work, 0);
+}
+
+static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
+				struct ib_qp_attr *qp_attr,
+				int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+	case IB_CM_ESTABLISHED:
+		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
+				IB_QP_PKEY_INDEX | IB_QP_PORT;
+		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+		if (cm_id_priv->responder_resources)
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
+		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
+		qp_attr->port_num = cm_id_priv->av.port->port_num;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
+			       struct ib_qp_attr *qp_attr,
+			       int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+	case IB_CM_ESTABLISHED:
+		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
+				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
+		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+		qp_attr->path_mtu = cm_id_priv->path_mtu;
+		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
+		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
+		if (cm_id_priv->qp_type == IB_QPT_RC) {
+			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
+					 IB_QP_MIN_RNR_TIMER;
+			qp_attr->max_dest_rd_atomic =
+					cm_id_priv->responder_resources;
+			qp_attr->min_rnr_timer = 0;
+		}
+		if (cm_id_priv->alt_av.ah_attr.dlid) {
+			*qp_attr_mask |= IB_QP_ALT_PATH;
+			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+			qp_attr->alt_timeout =
+					cm_id_priv->alt_av.packet_life_time + 1;
+			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
+		}
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
+			       struct ib_qp_attr *qp_attr,
+			       int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	/* Allow transition to RTS before sending REP */
+	case IB_CM_REQ_RCVD:
+	case IB_CM_MRA_REQ_SENT:
+
+	case IB_CM_REP_RCVD:
+	case IB_CM_MRA_REP_SENT:
+	case IB_CM_REP_SENT:
+	case IB_CM_MRA_REP_RCVD:
+	case IB_CM_ESTABLISHED:
+		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
+			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
+			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
+			if (cm_id_priv->qp_type == IB_QPT_RC) {
+				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+						 IB_QP_RNR_RETRY |
+						 IB_QP_MAX_QP_RD_ATOMIC;
+				qp_attr->timeout =
+					cm_id_priv->av.packet_life_time + 1;
+				qp_attr->retry_cnt = cm_id_priv->retry_count;
+				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+				qp_attr->max_rd_atomic =
+					cm_id_priv->initiator_depth;
+			}
+			if (cm_id_priv->alt_av.ah_attr.dlid) {
+				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
+				qp_attr->path_mig_state = IB_MIG_REARM;
+			}
+		} else {
+			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
+			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
+			qp_attr->alt_timeout =
+				cm_id_priv->alt_av.packet_life_time + 1;
+			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
+			qp_attr->path_mig_state = IB_MIG_REARM;
+		}
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct cm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+	switch (qp_attr->qp_state) {
+	case IB_QPS_INIT:
+		ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
+		break;
+	case IB_QPS_RTR:
+		ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
+		break;
+	case IB_QPS_RTS:
+		ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(ib_cm_init_qp_attr);
+
+static void cm_add_one(struct ib_device *device)
+{
+	struct cm_device *cm_dev;
+	struct cm_port *port;
+	struct ib_mad_reg_req reg_req = {
+		.mgmt_class = IB_MGMT_CLASS_CM,
+		.mgmt_class_version = IB_CM_CLASS_VERSION
+	};
+	struct ib_port_modify port_modify = {
+		.set_port_cap_mask = IB_PORT_CM_SUP
+	};
+	unsigned long flags;
+	int ret;
+	u8 i;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
+			 device->phys_port_cnt, GFP_KERNEL);
+	if (!cm_dev)
+		return;
+
+	cm_dev->device = device;
+	cm_dev->ca_guid = device->node_guid;
+
+	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
+	for (i = 1; i <= device->phys_port_cnt; i++) {
+		port = &cm_dev->port[i-1];
+		port->cm_dev = cm_dev;
+		port->port_num = i;
+		port->mad_agent = ib_register_mad_agent(device, i,
+							IB_QPT_GSI,
+							&reg_req,
+							0,
+							cm_send_handler,
+							cm_recv_handler,
+							port);
+		if (IS_ERR(port->mad_agent))
+			goto error1;
+
+		ret = ib_modify_port(device, i, 0, &port_modify);
+		if (ret)
+			goto error2;
+	}
+	ib_set_client_data(device, &cm_client, cm_dev);
+
+	write_lock_irqsave(&cm.device_lock, flags);
+	list_add_tail(&cm_dev->list, &cm.device_list);
+	write_unlock_irqrestore(&cm.device_lock, flags);
+	return;
+
+error2:
+	ib_unregister_mad_agent(port->mad_agent);
+error1:
+	port_modify.set_port_cap_mask = 0;
+	port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+	while (--i) {
+		port = &cm_dev->port[i-1];
+		ib_modify_port(device, port->port_num, 0, &port_modify);
+		ib_unregister_mad_agent(port->mad_agent);
+	}
+	kfree(cm_dev);
+}
+
+static void cm_remove_one(struct ib_device *device)
+{
+	struct cm_device *cm_dev;
+	struct cm_port *port;
+	struct ib_port_modify port_modify = {
+		.clr_port_cap_mask = IB_PORT_CM_SUP
+	};
+	unsigned long flags;
+	int i;
+
+	cm_dev = ib_get_client_data(device, &cm_client);
+	if (!cm_dev)
+		return;
+
+	write_lock_irqsave(&cm.device_lock, flags);
+	list_del(&cm_dev->list);
+	write_unlock_irqrestore(&cm.device_lock, flags);
+
+	for (i = 1; i <= device->phys_port_cnt; i++) {
+		port = &cm_dev->port[i-1];
+		ib_modify_port(device, port->port_num, 0, &port_modify);
+		ib_unregister_mad_agent(port->mad_agent);
+	}
+	kfree(cm_dev);
+}
+
+static int __init ib_cm_init(void)
+{
+	int ret;
+
+	memset(&cm, 0, sizeof cm);
+	INIT_LIST_HEAD(&cm.device_list);
+	rwlock_init(&cm.device_lock);
+	spin_lock_init(&cm.lock);
+	cm.listen_service_table = RB_ROOT;
+	cm.listen_service_id = __constant_be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
+	cm.remote_id_table = RB_ROOT;
+	cm.remote_qp_table = RB_ROOT;
+	cm.remote_sidr_table = RB_ROOT;
+	idr_init(&cm.local_id_table);
+	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
+	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
+	INIT_LIST_HEAD(&cm.timewait_list);
+
+	cm.wq = create_workqueue("ib_cm");
+	if (!cm.wq)
+		return -ENOMEM;
+
+	ret = ib_register_client(&cm_client);
+	if (ret)
+		goto error;
+
+	return 0;
+error:
+	destroy_workqueue(cm.wq);
+	return ret;
+}
+
+static void __exit ib_cm_cleanup(void)
+{
+	struct cm_timewait_info *timewait_info, *tmp;
+
+	spin_lock_irq(&cm.lock);
+	list_for_each_entry(timewait_info, &cm.timewait_list, list)
+		cancel_delayed_work(&timewait_info->work.work);
+	spin_unlock_irq(&cm.lock);
+
+	destroy_workqueue(cm.wq);
+
+	list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
+		list_del(&timewait_info->list);
+		kfree(timewait_info);
+	}
+
+	ib_unregister_client(&cm_client);
+	idr_destroy(&cm.local_id_table);
+}
+
+module_init(ib_cm_init);
+module_exit(ib_cm_cleanup);
+
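
For reference, ib_cm_notify() above is how a consumer feeds COMM_EST and
PATH_MIG QP async events back into the CM.  A minimal sketch of a caller,
assuming the consumer stored its ib_cm_id as the QP's qp_context when it
created the QP (that wiring is illustrative, not part of this patch):

/*
 * Illustrative only: forward the relevant QP async events to the CM.
 * The cm_id comes from qp_context, which the consumer is assumed to
 * have set up itself when creating the QP.
 */
static void example_qp_event_handler(struct ib_event *event, void *context)
{
	struct ib_cm_id *cm_id = context;

	switch (event->event) {
	case IB_EVENT_COMM_EST:
	case IB_EVENT_PATH_MIG:
		ib_cm_notify(cm_id, event->event);
		break;
	default:
		break;
	}
}
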
--- linux-2.6.18.noarch/drivers/infiniband/core/device.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/device.c
@@ -148,6 +148,18 @@ static int alloc_name(char *name)
 	return 0;
 }
 
+static inline int start_port(struct ib_device *device)
+{
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+
+static inline int end_port(struct ib_device *device)
+{
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
+}
+
 /**
  * ib_alloc_device - allocate an IB device struct
  * @size:size of structure to allocate
@@ -207,6 +219,55 @@ static int add_client_context(struct ib_
 	return 0;
 }
 
+/* read the lengths of the pkey and gid tables on each port */
+static int read_port_table_lengths(struct ib_device *device)
+{
+	struct ib_port_attr *tprops = NULL;
+	int num_ports, ret = -ENOMEM;
+	u8 port_index;
+
+	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+	if (!tprops)
+		goto out;
+
+	num_ports = end_port(device) - start_port(device) + 1;
+
+	device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len *
+						num_ports, GFP_KERNEL);
+	if (!device->pkey_tbl_len)
+		goto out;
+
+	device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len *
+						num_ports, GFP_KERNEL);
+	if (!device->gid_tbl_len)
+		goto err1;
+
+	for (port_index = 0; port_index < num_ports; ++port_index) {
+		ret = ib_query_port(device, port_index + start_port(device),
+					tprops);
+		if (ret)
+			goto err2;
+		device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
+		device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
+	}
+
+	ret = 0;
+	goto out;
+err2:
+	kfree(device->gid_tbl_len);
+err1:
+	kfree(device->pkey_tbl_len);
+out:
+	kfree(tprops);
+	return ret;
+}
+
+static inline void free_port_table_lengths(struct ib_device *device)
+{
+	kfree(device->gid_tbl_len);
+	kfree(device->pkey_tbl_len);
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device:Device to register
@@ -238,6 +299,13 @@ int ib_register_device(struct ib_device 
 	spin_lock_init(&device->event_handler_lock);
 	spin_lock_init(&device->client_data_lock);
 
+	ret = read_port_table_lengths(device);
+	if (ret) {
+		printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
+		       device->name);
+		goto out;
+	}
+
 	ret = ib_device_register_sysfs(device);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -283,6 +351,8 @@ void ib_unregister_device(struct ib_devi
 
 	list_del(&device->core_list);
 
+	free_port_table_lengths(device);
+
 	mutex_unlock(&device_mutex);
 
 	spin_lock_irqsave(&device->client_data_lock, flags);
@@ -385,7 +455,7 @@ void *ib_get_client_data(struct ib_devic
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
@@ -505,7 +575,7 @@ int ib_query_port(struct ib_device *devi
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +650,7 @@ int ib_modify_port(struct ib_device *dev
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -591,6 +661,74 @@ int ib_modify_port(struct ib_device *dev
 }
 EXPORT_SYMBOL(ib_modify_port);
 
+/**
+ * ib_find_gid - Returns the port number and GID table index where
+ *   a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found.  This
+ *   parameter may be NULL.
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+			u8 *port_num, u16 *index)
+{
+	union ib_gid tmp_gid;
+	int ret, port, i, tbl_len;
+
+	for (port = start_port(device); port <= end_port(device); ++port) {
+		tbl_len = device->gid_tbl_len[port - start_port(device)];
+		for (i = 0; i < tbl_len; ++i) {
+			ret = ib_query_gid(device, port, i, &tmp_gid);
+			if (ret)
+				goto out;
+			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+				*port_num = port;
+				if (index) *index = i;
+				ret = 0;
+				goto out;
+			}
+		}
+	}
+	ret = -ENOENT;
+out:
+	return ret;
+}
+EXPORT_SYMBOL(ib_find_gid);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+			u8 port_num, u16 pkey, u16 *index)
+{
+	int ret, i, tbl_len;
+	u16 tmp_pkey;
+
+	tbl_len = device->pkey_tbl_len[port_num - start_port(device)];
+	for (i = 0; i < tbl_len; ++i) {
+		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
+		if (ret)
+			goto out;
+
+		if (pkey == tmp_pkey) {
+			*index = i;
+			ret = 0;
+			goto out;
+		}
+	}
+	ret = -ENOENT;
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(ib_find_pkey);
+
 static int __init ib_core_init(void)
 {
 	int ret;
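
The pkey/gid table-length cache added above exists to back the two new
helpers, ib_find_gid() and ib_find_pkey().  A minimal sketch of a caller,
where the wrapper function and the 0xffff full-membership pkey are chosen
purely for illustration:

/*
 * Illustrative: locate the port that owns a given GID, then look up the
 * index of the default full-membership pkey (0xffff) on that port.
 */
static int example_resolve_port(struct ib_device *device, union ib_gid *gid,
				u8 *port_num, u16 *pkey_index)
{
	u16 gid_index;
	int ret;

	ret = ib_find_gid(device, gid, port_num, &gid_index);
	if (ret)
		return ret;

	return ib_find_pkey(device, *port_num, 0xffff, pkey_index);
}
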
--- linux-2.6.18.noarch/drivers/infiniband/core/fmr_pool.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/fmr_pool.c
@@ -394,20 +394,12 @@ EXPORT_SYMBOL(ib_destroy_fmr_pool);
  */
 int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
 {
-	int serial;
-
-	atomic_inc(&pool->req_ser);
-	/*
-	 * It's OK if someone else bumps req_ser again here -- we'll
-	 * just wait a little longer.
-	 */
-	serial = atomic_read(&pool->req_ser);
+	int serial = atomic_inc_return(&pool->req_ser);
 
 	wake_up_process(pool->thread);
 
 	if (wait_event_interruptible(pool->force_wait,
-				     atomic_read(&pool->flush_ser) -
-				     atomic_read(&pool->req_ser) >= 0))
+				     atomic_read(&pool->flush_ser) - serial >= 0))
 		return -EINTR;
 
 	return 0;
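
The ib_flush_fmr_pool() change above makes the caller wait only until the
flush thread has caught up with its own request serial, so another caller
bumping req_ser in the meantime no longer extends the wait.  The calling
convention is unchanged; a sketch, with the pool and fmr assumed to come
from the caller:

/*
 * Illustrative: return a mapping to the pool and force the flush thread
 * to run so the dirty FMR is actually unmapped before continuing.
 */
static int example_unmap_and_flush(struct ib_fmr_pool *pool,
				   struct ib_pool_fmr *fmr)
{
	int ret;

	ret = ib_fmr_pool_unmap(fmr);
	if (ret)
		return ret;

	return ib_flush_fmr_pool(pool);	/* -EINTR if interrupted */
}
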
--- linux-2.6.18.noarch/drivers/infiniband/core/iwcm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/iwcm.c
@@ -0,0 +1,1020 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+	struct work_struct work;
+	struct iwcm_id_private *cm_id;
+	struct list_head list;
+	struct iw_cm_event event;
+	struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them based on the cm_id type:
+ *	LISTENING IDS: 	Get enough elements preallocated to handle the
+ *			listen backlog.
+ *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
+ *    to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+	struct iwcm_work *work;
+
+	if (list_empty(&cm_id_priv->work_free_list))
+		return NULL;
+	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+			  free_list);
+	list_del_init(&work->free_list);
+	return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+	list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+	struct list_head *e, *tmp;
+
+	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+		kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+	struct iwcm_work *work;
+
+	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+	while (count--) {
+		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+		if (!work) {
+			dealloc_work_entries(cm_id_priv);
+			return -ENOMEM;
+		}
+		work->cm_id = cm_id_priv;
+		INIT_LIST_HEAD(&work->list);
+		put_work(work);
+	}
+	return 0;
+}
+
+/*
+ * Save private data from incoming connection requests to
+ * iw_cm_event, so the low level driver doesn't have to. Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iw_cm_event *event)
+{
+	void *p;
+
+	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+	event->private_data = p;
+	return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being
+ * released, enable the waiting thread (in iw_destroy_cm_id) to
+ * get woken up, and return 1 if a thread is already waiting.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+	int ret = 0;
+
+	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+		BUG_ON(!list_empty(&cm_id_priv->work_list));
+		if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+			BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+			BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+					&cm_id_priv->flags));
+			ret = 1;
+		}
+		complete(&cm_id_priv->destroy_comp);
+	}
+
+	return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler,
+				 void *context)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	cm_id_priv->id.device = device;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	cm_id_priv->id.event_handler = cm_event_handler;
+	cm_id_priv->id.add_ref = add_ref;
+	cm_id_priv->id.rem_ref = rem_ref;
+	spin_lock_init(&cm_id_priv->lock);
+	atomic_set(&cm_id_priv->refcount, 1);
+	init_waitqueue_head(&cm_id_priv->connect_wait);
+	init_completion(&cm_id_priv->destroy_comp);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+	return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	if (!qp)
+		return -EINVAL;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	BUG_ON(qp == NULL);
+	qp_attr.qp_state = IB_QPS_SQD;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+	struct ib_qp *qp = NULL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/* Wait if we're currently in a connect or accept downcall */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+		/* QP could be NULL for a user-mode client */
+		if (cm_id_priv->qp)
+			qp = cm_id_priv->qp;
+		else
+			ret = -EINVAL;
+		break;
+	case IW_CM_STATE_LISTEN:
+		ret = -EINVAL;
+		break;
+	case IW_CM_STATE_CLOSING:
+		/* remote peer closed first */
+	case IW_CM_STATE_IDLE:
+		/* accept or connect returned !0 */
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called disconnect before/without calling accept after
+		 * connect_request event delivered.
+		 */
+		break;
+	case IW_CM_STATE_CONN_SENT:
+		/* Can only get here if wait above fails */
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (qp) {
+		if (abrupt)
+			ret = iwcm_modify_qp_err(qp);
+		else
+			ret = iwcm_modify_qp_sqd(qp);
+
+		/*
+		 * If both sides are disconnecting the QP could
+		 * already be in ERR or SQD states
+		 */
+		ret = 0;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/*
+	 * Wait if we're currently in a connect or accept downcall. A
+	 * listening endpoint should never block here.
+	 */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_LISTEN:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* destroy the listening endpoint */
+		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* Abrupt close of the connection */
+		(void)iwcm_modify_qp_err(cm_id_priv->qp);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called destroy before/without calling accept after
+		 * receiving connection request event notification.
+		 */
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_DESTROYING:
+	default:
+		BUG();
+		break;
+	}
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	(void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+	destroy_cm_id(cm_id);
+
+	wait_for_completion(&cm_id_priv->destroy_comp);
+
+	dealloc_work_entries(cm_id_priv);
+
+	kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, backlog);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+		cm_id_priv->state = IW_CM_STATE_LISTEN;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+		if (ret)
+			cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->reject(cm_id, private_data,
+					  private_data_len);
+
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+		 struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	struct ib_qp *qp;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+	if (ret) {
+		/* An error on accept precludes provider events */
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_cm_destroy will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+	unsigned long flags;
+	struct ib_qp *qp;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, 4);
+	if (ret)
+		return ret;
+
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+	if (ret) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the cm_id is cloned. The event
+ * contains the new four tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+				struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	struct iw_cm_id *cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	/*
+	 * The provider should never generate a connection request
+	 * event with a bad status.
+	 */
+	BUG_ON(iw_event->status);
+
+	/*
+	 * We could be destroying the listening id. If so, ignore this
+	 * upcall.
+	 */
+	spin_lock_irqsave(&listen_id_priv->lock, flags);
+	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+		goto out;
+	}
+	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+	cm_id = iw_create_cm_id(listen_id_priv->id.device,
+				listen_id_priv->id.cm_handler,
+				listen_id_priv->id.context);
+	/* If the cm_id could not be created, ignore the request */
+	if (IS_ERR(cm_id))
+		goto out;
+
+	cm_id->provider_data = iw_event->provider_data;
+	cm_id->local_addr = iw_event->local_addr;
+	cm_id->remote_addr = iw_event->remote_addr;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+	ret = alloc_work_entries(cm_id_priv, 3);
+	if (ret) {
+		iw_cm_reject(cm_id, NULL, 0);
+		iw_destroy_cm_id(cm_id);
+		goto out;
+	}
+
+	/* Call the client CM handler */
+	ret = cm_id->cm_handler(cm_id, iw_event);
+	if (ret) {
+		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+		destroy_cm_id(cm_id);
+		if (atomic_read(&cm_id_priv->refcount)==0)
+			kfree(cm_id);
+	}
+
+out:
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotiation has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	/*
+	 * We clear the CONNECT_WAIT bit here to allow the callback
+	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+	 * from a callback handler is not allowed.
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event to
+ * post its requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/*
+	 * Clear the connect wait bit so a callback function calling
+	 * iw_cm_disconnect will not wait and deadlock this thread
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+		cm_id_priv->id.local_addr = iw_event->local_addr;
+		cm_id_priv->id.remote_addr = iw_event->remote_addr;
+		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	} else {
+		/* REJECTED or RESET */
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+
+	/* Wake up waiters on connect complete */
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP, move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_DESTROYING:
+		break;
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+			 struct iw_cm_event *iw_event)
+{
+	int ret = 0;
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		cm_conn_req_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		ret = cm_conn_est_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		cm_disconnect_handler(cm_id_priv, iw_event);
+		break;
+	case IW_CM_EVENT_CLOSE:
+		ret = cm_close_handler(cm_id_priv, iw_event);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. If the callback
+ * function requests that the cm_id be deleted, a flag is set in the
+ * cm_id flags to indicate that when the last reference is
+ * removed, the cm_id is to be destroyed. This is necessary to
+ * distinguish between an object that will be destroyed by the app
+ * thread asleep on the destroy_comp list vs. an object destroyed
+ * here synchronously when the last reference is removed.
+ */
+static void cm_work_handler(void *arg)
+{
+	struct iwcm_work *work = arg;
+	struct iw_cm_event levent;
+	struct iwcm_id_private *cm_id_priv = work->cm_id;
+	unsigned long flags;
+	int empty;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	empty = list_empty(&cm_id_priv->work_list);
+	while (!empty) {
+		work = list_entry(cm_id_priv->work_list.next,
+				  struct iwcm_work, list);
+		list_del_init(&work->list);
+		empty = list_empty(&cm_id_priv->work_list);
+		levent = work->event;
+		put_work(work);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+		ret = process_event(cm_id_priv, &levent);
+		if (ret) {
+			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+			destroy_cm_id(&cm_id_priv->id);
+		}
+		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+		if (iwcm_deref_id(cm_id_priv))
+			return;
+
+		if (atomic_read(&cm_id_priv->refcount)==0 &&
+		    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+			dealloc_work_entries(cm_id_priv);
+			kfree(cm_id_priv);
+			return;
+		}
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called in interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ * 	      0	- the event was handled.
+ *	-ENOMEM	- the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *iw_event)
+{
+	struct iwcm_work *work;
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	work = get_work(cm_id_priv);
+	if (!work) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	INIT_WORK(&work->work, cm_work_handler, work);
+	work->cm_id = cm_id_priv;
+	work->event = *iw_event;
+
+	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+	    work->event.private_data_len) {
+		ret = copy_private_data(&work->event);
+		if (ret) {
+			put_work(work);
+			goto out;
+		}
+	}
+
+	atomic_inc(&cm_id_priv->refcount);
+	if (list_empty(&cm_id_priv->work_list)) {
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+		queue_work(iwcm_wq, &work->work);
+	} else
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_WRITE|
+					   IB_ACCESS_REMOTE_READ;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		*qp_attr_mask = 0;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	switch (qp_attr->qp_state) {
+	case IB_QPS_INIT:
+	case IB_QPS_RTR:
+		ret = iwcm_init_qp_init_attr(cm_id_priv,
+					     qp_attr, qp_attr_mask);
+		break;
+	case IB_QPS_RTS:
+		ret = iwcm_init_qp_rts_attr(cm_id_priv,
+					    qp_attr, qp_attr_mask);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+	if (!iwcm_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+	destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
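
For context on the event path documented above cm_event_handler(), the
following is a minimal provider-side sketch.  demo_report_established() is
purely illustrative and not part of this patch; it assumes the provider was
handed an iw_cm_id whose event_handler field points at cm_event_handler().

#include <linux/string.h>
#include <rdma/iw_cm.h>

/*
 * Hypothetical provider upcall: build an iw_cm_event and hand it to the
 * handler installed by the iWARP CM.  This may run in interrupt context;
 * the CM only queues the event on the cm_id's work_list here and does the
 * real processing later on the iwcm_wq thread.
 */
static void demo_report_established(struct iw_cm_id *cm_id)
{
	struct iw_cm_event event;

	memset(&event, 0, sizeof event);
	event.event  = IW_CM_EVENT_ESTABLISHED;
	event.status = 0;

	/* Returns 0, or -ENOMEM if no preallocated work element is free. */
	cm_id->event_handler(cm_id, &event);
}
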
--- linux-2.6.18.noarch/drivers/infiniband/core/iwcm.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/iwcm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+	IW_CM_STATE_IDLE,             /* unbound, inactive */
+	IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+	IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+	IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+	IW_CM_STATE_ESTABLISHED,      /* established */
+	IW_CM_STATE_CLOSING,	      /* disconnect */
+	IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+	struct iw_cm_id	id;
+	enum iw_cm_state state;
+	unsigned long flags;
+	struct ib_qp *qp;
+	struct completion destroy_comp;
+	wait_queue_head_t connect_wait;
+	struct list_head work_list;
+	spinlock_t lock;
+	atomic_t refcount;
+	struct list_head work_free_list;
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
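
Below is a short consumer-side sketch of the exported iw_cm_init_qp_attr()
(demo_move_qp() is illustrative only; the typical caller in this stack is
the RDMA CM): name the target QP state, let the iWARP CM fill in the
attributes it cares about, then apply them with ib_modify_qp().

#include <linux/string.h>
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>

static int demo_move_qp(struct iw_cm_id *cm_id, struct ib_qp *qp,
			enum ib_qp_state new_state)
{
	struct ib_qp_attr attr;
	int mask = 0;
	int ret;

	memset(&attr, 0, sizeof attr);
	attr.qp_state = new_state;

	/* IB_QP_STATE | IB_QP_ACCESS_FLAGS for INIT/RTR, an empty mask
	 * for RTS (see iwcm_init_qp_rts_attr above). */
	ret = iw_cm_init_qp_attr(cm_id, &attr, &mask);
	if (ret)
		return ret;

	/* IB_QP_STATE is OR'd back in so the empty RTS mask still carries
	 * the state change; whether to do so is up to the ULP. */
	return ib_modify_qp(qp, &attr, IB_QP_STATE | mask);
}
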
--- linux-2.6.18.noarch/drivers/infiniband/core/local_sa.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/local_sa.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_local_sa.h>
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand subnet administration caching");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int retry_timer = 5000; /* 5 sec */
+module_param(retry_timer, int, 0444);
+MODULE_PARM_DESC(retry_timer, "Time in ms between retried requests.");
+
+static int retries = 3;
+module_param(retries, int, 0444);
+MODULE_PARM_DESC(retries, "Number of times to retry a request.");
+
+static unsigned long cache_timeout = 15 * 60 * 1000; /* 15 min */
+module_param(cache_timeout, ulong, 0444);
+MODULE_PARM_DESC(cache_timeout, "Time in ms between cache updates.  "
+				"Set to 0 to disable cache.");
+
+static unsigned long hold_time = 30 * 1000; /* 30 sec */
+module_param(hold_time, ulong, 0444);
+MODULE_PARM_DESC(hold_time, "Minimum time in ms between cache updates.");
+
+static unsigned long update_delay = 3000; /* 3 sec */
+module_param(update_delay, ulong, 0444);
+MODULE_PARM_DESC(update_delay, "Delay in ms between an event and an update.");
+
+enum {
+	IB_MAX_PATHS_PER_DEST = 0x7F
+};
+
+static unsigned long paths_per_dest = IB_MAX_PATHS_PER_DEST;
+module_param(paths_per_dest, ulong, 0444);
+MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve "
+				 "for each destination (DGID).  Set to 0 "
+				 "to disable cache.");
+
+static void sa_db_add_one(struct ib_device *device);
+static void sa_db_remove_one(struct ib_device *device);
+
+static struct ib_client sa_db_client = {
+	.name   = "local_sa",
+	.add    = sa_db_add_one,
+	.remove = sa_db_remove_one
+};
+
+static LIST_HEAD(dev_list);
+static DECLARE_RWSEM(lock);
+static unsigned long hold_time, update_delay;
+static struct workqueue_struct *sa_wq;
+
+struct sa_db_port {
+	struct sa_db_device *dev;
+	struct ib_mad_agent *agent;
+	struct rb_root paths;
+	unsigned long update_time;
+	int update;
+	struct work_struct work;
+	union ib_gid gid;
+	int port_num;
+};
+
+struct sa_db_device {
+	struct list_head list;
+	struct ib_device *device;
+	struct ib_event_handler event_handler;
+	struct sa_db_port port[0];
+};
+
+struct ib_sa_iterator {
+	struct ib_sa_iterator	*next;
+};
+
+struct ib_sa_attr_list {
+	struct ib_sa_iterator	iter;
+	struct ib_sa_iterator	*tail;
+	int			update;
+	union ib_gid		gid;
+	struct rb_node		node;
+};
+
+struct ib_path_rec_info {
+	struct ib_sa_iterator	iter;	/* keep first for ib_get_next_sa_attr */
+	struct ib_sa_path_rec	rec;
+};
+
+struct ib_sa_iter {
+	struct ib_mad_recv_wc *recv_wc;
+	struct ib_mad_recv_buf *recv_buf;
+	int attr_size;
+	int attr_offset;
+	int data_offset;
+	int data_left;
+	void *attr;
+	u8 attr_data[0];
+};
+
+static void send_handler(struct ib_mad_agent *agent,
+			 struct ib_mad_send_wc *mad_send_wc)
+{
+	ib_destroy_ah(mad_send_wc->send_buf->ah);
+	ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+static void free_attr_list(struct ib_sa_attr_list *attr_list)
+{
+	struct ib_sa_iterator *cur;
+
+	for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) {
+		attr_list->iter.next = cur->next;
+		kfree(cur);
+	}
+	attr_list->tail = &attr_list->iter;
+}
+
+static void remove_all_attrs(struct rb_root *root)
+{
+	struct rb_node *node, *next_node;
+	struct ib_sa_attr_list *attr_list;
+
+	for (node = rb_first(root); node; node = next_node) {
+		next_node = rb_next(node);
+		rb_erase(node, root);
+		attr_list = rb_entry(node, struct ib_sa_attr_list, node);
+		free_attr_list(attr_list);
+		kfree(attr_list);
+	}
+}
+
+static struct ib_sa_attr_list * insert_attr_list(struct rb_root *root,
+						 struct ib_sa_attr_list
+							*attr_list)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct ib_sa_attr_list *cur_attr_list;
+	int cmp;
+
+	while (*link) {
+		parent = *link;
+		cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node);
+		cmp = memcmp(&cur_attr_list->gid, &attr_list->gid,
+			     sizeof attr_list->gid);
+		if (cmp < 0)
+			link = &(*link)->rb_left;
+		else if (cmp > 0)
+			link = &(*link)->rb_right;
+		else
+			return cur_attr_list;
+	}
+	rb_link_node(&attr_list->node, parent, link);
+	rb_insert_color(&attr_list->node, root);
+	return NULL;
+}
+
+static struct ib_sa_attr_list * find_attr_list(struct rb_root *root,
+					       u8 *gid)
+{
+	struct rb_node *node = root->rb_node;
+	struct ib_sa_attr_list *attr_list;
+	int cmp;
+
+	while (node) {
+		attr_list = rb_entry(node, struct ib_sa_attr_list, node);
+		cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid);
+		if (cmp < 0)
+			node = node->rb_left;
+		else if (cmp > 0)
+			node = node->rb_right;
+		else
+			return attr_list;
+	}
+	return NULL;
+}
+
+static int insert_attr(struct rb_root *root, int update, void *key,
+		       struct ib_sa_iterator *iter)
+{
+	struct ib_sa_attr_list *attr_list;
+	void *err;
+
+	attr_list = find_attr_list(root, key);
+	if (!attr_list) {
+		attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL);
+		if (!attr_list)
+			return -ENOMEM;
+
+		attr_list->iter.next = NULL;
+		attr_list->tail = &attr_list->iter;
+		attr_list->update = update;
+		memcpy(attr_list->gid.raw, key, sizeof attr_list->gid);
+
+		err = insert_attr_list(root, attr_list);
+		if (err) {
+			kfree(attr_list);
+			return PTR_ERR(err);
+		}
+	} else if (attr_list->update != update) {
+		free_attr_list(attr_list);
+		attr_list->update = update;
+	}
+
+	/*
+	 * Assume that the SA returned the best attribute first, and insert
+	 * attributes on the tail.
+	 */
+	attr_list->tail->next = iter;
+	iter->next = NULL;
+	attr_list->tail = iter;
+	return 0;
+}
+
+static struct ib_sa_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_sa_iter *iter;
+	struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
+	int attr_size, attr_offset;
+
+	attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
+	attr_size = 64;		/* path record length */
+	if (attr_offset < attr_size)
+		return ERR_PTR(-EINVAL);
+
+	iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR;
+	iter->recv_wc = mad_recv_wc;
+	iter->recv_buf = &mad_recv_wc->recv_buf;
+	iter->attr_offset = attr_offset;
+	iter->attr_size = attr_size;
+	return iter;
+}
+
+static void ib_sa_iter_free(struct ib_sa_iter *iter)
+{
+	kfree(iter);
+}
+
+static void *ib_sa_iter_next(struct ib_sa_iter *iter)
+{
+	struct ib_sa_mad *mad;
+	int left, offset = 0;
+
+	while (iter->data_left >= iter->attr_offset) {
+		while (iter->data_offset < IB_MGMT_SA_DATA) {
+			mad = (struct ib_sa_mad *) iter->recv_buf->mad;
+
+			left = IB_MGMT_SA_DATA - iter->data_offset;
+			if (left < iter->attr_size) {
+				/* copy first piece of the attribute */
+				iter->attr = &iter->attr_data;
+				memcpy(iter->attr,
+				       &mad->data[iter->data_offset], left);
+				offset = left;
+				break;
+			} else if (offset) {
+				/* copy the second piece of the attribute */
+				memcpy(iter->attr + offset, &mad->data[0],
+				       iter->attr_size - offset);
+				iter->data_offset = iter->attr_size - offset;
+				offset = 0;
+			} else {
+				iter->attr = &mad->data[iter->data_offset];
+				iter->data_offset += iter->attr_size;
+			}
+
+			iter->data_left -= iter->attr_offset;
+			goto out;
+		}
+		iter->data_offset = 0;
+		iter->recv_buf = list_entry(iter->recv_buf->list.next,
+					    struct ib_mad_recv_buf, list);
+	}
+	iter->attr = NULL;
+out:
+	return iter->attr;
+}
+
+/*
+ * Copy a path record from a received MAD and insert it into our index.
+ * The path record in the MAD is in network order, so must be swapped.  It
+ * can also span multiple MADs, just to make our life hard.
+ */
+static void update_path_rec(struct sa_db_port *port,
+			    struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_sa_iter *iter;
+	struct ib_path_rec_info *path_info;
+	void *attr;
+
+	iter = ib_sa_iter_create(mad_recv_wc);
+	if (IS_ERR(iter))
+		return;
+
+	down_write(&lock);
+	port->update++;
+	while ((attr = ib_sa_iter_next(iter)) &&
+	       (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) {
+
+		ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC);
+		if (insert_attr(&port->paths, port->update,
+				path_info->rec.dgid.raw,
+				&path_info->iter)) {
+			kfree(path_info);
+			break;
+		}
+	}
+	up_write(&lock);
+	ib_sa_iter_free(iter);
+}
+
+static void recv_handler(struct ib_mad_agent *mad_agent,
+			 struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_sa_mad *mad = (void *) mad_recv_wc->recv_buf.mad;
+
+	if (mad->mad_hdr.status)
+		goto done;
+
+	switch (cpu_to_be16(mad->mad_hdr.attr_id)) {
+	case IB_SA_ATTR_PATH_REC:
+		update_path_rec(mad_agent->context, mad_recv_wc);
+		break;
+	default:
+		break;
+	}
+done:
+	ib_free_recv_mad(mad_recv_wc);
+}
+
+static struct ib_mad_send_buf* get_sa_msg(struct sa_db_port *port)
+{
+	struct ib_port_attr	port_attr;
+	struct ib_ah_attr	ah_attr;
+	struct ib_mad_send_buf	*msg;
+	int ret;
+
+	ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
+	if (ret || port_attr.state != IB_PORT_ACTIVE)
+		return NULL;
+
+	msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR,
+				 IB_MGMT_SA_DATA, GFP_KERNEL);
+	if (IS_ERR(msg))
+		return NULL;
+
+	memset(&ah_attr, 0, sizeof ah_attr);
+	ah_attr.dlid = port_attr.sm_lid;
+	ah_attr.sl = port_attr.sm_sl;
+	ah_attr.port_num = port->port_num;
+
+	msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
+	if (IS_ERR(msg->ah)) {
+		ib_free_send_mad(msg);
+		return NULL;
+	}
+
+	msg->timeout_ms = retry_timer;
+	msg->retries = retries;
+	msg->context[0] = port;
+	return msg;
+}
+
+static __be64 form_tid(u32 hi_tid)
+{
+	static atomic_t tid;
+	return cpu_to_be64((((u64) hi_tid) << 32) |
+			   ((u32) atomic_inc_return(&tid)));
+}
+
+static void format_path_req(struct sa_db_port *port,
+			    struct ib_mad_send_buf *msg)
+{
+	struct ib_sa_mad *mad = msg->mad;
+	struct ib_sa_path_rec path_rec;
+
+	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
+	mad->mad_hdr.mgmt_class	   = IB_MGMT_CLASS_SUBN_ADM;
+	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+	mad->mad_hdr.method	   = IB_SA_METHOD_GET_TABLE;
+	mad->mad_hdr.attr_id	   = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+	mad->mad_hdr.tid	   = form_tid(msg->mad_agent->hi_tid);
+
+	mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH;
+
+	path_rec.sgid = port->gid;
+	path_rec.numb_path = paths_per_dest;
+	ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC);
+}
+
+static void update_cache(void *_port)
+{
+	struct sa_db_port *port = (struct sa_db_port *)_port;
+	struct ib_mad_send_buf *msg;
+
+	msg = get_sa_msg(port);
+	if (!msg)
+		return;
+
+	format_path_req(port, msg);
+
+	if (ib_post_send_mad(msg, NULL)) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+		return;
+	}
+
+	/*
+	 * We record the time that we requested the update, rather than use the
+	 * time that the update occurred.  This allows us to generate a new
+	 * update if an event occurs while we're still processing this one.
+	 */
+	port->update_time = jiffies;
+	queue_delayed_work(sa_wq, &port->work, cache_timeout);
+}
+
+static void schedule_update(struct sa_db_port *port)
+{
+	unsigned long time, delay;
+
+	if (!paths_per_dest)
+		return;
+
+	time = jiffies;
+	if (time_after(time, port->update_time + hold_time))
+		delay = update_delay;
+	else
+		delay = port->update_time + hold_time - time;
+
+	cancel_delayed_work(&port->work);
+	queue_delayed_work(sa_wq, &port->work, delay);
+}
+
+static void handle_event(struct ib_event_handler *event_handler,
+			 struct ib_event *event)
+{
+	struct sa_db_device *dev;
+	dev = container_of(event_handler, typeof(*dev), event_handler);
+
+	if (event->event == IB_EVENT_PORT_ERR    ||
+	    event->event == IB_EVENT_PORT_ACTIVE ||
+	    event->event == IB_EVENT_LID_CHANGE  ||
+	    event->event == IB_EVENT_PKEY_CHANGE ||
+	    event->event == IB_EVENT_SM_CHANGE	 ||
+	    event->event == IB_EVENT_CLIENT_REREGISTER)
+		schedule_update(&dev->port[event->element.port_num - 1]);
+}
+
+int ib_get_path_rec(struct ib_device *device, u8 port_num, union ib_gid *sgid,
+		    union ib_gid *dgid, u16 pkey, struct ib_sa_path_rec *rec)
+{
+	struct ib_sa_iterator *iter;
+	struct ib_sa_path_rec *path;
+	int ret = -ENODATA;
+
+	iter = ib_create_path_iter(device, port_num, dgid);
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
+
+	for (path = ib_get_next_sa_attr(&iter); path;
+	     path = ib_get_next_sa_attr(&iter)) {
+		if (pkey == path->pkey &&
+		    !memcmp(sgid, path->sgid.raw, sizeof *sgid)) {
+			memcpy(rec, path, sizeof *rec);
+			ret = 0;
+			break;
+		}
+	}
+
+	ib_free_sa_iter(iter);
+	return ret;
+}
+EXPORT_SYMBOL(ib_get_path_rec);
+
+struct ib_sa_iterator *ib_create_path_iter(struct ib_device *device,
+					   u8 port_num, union ib_gid *dgid)
+{
+	struct sa_db_device *dev;
+	struct sa_db_port *port;
+	struct ib_sa_attr_list *list;
+	int ret;
+
+	down_read(&lock);
+	dev = ib_get_client_data(device, &sa_db_client);
+	if (!dev) {
+		ret = -ENODEV;
+		goto err;
+	}
+	port = &dev->port[port_num - 1];
+
+	list = find_attr_list(&port->paths, dgid->raw);
+	if (!list) {
+		ret = -ENODATA;
+		goto err;
+	}
+
+	return &list->iter;
+err:
+	up_read(&lock);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_create_path_iter);
+
+void ib_free_sa_iter(struct ib_sa_iterator *iter)
+{
+	up_read(&lock);
+}
+EXPORT_SYMBOL(ib_free_sa_iter);
+
+void *ib_get_next_sa_attr(struct ib_sa_iterator **iter)
+{
+	*iter = (*iter)->next;
+	if (*iter)
+		return ((void *)(*iter)) + sizeof(**iter);
+	else
+		return NULL;
+}
+EXPORT_SYMBOL(ib_get_next_sa_attr);
+
+static void sa_db_add_one(struct ib_device *device)
+{
+	struct sa_db_device *dev;
+	struct sa_db_port *port;
+	int i;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+		      GFP_KERNEL);
+	if (!dev)
+		return;
+
+	for (i = 1; i <= device->phys_port_cnt; i++) {
+		port = &dev->port[i-1];
+		port->dev = dev;
+		port->port_num = i;
+		port->update_time = jiffies - hold_time;
+		port->update = 0;
+		INIT_WORK(&port->work, update_cache, port);
+		port->paths = RB_ROOT;
+
+		if (ib_get_cached_gid(device, i, 0, &port->gid))
+			goto err;
+
+		port->agent = ib_register_mad_agent(device, i, IB_QPT_GSI,
+						    NULL, IB_MGMT_RMPP_VERSION,
+						    send_handler, recv_handler,
+						    port);
+		if (IS_ERR(port->agent))
+			goto err;
+	}
+
+	dev->device = device;
+	ib_set_client_data(device, &sa_db_client, dev);
+
+	down_write(&lock);
+	list_add_tail(&dev->list, &dev_list);
+	up_write(&lock);
+
+	/* Initialization must be complete before cache updates can occur. */
+	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);
+	ib_register_event_handler(&dev->event_handler);
+
+	/* Force an update now. */
+	for (i = 1; i <= device->phys_port_cnt; i++)
+		schedule_update(&dev->port[i-1]);
+	return;
+err:
+	while (--i) {
+		ib_unregister_mad_agent(dev->port[i-1].agent);
+		remove_all_attrs(&dev->port[i-1].paths);
+	}
+	kfree(dev);
+}
+
+static void sa_db_remove_one(struct ib_device *device)
+{
+	struct sa_db_device *dev;
+	int i;
+
+	dev = ib_get_client_data(device, &sa_db_client);
+	if (!dev)
+		return;
+
+	ib_unregister_event_handler(&dev->event_handler);
+	for (i = 0; i < device->phys_port_cnt; i++)
+		cancel_delayed_work(&dev->port[i].work);
+	flush_workqueue(sa_wq);
+
+	for (i = 0; i < device->phys_port_cnt; i++) {
+		ib_unregister_mad_agent(dev->port[i].agent);
+		remove_all_attrs(&dev->port[i].paths);
+	}
+
+	down_write(&lock);
+	list_del(&dev->list);
+	up_write(&lock);
+	kfree(dev);
+}
+
+static int __init sa_db_init(void)
+{
+	int ret;
+
+	cache_timeout = msecs_to_jiffies(cache_timeout);
+	hold_time = msecs_to_jiffies(hold_time);
+	update_delay = msecs_to_jiffies(update_delay);
+
+	if (!cache_timeout)
+		paths_per_dest = 0;
+	else if (paths_per_dest > IB_MAX_PATHS_PER_DEST)
+		paths_per_dest = IB_MAX_PATHS_PER_DEST;
+
+	sa_wq = create_singlethread_workqueue("local_sa");
+	if (!sa_wq)
+		return -ENOMEM;
+
+	ret = ib_register_client(&sa_db_client);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	destroy_workqueue(sa_wq);
+	return ret;
+}
+
+static void __exit sa_db_cleanup(void)
+{
+	ib_unregister_client(&sa_db_client);
+	destroy_workqueue(sa_wq);
+}
+
+module_init(sa_db_init);
+module_exit(sa_db_cleanup);
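
The iterator API exported by local_sa.c is used roughly as in the sketch
below, which mirrors ib_get_path_rec() above; demo_first_path_on_pkey()
itself is not part of the patch.  The iterator holds the cache's read lock
from ib_create_path_iter() until ib_free_sa_iter(), so every exit path must
release it.

#include <linux/err.h>
#include <linux/errno.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_local_sa.h>

/* Return the first cached path to dgid that sits on the given partition. */
static int demo_first_path_on_pkey(struct ib_device *device, u8 port_num,
				   union ib_gid *dgid, __be16 pkey,
				   struct ib_sa_path_rec *out)
{
	struct ib_sa_iterator *iter;
	struct ib_sa_path_rec *path;
	int ret = -ENODATA;

	iter = ib_create_path_iter(device, port_num, dgid);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	for (path = ib_get_next_sa_attr(&iter); path;
	     path = ib_get_next_sa_attr(&iter)) {
		if (path->pkey == pkey) {
			*out = *path;
			ret = 0;
			break;
		}
	}

	ib_free_sa_iter(iter);
	return ret;
}
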
--- linux-2.6.18.noarch/drivers/infiniband/core/mad.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/mad.c
@@ -46,7 +46,7 @@ MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
-static kmem_cache_t *ib_mad_cache;
+static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
@@ -643,7 +643,8 @@ static void snoop_recv(struct ib_mad_qp_
 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 }
 
-static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+static void build_smp_wc(struct ib_qp *qp,
+			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
 			 struct ib_wc *wc)
 {
 	memset(wc, 0, sizeof *wc);
@@ -653,7 +654,7 @@ static void build_smp_wc(u64 wr_id, u16 
 	wc->pkey_index = pkey_index;
 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
 	wc->src_qp = IB_QP0;
-	wc->qp_num = IB_QP0;
+	wc->qp = qp;
 	wc->slid = slid;
 	wc->sl = 0;
 	wc->dlid_path_bits = 0;
@@ -714,7 +715,8 @@ static int handle_outgoing_dr_smp(struct
 		goto out;
 	}
 
-	build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid),
+	build_smp_wc(mad_agent_priv->agent.qp,
+		     send_wr->wr_id, be16_to_cpu(smp->dr_slid),
 		     send_wr->wr.ud.pkey_index,
 		     send_wr->wr.ud.port_num, &mad_wc);
 
@@ -999,17 +1001,17 @@ int ib_send_mad(struct ib_mad_send_wr_pr
 
 	mad_agent = mad_send_wr->send_buf.mad_agent;
 	sge = mad_send_wr->sg_list;
-	sge[0].addr = dma_map_single(mad_agent->device->dma_device,
-				     mad_send_wr->send_buf.mad,
-				     sge[0].length,
-				     DMA_TO_DEVICE);
-	pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
-
-	sge[1].addr = dma_map_single(mad_agent->device->dma_device,
-				     ib_get_payload(mad_send_wr),
-				     sge[1].length,
-				     DMA_TO_DEVICE);
-	pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
+	sge[0].addr = ib_dma_map_single(mad_agent->device,
+					mad_send_wr->send_buf.mad,
+					sge[0].length,
+					DMA_TO_DEVICE);
+	mad_send_wr->header_mapping = sge[0].addr;
+
+	sge[1].addr = ib_dma_map_single(mad_agent->device,
+					ib_get_payload(mad_send_wr),
+					sge[1].length,
+					DMA_TO_DEVICE);
+	mad_send_wr->payload_mapping = sge[1].addr;
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -1027,12 +1029,12 @@ int ib_send_mad(struct ib_mad_send_wr_pr
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 	if (ret) {
-		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(mad_send_wr, header_mapping),
-				 sge[0].length, DMA_TO_DEVICE);
-		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(mad_send_wr, payload_mapping),
-				 sge[1].length, DMA_TO_DEVICE);
+		ib_dma_unmap_single(mad_agent->device,
+				    mad_send_wr->header_mapping,
+				    sge[0].length, DMA_TO_DEVICE);
+		ib_dma_unmap_single(mad_agent->device,
+				    mad_send_wr->payload_mapping,
+				    sge[1].length, DMA_TO_DEVICE);
 	}
 	return ret;
 }
@@ -1246,8 +1248,8 @@ static int find_vendor_oui(struct ib_mad
 	int i;
 
 	for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+		/* Is there matching OUI for this vendor class ? */
+		if (!memcmp(vendor_class->oui[i], oui, 3))
 			return i;
 
 	return -1;
@@ -1750,7 +1752,7 @@ ib_find_send_mad(struct ib_mad_agent_pri
 		     */
 		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
-			return wr;
+			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
 	}
 
 	/*
@@ -1851,11 +1853,11 @@ static void ib_mad_recv_done_handler(str
 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
 				    mad_list);
 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
-	dma_unmap_single(port_priv->device->dma_device,
-			 pci_unmap_addr(&recv->header, mapping),
-			 sizeof(struct ib_mad_private) -
-			 sizeof(struct ib_mad_private_header),
-			 DMA_FROM_DEVICE);
+	ib_dma_unmap_single(port_priv->device,
+			    recv->header.mapping,
+			    sizeof(struct ib_mad_private) -
+			      sizeof(struct ib_mad_private_header),
+			    DMA_FROM_DEVICE);
 
 	/* Setup MAD receive work completion from "normal" work completion */
 	recv->header.wc = *wc;
@@ -2081,12 +2083,12 @@ static void ib_mad_send_done_handler(str
 	qp_info = send_queue->qp_info;
 
 retry:
-	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-			 pci_unmap_addr(mad_send_wr, header_mapping),
-			 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
-	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-			 pci_unmap_addr(mad_send_wr, payload_mapping),
-			 mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
+	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+			    mad_send_wr->header_mapping,
+			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
+			    mad_send_wr->payload_mapping,
+			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
 	queued_send_wr = NULL;
 	spin_lock_irqsave(&send_queue->lock, flags);
 	list_del(&mad_list->list);
@@ -2237,7 +2239,7 @@ static void cancel_mads(struct ib_mad_ag
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
 		if (mad_send_wr->status == IB_WC_SUCCESS) {
- 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 		}
 	}
@@ -2355,7 +2357,8 @@ static void local_completions(void *data
 			 * Defined behavior is to complete response
 			 * before request
 			 */
-			build_smp_wc((unsigned long) local->mad_send_wr,
+			build_smp_wc(recv_mad_agent->agent.qp,
+				     (unsigned long) local->mad_send_wr,
 				     be16_to_cpu(IB_LID_PERMISSIVE),
 				     0, recv_mad_agent->agent.port_num, &wc);
 
@@ -2527,13 +2530,12 @@ static int ib_mad_post_receive_mads(stru
 				break;
 			}
 		}
-		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      	device->dma_device,
-					      &mad_priv->grh,
-					      sizeof *mad_priv -
-					      	sizeof mad_priv->header,
-					      DMA_FROM_DEVICE);
-		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
+		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
+						 &mad_priv->grh,
+						 sizeof *mad_priv -
+						   sizeof mad_priv->header,
+						 DMA_FROM_DEVICE);
+		mad_priv->header.mapping = sg_list.addr;
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
 		mad_priv->header.mad_list.mad_queue = recv_queue;
 
@@ -2548,12 +2550,11 @@ static int ib_mad_post_receive_mads(stru
 			list_del(&mad_priv->header.mad_list.list);
 			recv_queue->count--;
 			spin_unlock_irqrestore(&recv_queue->lock, flags);
-			dma_unmap_single(qp_info->port_priv->device->dma_device,
-					 pci_unmap_addr(&mad_priv->header,
-							mapping),
-					 sizeof *mad_priv -
-					   sizeof mad_priv->header,
-					 DMA_FROM_DEVICE);
+			ib_dma_unmap_single(qp_info->port_priv->device,
+					    mad_priv->header.mapping,
+					    sizeof *mad_priv -
+					      sizeof mad_priv->header,
+					    DMA_FROM_DEVICE);
 			kmem_cache_free(ib_mad_cache, mad_priv);
 			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
 			break;
@@ -2585,11 +2586,11 @@ static void cleanup_recv_queue(struct ib
 		/* Remove from posted receive MAD list */
 		list_del(&mad_list->list);
 
-		dma_unmap_single(qp_info->port_priv->device->dma_device,
-				 pci_unmap_addr(&recv->header, mapping),
-				 sizeof(struct ib_mad_private) -
-				 sizeof(struct ib_mad_private_header),
-				 DMA_FROM_DEVICE);
+		ib_dma_unmap_single(qp_info->port_priv->device,
+				    recv->header.mapping,
+				    sizeof(struct ib_mad_private) -
+				      sizeof(struct ib_mad_private_header),
+				    DMA_FROM_DEVICE);
 		kmem_cache_free(ib_mad_cache, recv);
 	}
 
@@ -2606,7 +2607,7 @@ static int ib_mad_port_start(struct ib_m
 	struct ib_qp *qp;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
- 	if (!attr) {
+	if (!attr) {
 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
@@ -2876,7 +2877,10 @@ static void ib_mad_init_device(struct ib
 {
 	int start, end, i;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		start = 0;
 		end   = 0;
 	} else {
@@ -2923,7 +2927,7 @@ static void ib_mad_remove_device(struct 
 {
 	int i, num_ports, cur_port;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
 	} else {
@@ -2984,10 +2988,7 @@ error1:
 static void __exit ib_mad_cleanup_module(void)
 {
 	ib_unregister_client(&mad_client);
-
-	if (kmem_cache_destroy(ib_mad_cache)) {
-		printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n");
-	}
+	kmem_cache_destroy(ib_mad_cache);
 }
 
 module_init(ib_mad_init_module);
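
The mad.c mapping changes above all follow one pattern: go through the
ib_dma_* wrappers on the ib_device rather than calling dma_map_single() on
device->dma_device, and keep the bus address in a plain u64 (see the
mad_priv.h hunk that follows) instead of behind DECLARE_PCI_UNMAP_ADDR.  A
minimal sketch of the idiom, with hypothetical buf/len and an error check
that the patch itself does not add:

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <rdma/ib_verbs.h>

static int demo_map_payload(struct ib_device *device, void *buf, size_t len,
			    u64 *mapping)
{
	/* Previously: dma_map_single(device->dma_device, buf, len,
	 *                            DMA_TO_DEVICE) plus
	 *             pci_unmap_addr_set() on the result. */
	*mapping = ib_dma_map_single(device, buf, len, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device, *mapping))
		return -ENOMEM;
	return 0;
}

static void demo_unmap_payload(struct ib_device *device, u64 mapping,
			       size_t len)
{
	ib_dma_unmap_single(device, mapping, len, DMA_TO_DEVICE);
}
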
--- linux-2.6.18.noarch/drivers/infiniband/core/mad_priv.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/mad_priv.h
@@ -38,8 +38,8 @@
 #define __IB_MAD_PRIV_H__
 
 #include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
@@ -73,7 +73,7 @@ struct ib_mad_private_header {
 	struct ib_mad_list_head mad_list;
 	struct ib_mad_recv_wc recv_wc;
 	struct ib_wc wc;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	u64 mapping;
 } __attribute__ ((packed));
 
 struct ib_mad_private {
@@ -126,8 +126,8 @@ struct ib_mad_send_wr_private {
 	struct list_head agent_list;
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_buf send_buf;
-	DECLARE_PCI_UNMAP_ADDR(header_mapping)
-	DECLARE_PCI_UNMAP_ADDR(payload_mapping)
+	u64 header_mapping;
+	u64 payload_mapping;
 	struct ib_send_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
--- linux-2.6.18.noarch/drivers/infiniband/core/mad_rmpp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/mad_rmpp.c
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
@@ -60,6 +58,7 @@ struct mad_rmpp_recv {
 	int last_ack;
 	int seg_num;
 	int newwin;
+	int repwin;
 
 	__be64 tid;
 	u32 src_qp;
@@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_res
 	return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *recv_wc)
+{
+	struct ib_mad_send_buf *msg;
+	struct ib_rmpp_mad *rmpp_mad;
+	int ret;
+
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
+		return;
+
+	rmpp_mad = msg->mad;
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+	rmpp_mad->rmpp_hdr.seg_num = 0;
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_pri
 	rmpp_recv->newwin = 1;
 	rmpp_recv->seg_num = 1;
 	rmpp_recv->last_ack = 0;
+	rmpp_recv->repwin = 1;
 
 	mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
 	rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@ static inline int window_size(struct ib_
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
 						  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+	struct ib_mad_recv_buf *seg_buf;
 	int cur_seg_num;
 
 	list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@ static inline void adjust_last_ack(struc
 			break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+			   struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+	struct mad_rmpp_recv *rmpp_recv;
+
+	rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+	if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+		rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_m
 
 	spin_lock_irqsave(&agent->lock, flags);
 	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-	if (!mad_send_wr)
-		goto out;	/* Unmatched ACK */
+	if (!mad_send_wr) {
+		if (!seg_num)
+			process_ds_ack(agent, mad_recv_wc, newwin);
+		goto out;	/* Unmatched or DS RMPP ACK */
+	}
+
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+	    (mad_send_wr->timeout)) {
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;		/* Repeated ACK for DS RMPP transaction */
+	}
 
 	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_m
 		if (mad_send_wr->refcount == 1)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@ out:
 	return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+	struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+	struct mad_rmpp_recv *rmpp_recv;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int newwin = 1;
+
+	if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+		goto out;
+
+	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->tid != mad_hdr->tid ||
+		    rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+		    rmpp_recv->class_version != mad_hdr->class_version ||
+		    (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+			continue;
+
+		if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+			continue;
+
+		if (rmpp_recv->slid == ah_attr.dlid) {
+			newwin = rmpp_recv->repwin;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+out:
+	return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_
 		return IB_RMPP_RESULT_INTERNAL;
 	}
 
-	mad_send_wr->newwin = 1;
+	mad_send_wr->newwin = init_newwin(mad_send_wr);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
--- linux-2.6.18.noarch/drivers/infiniband/core/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/Makefile
@@ -2,23 +2,28 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRAN
 user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_ucm.o
 
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o $(infiniband-y)
+					ib_local_sa.o \
+					ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
+					$(user_access-y)
 
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 				device.o fmr_pool.o cache.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
 
-ib_sa-y :=			sa_query.o
+ib_sa-y :=			sa_query.o multicast.o
+
+ib_local_sa-y :=		local_sa.o
 
 ib_cm-y :=			cm.o
 
+iw_cm-y :=			iwcm.o
+
 rdma_cm-y :=			cma.o
 
-rdma_ucm-y :=			ucma.o ucma_ib.o
+rdma_ucm-y :=			ucma.o
 
 ib_addr-y :=			addr.o
 
--- linux-2.6.18.noarch/drivers/infiniband/core/multicast.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/multicast.c
@@ -0,0 +1,846 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+static void mcast_add_one(struct ib_device *device);
+static void mcast_remove_one(struct ib_device *device);
+
+static struct ib_client mcast_client = {
+	.name   = "ib_multicast",
+	.add    = mcast_add_one,
+	.remove = mcast_remove_one
+};
+
+static struct ib_sa_client	sa_client;
+static struct workqueue_struct	*mcast_wq;
+static union ib_gid mgid0;
+
+struct mcast_device;
+
+struct mcast_port {
+	struct mcast_device	*dev;
+	spinlock_t		lock;
+	struct rb_root		table;
+	atomic_t		refcount;
+	struct completion	comp;
+	u8			port_num;
+};
+
+struct mcast_device {
+	struct ib_device	*device;
+	struct ib_event_handler	event_handler;
+	int			start_port;
+	int			end_port;
+	struct mcast_port	port[0];
+};
+
+enum mcast_state {
+	MCAST_IDLE,
+	MCAST_JOINING,
+	MCAST_MEMBER,
+	MCAST_BUSY,
+	MCAST_ERROR
+};
+
+struct mcast_member;
+
+struct mcast_group {
+	struct ib_sa_mcmember_rec rec;
+	struct rb_node		node;
+	struct mcast_port	*port;
+	spinlock_t		lock;
+	struct work_struct	work;
+	struct list_head	pending_list;
+	struct list_head	active_list;
+	struct mcast_member	*last_join;
+	int			members[3];
+	atomic_t		refcount;
+	enum mcast_state	state;
+	struct ib_sa_query	*query;
+	int			query_id;
+};
+
+struct mcast_member {
+	struct ib_sa_multicast	multicast;
+	struct ib_sa_client	*client;
+	struct mcast_group	*group;
+	struct list_head	list;
+	enum mcast_state	state;
+	atomic_t		refcount;
+	struct completion	comp;
+};
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+			 void *context);
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+			  void *context);
+
+static struct mcast_group *mcast_find(struct mcast_port *port,
+				      union ib_gid *mgid)
+{
+	struct rb_node *node = port->table.rb_node;
+	struct mcast_group *group;
+	int ret;
+
+	while (node) {
+		group = rb_entry(node, struct mcast_group, node);
+		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+		if (!ret)
+			return group;
+
+		if (ret < 0)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+	return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+					struct mcast_group *group,
+					int allow_duplicates)
+{
+	struct rb_node **link = &port->table.rb_node;
+	struct rb_node *parent = NULL;
+	struct mcast_group *cur_group;
+	int ret;
+
+	while (*link) {
+		parent = *link;
+		cur_group = rb_entry(parent, struct mcast_group, node);
+
+		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+			     sizeof group->rec.mgid);
+		if (ret < 0)
+			link = &(*link)->rb_left;
+		else if (ret > 0)
+			link = &(*link)->rb_right;
+		else if (allow_duplicates)
+			link = &(*link)->rb_left;
+		else
+			return cur_group;
+	}
+	rb_link_node(&group->node, parent, link);
+	rb_insert_color(&group->node, &port->table);
+	return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+	if (atomic_dec_and_test(&port->refcount))
+		complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+	struct mcast_port *port = group->port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (atomic_dec_and_test(&group->refcount)) {
+		rb_erase(&group->node, &port->table);
+		spin_unlock_irqrestore(&port->lock, flags);
+		kfree(group);
+		deref_port(port);
+	} else
+		spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+	if (atomic_dec_and_test(&member->refcount))
+		complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+	struct mcast_group *group = member->group;
+	unsigned long flags;
+
+	spin_lock_irqsave(&group->lock, flags);
+	list_add(&member->list, &group->pending_list);
+	if (group->state == MCAST_IDLE) {
+		group->state = MCAST_BUSY;
+		atomic_inc(&group->refcount);
+		queue_work(mcast_wq, &group->work);
+	}
+	spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has three types of members: full member, non-member, and
+ * send-only member.  We need to keep track of the number of members of each
+ * type based on their join state.  Adjust the number of members that belong to
+ * the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+	int i;
+
+	for (i = 0; i < 3; i++, join_state >>= 1)
+		if (join_state & 0x1)
+			group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still a member with the SA, we need to leave that join state.
+ * Determine which join states we still belong to, but that do not have any
+ * active members.
+ */
+static u8 get_leave_state(struct mcast_group *group)
+{
+	u8 leave_state = 0;
+	int i;
+
+	for (i = 0; i < 3; i++)
+		if (!group->members[i])
+			leave_state |= (0x1 << i);
+
+	return leave_state & group->rec.join_state;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+			  ib_sa_comp_mask selector_mask,
+			  ib_sa_comp_mask value_mask,
+			  u8 selector, u8 src_value, u8 dst_value)
+{
+	int err;
+
+	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+		return 0;
+
+	switch (selector) {
+	case IB_SA_GT:
+		err = (src_value <= dst_value);
+		break;
+	case IB_SA_LT:
+		err = (src_value >= dst_value);
+		break;
+	case IB_SA_EQ:
+		err = (src_value != dst_value);
+		break;
+	default:
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static int cmp_rec(struct ib_sa_mcmember_rec *src,
+		   struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
+{
+	/* MGID must already match */
+
+	if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
+	    memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+		return -EINVAL;
+	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+			   IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+			   src->mtu, dst->mtu))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+	    src->traffic_class != dst->traffic_class)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+		return -EINVAL;
+	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+			   IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+			   src->rate, dst->rate))
+		return -EINVAL;
+	if (check_selector(comp_mask,
+			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+			   dst->packet_life_time_selector,
+			   src->packet_life_time, dst->packet_life_time))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+	    src->flow_label != dst->flow_label)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+	    src->hop_limit != dst->hop_limit)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
+		return -EINVAL;
+
+	/* join_state checked separately, proxy_join ignored */
+
+	return 0;
+}
+
+static int send_join(struct mcast_group *group, struct mcast_member *member)
+{
+	struct mcast_port *port = group->port;
+	int ret;
+
+	group->last_join = member;
+	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+				       port->port_num, IB_MGMT_METHOD_SET,
+				       &member->multicast.rec,
+				       member->multicast.comp_mask,
+				       3000, GFP_KERNEL, join_handler, group,
+				       &group->query);
+	if (ret >= 0) {
+		group->query_id = ret;
+		ret = 0;
+	}
+	return ret;
+}
+
+static int send_leave(struct mcast_group *group, u8 leave_state)
+{
+	struct mcast_port *port = group->port;
+	struct ib_sa_mcmember_rec rec;
+	int ret;
+
+	rec = group->rec;
+	rec.join_state = leave_state;
+
+	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+				       port->port_num, IB_SA_METHOD_DELETE, &rec,
+				       IB_SA_MCMEMBER_REC_MGID     |
+				       IB_SA_MCMEMBER_REC_PORT_GID |
+				       IB_SA_MCMEMBER_REC_JOIN_STATE,
+				       3000, GFP_KERNEL, leave_handler,
+				       group, &group->query);
+	if (ret >= 0) {
+		group->query_id = ret;
+		ret = 0;
+	}
+	return ret;
+}
+
+static void join_group(struct mcast_group *group, struct mcast_member *member,
+		       u8 join_state)
+{
+	member->state = MCAST_MEMBER;
+	adjust_membership(group, join_state, 1);
+	group->rec.join_state |= join_state;
+	member->multicast.rec = group->rec;
+	member->multicast.rec.join_state = join_state;
+	list_del(&member->list);
+	list_add(&member->list, &group->active_list);
+}
+
+static int fail_join(struct mcast_group *group, struct mcast_member *member,
+		     int status)
+{
+	spin_lock_irq(&group->lock);
+	list_del_init(&member->list);
+	spin_unlock_irq(&group->lock);
+	return member->multicast.callback(status, &member->multicast);
+}
+
+static void process_group_error(struct mcast_group *group)
+{
+	struct mcast_member *member;
+	int ret;
+
+	spin_lock_irq(&group->lock);
+	while (!list_empty(&group->active_list)) {
+		member = list_entry(group->active_list.next,
+				    struct mcast_member, list);
+		atomic_inc(&member->refcount);
+		list_del_init(&member->list);
+		adjust_membership(group, member->multicast.rec.join_state, -1);
+		member->state = MCAST_ERROR;
+		spin_unlock_irq(&group->lock);
+
+		ret = member->multicast.callback(-ENETRESET,
+						 &member->multicast);
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+		spin_lock_irq(&group->lock);
+	}
+
+	group->rec.join_state = 0;
+	group->state = MCAST_BUSY;
+	spin_unlock_irq(&group->lock);
+}
+
+static void mcast_work_handler(void *_work)
+{
+	struct mcast_group *group;
+	struct mcast_member *member;
+	struct ib_sa_multicast *multicast;
+	int status, ret;
+	u8 join_state;
+
+	group = container_of(_work, typeof(*group), work);
+retest:
+	spin_lock_irq(&group->lock);
+	while (!list_empty(&group->pending_list) ||
+	       (group->state == MCAST_ERROR)) {
+
+		if (group->state == MCAST_ERROR) {
+			spin_unlock_irq(&group->lock);
+			process_group_error(group);
+			goto retest;
+		}
+
+		member = list_entry(group->pending_list.next,
+				    struct mcast_member, list);
+		multicast = &member->multicast;
+		join_state = multicast->rec.join_state;
+		atomic_inc(&member->refcount);
+
+		if (join_state == (group->rec.join_state & join_state)) {
+			status = cmp_rec(&group->rec, &multicast->rec,
+					 multicast->comp_mask);
+			if (!status)
+				join_group(group, member, join_state);
+			else
+				list_del_init(&member->list);
+			spin_unlock_irq(&group->lock);
+			ret = multicast->callback(status, multicast);
+		} else {
+			spin_unlock_irq(&group->lock);
+			status = send_join(group, member);
+			if (!status) {
+				deref_member(member);
+				return;
+			}
+			ret = fail_join(group, member, status);
+		}
+
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+		spin_lock_irq(&group->lock);
+	}
+
+	join_state = get_leave_state(group);
+	if (join_state) {
+		group->rec.join_state &= ~join_state;
+		spin_unlock_irq(&group->lock);
+		if (send_leave(group, join_state))
+			goto retest;
+	} else {
+		group->state = MCAST_IDLE;
+		spin_unlock_irq(&group->lock);
+		release_group(group);
+	}
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ */
+static void process_join_error(struct mcast_group *group, int status)
+{
+	struct mcast_member *member;
+	int ret;
+
+	spin_lock_irq(&group->lock);
+	member = list_entry(group->pending_list.next,
+			    struct mcast_member, list);
+	if (group->last_join == member) {
+		atomic_inc(&member->refcount);
+		list_del_init(&member->list);
+		spin_unlock_irq(&group->lock);
+		ret = member->multicast.callback(status, &member->multicast);
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+	} else
+		spin_unlock_irq(&group->lock);
+}
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+			 void *context)
+{
+	struct mcast_group *group = context;
+
+	if (status)
+		process_join_error(group, status);
+	else {
+		spin_lock_irq(&group->port->lock);
+		group->rec = *rec;
+		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+			rb_erase(&group->node, &group->port->table);
+			mcast_insert(group->port, group, 1);
+		}
+		spin_unlock_irq(&group->port->lock);
+	}
+	mcast_work_handler(&group->work);
+}
+
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+			  void *context)
+{
+	struct mcast_group *group = context;
+
+	mcast_work_handler(&group->work);
+}
+
+static struct mcast_group *acquire_group(struct mcast_port *port,
+					 union ib_gid *mgid, gfp_t gfp_mask)
+{
+	struct mcast_group *group, *cur_group;
+	unsigned long flags;
+	int is_mgid0;
+
+	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+	if (!is_mgid0) {
+		spin_lock_irqsave(&port->lock, flags);
+		group = mcast_find(port, mgid);
+		if (group)
+			goto found;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	group = kzalloc(sizeof *group, gfp_mask);
+	if (!group)
+		return NULL;
+
+	group->port = port;
+	group->rec.mgid = *mgid;
+	INIT_LIST_HEAD(&group->pending_list);
+	INIT_LIST_HEAD(&group->active_list);
+	INIT_WORK(&group->work, mcast_work_handler, &group->work);
+	spin_lock_init(&group->lock);
+
+	spin_lock_irqsave(&port->lock, flags);
+	cur_group = mcast_insert(port, group, is_mgid0);
+	if (cur_group) {
+		kfree(group);
+		group = cur_group;
+	} else
+		atomic_inc(&port->refcount);
+found:
+	atomic_inc(&group->refcount);
+	spin_unlock_irqrestore(&port->lock, flags);
+	return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier.  Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+		     struct ib_device *device, u8 port_num,
+		     struct ib_sa_mcmember_rec *rec,
+		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+		     int (*callback)(int status,
+				     struct ib_sa_multicast *multicast),
+		     void *context)
+{
+	struct mcast_device *dev;
+	struct mcast_member *member;
+	struct ib_sa_multicast *multicast;
+	int ret;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	member = kzalloc(sizeof *member, gfp_mask);
+	if (!member)
+		return ERR_PTR(-ENOMEM);
+
+	ib_sa_client_get(client);
+	member->client = client;
+	member->multicast.rec = *rec;
+	member->multicast.comp_mask = comp_mask;
+	member->multicast.callback = callback;
+	member->multicast.context = context;
+	init_completion(&member->comp);
+	atomic_set(&member->refcount, 1);
+	member->state = MCAST_JOINING;
+
+	member->group = acquire_group(&dev->port[port_num - dev->start_port],
+				      &rec->mgid, gfp_mask);
+	if (!member->group) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * The user will get the multicast structure in their callback.  They
+	 * could then free the multicast structure before we can return from
+	 * this routine.  So we save the pointer to return before queuing
+	 * any callback.
+	 */
+	multicast = &member->multicast;
+	queue_join(member);
+	return multicast;
+
+err:
+	ib_sa_client_put(client);
+	kfree(member);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_join_multicast);
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+	struct mcast_member *member;
+	struct mcast_group *group;
+
+	member = container_of(multicast, struct mcast_member, multicast);
+	group = member->group;
+
+	spin_lock_irq(&group->lock);
+	if (member->state == MCAST_MEMBER)
+		adjust_membership(group, multicast->rec.join_state, -1);
+
+	list_del_init(&member->list);
+
+	if (group->state == MCAST_IDLE) {
+		group->state = MCAST_BUSY;
+		spin_unlock_irq(&group->lock);
+		/* Continue to hold reference on group until callback */
+		queue_work(mcast_wq, &group->work);
+	} else {
+		spin_unlock_irq(&group->lock);
+		release_group(group);
+	}
+
+	deref_member(member);
+	wait_for_completion(&member->comp);
+	ib_sa_client_put(member->client);
+	kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_free_multicast);
+
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	struct mcast_group *group;
+	unsigned long flags;
+	int ret = 0;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return -ENODEV;
+
+	port = &dev->port[port_num - dev->start_port];
+	if (mgid && memcmp(mgid, &mgid0, sizeof mgid0)) {
+		spin_lock_irqsave(&port->lock, flags);
+		group = mcast_find(port, mgid);
+		if (group)
+			*rec = group->rec;
+		else
+			ret = -EADDRNOTAVAIL;
+		spin_unlock_irqrestore(&port->lock, flags);
+	} else {
+		memset(rec, 0, sizeof *rec);
+		ib_get_cached_gid(device, port_num, 0, &rec->port_gid);
+		rec->pkey = 0xFFFF;
+		get_random_bytes(&rec->qkey, sizeof rec->qkey);
+		rec->join_state = 1;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	u16 gid_index;
+	u8 p;
+
+	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+	if (ret)
+		return ret;
+
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = be16_to_cpu(rec->mlid);
+	ah_attr->sl = rec->sl;
+	ah_attr->port_num = port_num;
+	ah_attr->static_rate = rec->rate;
+
+	ah_attr->ah_flags = IB_AH_GRH;
+	ah_attr->grh.dgid = rec->mgid;
+
+	ah_attr->grh.sgid_index = (u8) gid_index;
+	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+	ah_attr->grh.hop_limit = rec->hop_limit;
+	ah_attr->grh.traffic_class = rec->traffic_class;
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
+static void mcast_groups_lost(struct mcast_port *port)
+{
+	struct mcast_group *group;
+	struct rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	for (node = rb_first(&port->table); node; node = rb_next(node)) {
+		group = rb_entry(node, struct mcast_group, node);
+		spin_lock(&group->lock);
+		if (group->state == MCAST_IDLE) {
+			atomic_inc(&group->refcount);
+			queue_work(mcast_wq, &group->work);
+		}
+		group->state = MCAST_ERROR;
+		spin_unlock(&group->lock);
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+				struct ib_event *event)
+{
+	struct mcast_device *dev;
+
+	dev = container_of(handler, struct mcast_device, event_handler);
+
+	switch (event->event) {
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_LID_CHANGE:
+	case IB_EVENT_SM_CHANGE:
+	case IB_EVENT_CLIENT_REREGISTER:
+		mcast_groups_lost(&dev->port[event->element.port_num -
+					     dev->start_port]);
+		break;
+	default:
+		break;
+	}
+}
+
+static void mcast_add_one(struct ib_device *device)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	int i;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+		      GFP_KERNEL);
+	if (!dev)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
+		dev->start_port = dev->end_port = 0;
+	else {
+		dev->start_port = 1;
+		dev->end_port = device->phys_port_cnt;
+	}
+
+	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+		port = &dev->port[i];
+		port->dev = dev;
+		port->port_num = dev->start_port + i;
+		spin_lock_init(&port->lock);
+		port->table = RB_ROOT;
+		init_completion(&port->comp);
+		atomic_set(&port->refcount, 1);
+	}
+
+	dev->device = device;
+	ib_set_client_data(device, &mcast_client, dev);
+
+	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
+	ib_register_event_handler(&dev->event_handler);
+}
+
+static void mcast_remove_one(struct ib_device *device)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	int i;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return;
+
+	ib_unregister_event_handler(&dev->event_handler);
+	flush_workqueue(mcast_wq);
+
+	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+		port = &dev->port[i];
+		deref_port(port);
+		wait_for_completion(&port->comp);
+	}
+
+	kfree(dev);
+}
+
+int mcast_init(void)
+{
+	int ret;
+
+	mcast_wq = create_singlethread_workqueue("ib_mcast");
+	if (!mcast_wq)
+		return -ENOMEM;
+
+	ib_sa_register_client(&sa_client);
+
+	ret = ib_register_client(&mcast_client);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	ib_sa_unregister_client(&sa_client);
+	destroy_workqueue(mcast_wq);
+	return ret;
+}
+
+void mcast_cleanup(void)
+{
+	ib_unregister_client(&mcast_client);
+	ib_sa_unregister_client(&sa_client);
+	destroy_workqueue(mcast_wq);
+}
--- linux-2.6.18.noarch/drivers/infiniband/core/sa.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/sa.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SA_H
+#define SA_H
+
+#include <rdma/ib_sa.h>
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))
+		complete(&client->comp);
+}
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
+			     u8 method,
+			     struct ib_sa_mcmember_rec *rec,
+			     ib_sa_comp_mask comp_mask,
+			     int timeout_ms, gfp_t gfp_mask,
+			     void (*callback)(int status,
+					      struct ib_sa_mcmember_rec *resp,
+					      void *context),
+			     void *context,
+			     struct ib_sa_query **sa_query);
+
+int mcast_init(void);
+void mcast_cleanup(void);
+
+#endif /* SA_H */
--- linux-2.6.18.noarch/drivers/infiniband/core/sa_query.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,8 +47,8 @@
 #include <linux/workqueue.h>
 
 #include <rdma/ib_pack.h>
-#include <rdma/ib_sa.h>
 #include <rdma/ib_cache.h>
+#include "sa.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct ib_sa_client    *client;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -350,6 +352,32 @@ static const struct ib_field service_rec
 	  .size_bits    = 2*64 },
 };
 
+int ib_sa_pack_attr(void *dst, void *src, int attr_id)
+{
+	switch (attr_id) {
+	case IB_SA_ATTR_PATH_REC:
+		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ib_sa_pack_attr);
+
+int ib_sa_unpack_attr(void *dst, void *src, int attr_id)
+{
+	switch (attr_id) {
+	case IB_SA_ATTR_PATH_REC:
+		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ib_sa_unpack_attr);
+
 static void free_sm_ah(struct kref *kref)
 {
 	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -415,6 +443,20 @@ static void ib_sa_event(struct ib_event_
 	}
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);
+	init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);
+	wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -454,6 +496,7 @@ int ib_init_ah_from_path(struct ib_devic
 	ah_attr->sl = rec->sl;
 	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
 	ah_attr->port_num = port_num;
+	ah_attr->static_rate = rec->rate;
 
 	if (rec->hop_limit > 1) {
 		ah_attr->ah_flags = IB_AH_GRH;
@@ -557,6 +600,7 @@ static void ib_sa_path_rec_release(struc
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +623,8 @@ static void ib_sa_path_rec_release(struc
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +659,10 @@ int ib_sa_path_rec_get(struct ib_device 
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -639,6 +686,7 @@ int ib_sa_path_rec_get(struct ib_device 
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +719,7 @@ static void ib_sa_service_rec_release(st
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +744,8 @@ static void ib_sa_service_rec_release(st
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
 			    int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +785,10 @@ int ib_sa_service_rec_query(struct ib_de
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -761,6 +813,7 @@ int ib_sa_service_rec_query(struct ib_de
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +844,8 @@ static void ib_sa_mcmember_rec_release(s
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -827,8 +881,10 @@ int ib_sa_mcmember_rec_query(struct ib_d
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -853,13 +909,13 @@ int ib_sa_mcmember_rec_query(struct ib_d
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
 	kfree(query);
 	return ret;
 }
-EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
 
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
@@ -887,8 +943,9 @@ static void send_handler(struct ib_mad_a
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
+	ib_sa_client_put(query->client);
 	query->release(query);
 }
 
@@ -919,7 +976,10 @@ static void ib_sa_add_one(struct ib_devi
 	struct ib_sa_device *sa_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
@@ -1008,14 +1068,27 @@ static int __init ib_sa_init(void)
 	get_random_bytes(&tid, sizeof tid);
 
 	ret = ib_register_client(&sa_client);
-	if (ret)
+	if (ret) {
 		printk(KERN_ERR "Couldn't register ib_sa client\n");
+		goto err1;
+	}
 
+	ret = mcast_init();
+	if (ret) {
+		printk(KERN_ERR "Couldn't initialize multicast handling\n");
+		goto err2;
+	}
+
+	return 0;
+err2:
+	ib_unregister_client(&sa_client);
+err1:
 	return ret;
 }
 
 static void __exit ib_sa_cleanup(void)
 {
+	mcast_cleanup();
 	ib_unregister_client(&sa_client);
 	idr_destroy(&query_idr);
 }
--- linux-2.6.18.noarch/drivers/infiniband/core/smi.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/smi.c
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp
 
 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp
 		if (hop_ptr == hop_cnt) {
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp
 		if (hop_ptr == 1) {
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_slid == IB_LID_PERMISSIVE);
 		}
 
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp
 
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp
 				smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */
 
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp
 				return 1;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == IB_NODE_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
--- linux-2.6.18.noarch/drivers/infiniband/core/sysfs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/sysfs.c
@@ -68,7 +68,7 @@ struct port_table_attribute {
 	int			index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
 	return dev->reg_state == IB_DEV_REGISTERED;
 }
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct cla
 		return -ENODEV;
 
 	switch (dev->node_type) {
-	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-	case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-	default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
+	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }
 
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_d
 	if (ret)
 		goto err_put;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
--- linux-2.6.18.noarch/drivers/infiniband/core/ucma.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/ucma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -42,14 +42,6 @@
 #include <rdma/ib_marshall.h>
 #include <rdma/rdma_cm.h>
 
-#include "ucma_ib.h"
-
-enum {
-	RDMA_TRANSPORT_IB
-};
-
-#define rdma_node_get_transport(x) RDMA_TRANSPORT_IB
-
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -59,10 +51,10 @@ enum {
 };
 
 struct ucma_file {
-	struct mutex		file_mutex;
+	struct mutex		mut;
 	struct file		*filp;
-	struct list_head	ctxs;
-	struct list_head	events;
+	struct list_head	ctx_list;
+	struct list_head	event_list;
 	wait_queue_head_t	poll_wait;
 };
 
@@ -75,37 +67,58 @@ struct ucma_context {
 
 	struct ucma_file	*file;
 	struct rdma_cm_id	*cm_id;
-	__u64			uid;
+	u64			uid;
+
+	struct list_head	list;
+	struct list_head	mc_list;
+};
+
+struct ucma_multicast {
+	struct ucma_context	*ctx;
+	int			id;
+	int			events_reported;
 
-	struct list_head	events;    /* list of pending events. */
-	struct list_head	file_list; /* member in file ctx list */
+	u64			uid;
+	struct list_head	list;
+	struct sockaddr		addr;
+	u8			pad[sizeof(struct sockaddr_in6) -
+				    sizeof(struct sockaddr)];
 };
 
 struct ucma_event {
 	struct ucma_context	*ctx;
-	struct list_head	file_list; /* member in file event list */
-	struct list_head	ctx_list;  /* member in ctx event list */
+	struct ucma_multicast	*mc;
+	struct list_head	list;
 	struct rdma_cm_id	*cm_id;
 	struct rdma_ucm_event_resp resp;
 };
 
-static DEFINE_MUTEX(ctx_mutex);
+static DEFINE_MUTEX(mut);
 static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
 
-static struct ucma_context* ucma_get_ctx(struct ucma_file *file, int id)
+static inline struct ucma_context *_ucma_find_context(int id,
+						      struct ucma_file *file)
 {
 	struct ucma_context *ctx;
 
-	mutex_lock(&ctx_mutex);
 	ctx = idr_find(&ctx_idr, id);
 	if (!ctx)
 		ctx = ERR_PTR(-ENOENT);
 	else if (ctx->file != file)
 		ctx = ERR_PTR(-EINVAL);
-	else
-		atomic_inc(&ctx->ref);
-	mutex_unlock(&ctx_mutex);
+	return ctx;
+}
+
+static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
+{
+	struct ucma_context *ctx;
 
+	mutex_lock(&mut);
+	ctx = _ucma_find_context(id, file);
+	if (!IS_ERR(ctx))
+		atomic_inc(&ctx->ref);
+	mutex_unlock(&mut);
 	return ctx;
 }
 
@@ -115,29 +128,7 @@ static void ucma_put_ctx(struct ucma_con
 		complete(&ctx->comp);
 }
 
-static void ucma_cleanup_events(struct ucma_context *ctx)
-{
-	struct ucma_event *uevent;
-
-	mutex_lock(&ctx->file->file_mutex);
-	list_del(&ctx->file_list);
-	while (!list_empty(&ctx->events)) {
-
-		uevent = list_entry(ctx->events.next, struct ucma_event,
-				    ctx_list);
-		list_del(&uevent->file_list);
-		list_del(&uevent->ctx_list);
-
-		/* clear incoming connections. */
-		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
-			rdma_destroy_id(uevent->cm_id);
-
-		kfree(uevent);
-	}
-	mutex_unlock(&ctx->file->file_mutex);
-}
-
-static struct ucma_context* ucma_alloc_ctx(struct ucma_file *file)
+static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 {
 	struct ucma_context *ctx;
 	int ret;
@@ -148,23 +139,23 @@ static struct ucma_context* ucma_alloc_c
 
 	atomic_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
+	INIT_LIST_HEAD(&ctx->mc_list);
 	ctx->file = file;
-	INIT_LIST_HEAD(&ctx->events);
 
 	do {
 		ret = idr_pre_get(&ctx_idr, GFP_KERNEL);
 		if (!ret)
 			goto error;
 
-		mutex_lock(&ctx_mutex);
+		mutex_lock(&mut);
 		ret = idr_get_new(&ctx_idr, ctx, &ctx->id);
-		mutex_unlock(&ctx_mutex);
+		mutex_unlock(&mut);
 	} while (ret == -EAGAIN);
 
 	if (ret)
 		goto error;
 
-	list_add_tail(&ctx->file_list, &file->ctxs);
+	list_add_tail(&ctx->list, &file->ctx_list);
 	return ctx;
 
 error:
@@ -172,6 +163,85 @@ error:
 	return NULL;
 }
 
+static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+{
+	struct ucma_multicast *mc;
+	int ret;
+
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+	if (!mc)
+		return NULL;
+
+	do {
+		ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
+		if (!ret)
+			goto error;
+
+		mutex_lock(&mut);
+		ret = idr_get_new(&multicast_idr, mc, &mc->id);
+		mutex_unlock(&mut);
+	} while (ret == -EAGAIN);
+
+	if (ret)
+		goto error;
+
+	mc->ctx = ctx;
+	list_add_tail(&mc->list, &ctx->mc_list);
+	return mc;
+
+error:
+	kfree(mc);
+	return NULL;
+}
+
+static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
+				 struct rdma_conn_param *src)
+{
+	if (src->private_data_len)
+		memcpy(dst->private_data, src->private_data,
+		       src->private_data_len);
+	dst->private_data_len = src->private_data_len;
+	dst->responder_resources = src->responder_resources;
+	dst->initiator_depth = src->initiator_depth;
+	dst->flow_control = src->flow_control;
+	dst->retry_count = src->retry_count;
+	dst->rnr_retry_count = src->rnr_retry_count;
+	dst->srq = src->srq;
+	dst->qp_num = src->qp_num;
+}
+
+static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
+			       struct rdma_ud_param *src)
+{
+	if (src->private_data_len)
+		memcpy(dst->private_data, src->private_data,
+		       src->private_data_len);
+	dst->private_data_len = src->private_data_len;
+	ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+	dst->qp_num = src->qp_num;
+	dst->qkey = src->qkey;
+}
+
+static void ucma_set_event_context(struct ucma_context *ctx,
+				   struct rdma_cm_event *event,
+				   struct ucma_event *uevent)
+{
+	uevent->ctx = ctx;
+	switch (event->event) {
+	case RDMA_CM_EVENT_MULTICAST_JOIN:
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
+		uevent->mc = (struct ucma_multicast *)
+			     event->param.ud.private_data;
+		uevent->resp.uid = uevent->mc->uid;
+		uevent->resp.id = uevent->mc->id;
+		break;
+	default:
+		uevent->resp.uid = ctx->uid;
+		uevent->resp.id = ctx->id;
+		break;
+	}
+}
+
 static int ucma_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event)
 {
@@ -183,29 +253,39 @@ static int ucma_event_handler(struct rdm
 	if (!uevent)
 		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
 
-	uevent->ctx = ctx;
 	uevent->cm_id = cm_id;
-	uevent->resp.uid = ctx->uid;
-	uevent->resp.id = ctx->id;
+	ucma_set_event_context(ctx, event, uevent);
 	uevent->resp.event = event->event;
 	uevent->resp.status = event->status;
-	if ((uevent->resp.private_data_len = event->private_data_len))
-		memcpy(uevent->resp.private_data, event->private_data,
-		       event->private_data_len);
+	if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
+	else
+		ucma_copy_conn_event(&uevent->resp.param.conn,
+				     &event->param.conn);
 
-	mutex_lock(&ctx->file->file_mutex);
+	mutex_lock(&ctx->file->mut);
 	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 		if (!ctx->backlog) {
-			ret = -EDQUOT;
+			ret = -ENOMEM;
+			kfree(uevent);
 			goto out;
 		}
 		ctx->backlog--;
+	} else if (!ctx->uid) {
+		/*
+		 * We ignore events for new connections until userspace has set
+		 * their context.  This can only happen if an error occurs on a
+		 * new connection before the user accepts it.  This is okay,
+		 * since the accept will just fail later.
+		 */
+		kfree(uevent);
+		goto out;
 	}
-	list_add_tail(&uevent->file_list, &ctx->file->events);
-	list_add_tail(&uevent->ctx_list, &ctx->events);
+
+	list_add_tail(&uevent->list, &ctx->file->event_list);
 	wake_up_interruptible(&ctx->file->poll_wait);
 out:
-	mutex_unlock(&ctx->file->file_mutex);
+	mutex_unlock(&ctx->file->mut);
 	return ret;
 }
 
@@ -218,14 +298,14 @@ static ssize_t ucma_get_event(struct ucm
 	int ret = 0;
 	DEFINE_WAIT(wait);
 
-	if (out_len < sizeof(struct rdma_ucm_event_resp))
+	if (out_len < sizeof uevent->resp)
 		return -ENOSPC;
 
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	mutex_lock(&file->file_mutex);
-	while (list_empty(&file->events)) {
+	mutex_lock(&file->mut);
+	while (list_empty(&file->event_list)) {
 		if (file->filp->f_flags & O_NONBLOCK) {
 			ret = -EAGAIN;
 			break;
@@ -237,16 +317,16 @@ static ssize_t ucma_get_event(struct ucm
 		}
 
 		prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
-		mutex_unlock(&file->file_mutex);
+		mutex_unlock(&file->mut);
 		schedule();
-		mutex_lock(&file->file_mutex);
+		mutex_lock(&file->mut);
 		finish_wait(&file->poll_wait, &wait);
 	}
 
 	if (ret)
 		goto done;
 
-	uevent = list_entry(file->events.next, struct ucma_event, file_list);
+	uevent = list_entry(file->event_list.next, struct ucma_event, list);
 
 	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
 		ctx = ucma_alloc_ctx(file);
@@ -261,17 +341,18 @@ static ssize_t ucma_get_event(struct ucm
 	}
 
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
-			 &uevent->resp, sizeof(uevent->resp))) {
+			 &uevent->resp, sizeof uevent->resp)) {
 		ret = -EFAULT;
 		goto done;
 	}
 
-	list_del(&uevent->file_list);
-	list_del(&uevent->ctx_list);
+	list_del(&uevent->list);
 	uevent->ctx->events_reported++;
+	if (uevent->mc)
+		uevent->mc->events_reported++;
 	kfree(uevent);
 done:
-	mutex_unlock(&file->file_mutex);
+	mutex_unlock(&file->mut);
 	return ret;
 }
 
@@ -290,14 +371,14 @@ static ssize_t ucma_create_id(struct ucm
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	mutex_lock(&file->file_mutex);
+	mutex_lock(&file->mut);
 	ctx = ucma_alloc_ctx(file);
-	mutex_unlock(&file->file_mutex);
+	mutex_unlock(&file->mut);
 	if (!ctx)
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, RDMA_PS_TCP);
+	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
 	if (IS_ERR(ctx->cm_id)) {
 		ret = PTR_ERR(ctx->cm_id);
 		goto err1;
@@ -314,13 +395,77 @@ static ssize_t ucma_create_id(struct ucm
 err2:
 	rdma_destroy_id(ctx->cm_id);
 err1:
-	mutex_lock(&ctx_mutex);
+	mutex_lock(&mut);
 	idr_remove(&ctx_idr, ctx->id);
-	mutex_unlock(&ctx_mutex);
+	mutex_unlock(&mut);
 	kfree(ctx);
 	return ret;
 }
 
+static void ucma_cleanup_multicast(struct ucma_context *ctx)
+{
+	struct ucma_multicast *mc, *tmp;
+
+	mutex_lock(&mut);
+	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+		list_del(&mc->list);
+		idr_remove(&multicast_idr, mc->id);
+		kfree(mc);
+	}
+	mutex_unlock(&mut);
+}
+
+static void ucma_cleanup_events(struct ucma_context *ctx)
+{
+	struct ucma_event *uevent, *tmp;
+
+	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
+		if (uevent->ctx != ctx)
+			continue;
+
+		list_del(&uevent->list);
+
+		/* clear incoming connections. */
+		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+			rdma_destroy_id(uevent->cm_id);
+
+		kfree(uevent);
+	}
+}
+
+static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+{
+	struct ucma_event *uevent, *tmp;
+
+	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
+		if (uevent->mc != mc)
+			continue;
+
+		list_del(&uevent->list);
+		kfree(uevent);
+	}
+}
+
+static int ucma_free_ctx(struct ucma_context *ctx)
+{
+	int events_reported;
+
+	/* No new events will be generated after destroying the id. */
+	rdma_destroy_id(ctx->cm_id);
+
+	ucma_cleanup_multicast(ctx);
+
+	/* Cleanup events not yet reported to the user. */
+	mutex_lock(&ctx->file->mut);
+	ucma_cleanup_events(ctx);
+	list_del(&ctx->list);
+	mutex_unlock(&ctx->file->mut);
+
+	events_reported = ctx->events_reported;
+	kfree(ctx);
+	return events_reported;
+}
+
 static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 			       int in_len, int out_len)
 {
@@ -335,33 +480,23 @@ static ssize_t ucma_destroy_id(struct uc
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	mutex_lock(&ctx_mutex);
-	ctx = idr_find(&ctx_idr, cmd.id);
-	if (!ctx)
-		ctx = ERR_PTR(-ENOENT);
-	else if (ctx->file != file)
-		ctx = ERR_PTR(-EINVAL);
-	else
+	mutex_lock(&mut);
+	ctx = _ucma_find_context(cmd.id, file);
+	if (!IS_ERR(ctx))
 		idr_remove(&ctx_idr, ctx->id);
-	mutex_unlock(&ctx_mutex);
+	mutex_unlock(&mut);
 
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	ucma_put_ctx(ctx);
 	wait_for_completion(&ctx->comp);
+	resp.events_reported = ucma_free_ctx(ctx);
 
-	/* No new events will be generated after destroying the id. */
-	rdma_destroy_id(ctx->cm_id);
-	/* Cleanup events not yet reported to the user. */
-	ucma_cleanup_events(ctx);
-
-	resp.events_reported = ctx->events_reported;
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
 			 &resp, sizeof(resp)))
 		ret = -EFAULT;
 
-	kfree(ctx);
 	return ret;
 }
 
@@ -477,11 +612,11 @@ static ssize_t ucma_query_route(struct u
 	memset(&resp, 0, sizeof resp);
 	addr = &ctx->cm_id->route.addr.src_addr;
 	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
-				     sizeof(struct sockaddr_in) : 
+				     sizeof(struct sockaddr_in) :
 				     sizeof(struct sockaddr_in6));
 	addr = &ctx->cm_id->route.addr.dst_addr;
 	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
-				     sizeof(struct sockaddr_in) : 
+				     sizeof(struct sockaddr_in) :
 				     sizeof(struct sockaddr_in6));
 	if (!ctx->cm_id->device)
 		goto out;
@@ -491,6 +626,7 @@ static ssize_t ucma_query_route(struct u
 	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
 		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
+		break;
 	default:
 		break;
 	}
@@ -504,19 +640,18 @@ out:
 	return ret;
 }
 
-static void ucma_copy_conn_param(struct rdma_conn_param *dst_conn,
-				 struct rdma_ucm_conn_param *src_conn)
+static void ucma_copy_conn_param(struct rdma_conn_param *dst,
+				 struct rdma_ucm_conn_param *src)
 {
-	dst_conn->private_data = src_conn->private_data;
-	dst_conn->private_data_len = src_conn->private_data_len;
-	dst_conn->responder_resources =src_conn->responder_resources;
-	dst_conn->initiator_depth = src_conn->initiator_depth;
-	dst_conn->flow_control = src_conn->flow_control;
-	dst_conn->retry_count = src_conn->retry_count;
-	dst_conn->rnr_retry_count = src_conn->rnr_retry_count;
-	dst_conn->srq = src_conn->srq;
-	dst_conn->qp_num = src_conn->qp_num;
-	dst_conn->qp_type = src_conn->qp_type;
+	dst->private_data = src->private_data;
+	dst->private_data_len = src->private_data_len;
+	dst->responder_resources = src->responder_resources;
+	dst->initiator_depth = src->initiator_depth;
+	dst->flow_control = src->flow_control;
+	dst->retry_count = src->retry_count;
+	dst->rnr_retry_count = src->rnr_retry_count;
+	dst->srq = src->srq;
+	dst->qp_num = src->qp_num;
 }
 
 static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -665,12 +800,33 @@ out:
 	return ret;
 }
 
-static ssize_t ucma_get_option(struct ucma_file *file, const char __user *inbuf,
-			       int in_len, int out_len)
+static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
+			   int in_len, int out_len)
+{
+	struct rdma_ucm_notify cmd;
+	struct ucma_context *ctx;
+	int ret;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+				   const char __user *inbuf,
+				   int in_len, int out_len)
 {
-	struct rdma_ucm_get_option cmd;
-	struct rdma_ucm_get_option_resp resp;
+	struct rdma_ucm_join_mcast cmd;
+	struct rdma_ucm_create_id_resp resp;
 	struct ucma_context *ctx;
+	struct ucma_multicast *mc;
 	int ret;
 
 	if (out_len < sizeof(resp))
@@ -683,62 +839,91 @@ static ssize_t ucma_get_option(struct uc
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	resp.optlen = cmd.optlen;
-
-	switch (cmd.level) {
-	case RDMA_PROTO_IP:
-		ret = -ENOSYS;
-		break;
-	case RDMA_PROTO_IB:
-		ret = ucma_get_ib_option(ctx->cm_id, cmd.optname,
-					 (void *) (unsigned long) cmd.optval,
-					 &resp.optlen);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
+	mutex_lock(&file->mut);
+	mc = ucma_alloc_multicast(ctx);
+	if (!mc) {
+		ret = -ENOMEM;
+		goto err1;
 	}
 
+	mc->uid = cmd.uid;
+	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+	ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
 	if (ret)
-		goto out;
+		goto err2;
 
+	resp.id = mc->id;
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
-			 &resp, sizeof(resp)))
+			 &resp, sizeof(resp))) {
 		ret = -EFAULT;
-out:
+		goto err3;
+	}
+
+	mutex_unlock(&file->mut);
+	ucma_put_ctx(ctx);
+	return 0;
+
+err3:
+	rdma_leave_multicast(ctx->cm_id, &mc->addr);
+	ucma_cleanup_mc_events(mc);
+err2:
+	mutex_lock(&mut);
+	idr_remove(&multicast_idr, mc->id);
+	mutex_unlock(&mut);
+	list_del(&mc->list);
+	kfree(mc);
+err1:
+	mutex_unlock(&file->mut);
 	ucma_put_ctx(ctx);
 	return ret;
 }
 
-static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
-			       int in_len, int out_len)
+static ssize_t ucma_leave_multicast(struct ucma_file *file,
+				    const char __user *inbuf,
+				    int in_len, int out_len)
 {
-	struct rdma_ucm_set_option cmd;
-	struct ucma_context *ctx;
-	int ret;
+	struct rdma_ucm_destroy_id cmd;
+	struct rdma_ucm_destroy_id_resp resp;
+	struct ucma_multicast *mc;
+	int ret = 0;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
 
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
 
-	ctx = ucma_get_ctx(file, cmd.id);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+	mutex_lock(&mut);
+	mc = idr_find(&multicast_idr, cmd.id);
+	if (!mc)
+		mc = ERR_PTR(-ENOENT);
+	else if (mc->ctx->file != file)
+		mc = ERR_PTR(-EINVAL);
+	else {
+		idr_remove(&multicast_idr, mc->id);
+		atomic_inc(&mc->ctx->ref);
+	}
+	mutex_unlock(&mut);
 
-	switch (cmd.level) {
-	case RDMA_PROTO_IP:
-		ret = -ENOSYS;
-		break;
-	case RDMA_PROTO_IB:
-		ret = ucma_set_ib_option(ctx->cm_id, cmd.optname,
-					 (void *) (unsigned long) cmd.optval,
-					 cmd.optlen);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
+	if (IS_ERR(mc)) {
+		ret = PTR_ERR(mc);
+		goto out;
 	}
 
-	ucma_put_ctx(ctx);
+	rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
+	mutex_lock(&mc->ctx->file->mut);
+	ucma_cleanup_mc_events(mc);
+	list_del(&mc->list);
+	mutex_unlock(&mc->ctx->file->mut);
+
+	ucma_put_ctx(mc->ctx);
+	resp.events_reported = mc->events_reported;
+	kfree(mc);
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp)))
+		ret = -EFAULT;
+out:
 	return ret;
 }
 
@@ -758,8 +943,11 @@ static ssize_t (*ucma_cmd_table[])(struc
 	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect,
 	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr,
 	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event,
-	[RDMA_USER_CM_CMD_GET_OPTION]	= ucma_get_option,
-	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option
+	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
+	[RDMA_USER_CM_CMD_SET_OPTION]	= NULL,
+	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
+	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
+	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
 };
 
 static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -781,6 +969,9 @@ static ssize_t ucma_write(struct file *f
 	if (hdr.in + sizeof(hdr) > len)
 		return -EINVAL;
 
+	if (!ucma_cmd_table[hdr.cmd])
+		return -ENOSYS;
+
 	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
 	if (!ret)
 		ret = len;
@@ -795,10 +986,8 @@ static unsigned int ucma_poll(struct fil
 
 	poll_wait(filp, &file->poll_wait, wait);
 
-	mutex_lock(&file->file_mutex);
-	if (!list_empty(&file->events))
+	if (!list_empty(&file->event_list))
 		mask = POLLIN | POLLRDNORM;
-	mutex_unlock(&file->file_mutex);
 
 	return mask;
 }
@@ -811,10 +1000,10 @@ static int ucma_open(struct inode *inode
 	if (!file)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&file->events);
-	INIT_LIST_HEAD(&file->ctxs);
+	INIT_LIST_HEAD(&file->event_list);
+	INIT_LIST_HEAD(&file->ctx_list);
 	init_waitqueue_head(&file->poll_wait);
-	mutex_init(&file->file_mutex);
+	mutex_init(&file->mut);
 
 	filp->private_data = file;
 	file->filp = filp;
@@ -824,25 +1013,20 @@ static int ucma_open(struct inode *inode
 static int ucma_close(struct inode *inode, struct file *filp)
 {
 	struct ucma_file *file = filp->private_data;
-	struct ucma_context *ctx;
+	struct ucma_context *ctx, *tmp;
 
-	mutex_lock(&file->file_mutex);
-	while (!list_empty(&file->ctxs)) {
-		ctx = list_entry(file->ctxs.next, struct ucma_context,
-				 file_list);
-		mutex_unlock(&file->file_mutex);
+	mutex_lock(&file->mut);
+	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
+		mutex_unlock(&file->mut);
 
-		mutex_lock(&ctx_mutex);
+		mutex_lock(&mut);
 		idr_remove(&ctx_idr, ctx->id);
-		mutex_unlock(&ctx_mutex);
-
-		rdma_destroy_id(ctx->cm_id);
-		ucma_cleanup_events(ctx);
-		kfree(ctx);
+		mutex_unlock(&mut);
 
-		mutex_lock(&file->file_mutex);
+		ucma_free_ctx(ctx);
+		mutex_lock(&file->mut);
 	}
-	mutex_unlock(&file->file_mutex);
+	mutex_unlock(&file->mut);
 	kfree(file);
 	return 0;
 }
@@ -889,7 +1073,7 @@ err:
 
 static void __exit ucma_cleanup(void)
 {
-	class_device_remove_file(ucma_misc.class, 
+	class_device_remove_file(ucma_misc.class,
 				 &class_device_attr_abi_version);
 	misc_deregister(&ucma_misc);
 	idr_destroy(&ctx_idr);
--- linux-2.6.18.noarch/drivers/infiniband/core/ucma_ib.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/ucma_ib.c
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2006 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- *
- */
-
-#include <rdma/ib_addr.h>
-#include <rdma/ib_marshall.h>
-#include <rdma/rdma_cm_ib.h>
-#include <rdma/rdma_user_cm.h>
-
-#include "ucma_ib.h"
-
-static int ucma_get_req_opt(struct rdma_cm_id *id, void __user *opt,
-			    int *optlen)
-{
-	struct ib_cm_req_opt req_opt;
-	int ret = 0;
-
-	if (!opt)
-		goto out;
-
- 	if (*optlen < sizeof req_opt) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = rdma_get_ib_req_info(id, &req_opt);
-	if (!ret)
-		if (copy_to_user(opt, &req_opt, sizeof req_opt))
-			ret = -EFAULT;
-out:
-	*optlen = sizeof req_opt;
-	return ret;	
-}
-
-int ucma_get_ib_option(struct rdma_cm_id *id, int optname,
-		       void *optval, int *optlen)
-{
-	switch (optname) {
-	case IB_PATH_OPTIONS:
-		return -EINVAL;
-	case IB_CM_REQ_OPTIONS:
-		return ucma_get_req_opt(id, optval, optlen);
-	default:
-		return -EINVAL;
-	}
-}
-
-static int ucma_set_req_opt(struct rdma_cm_id *id, void __user *opt, int optlen)
-{
-	struct ib_cm_req_opt req_opt;
-
-	if (optlen != sizeof req_opt)
-		return -EINVAL;
-
-	if (copy_from_user(&req_opt, opt, sizeof req_opt))
-		return -EFAULT;
-
-	return rdma_set_ib_req_info(id, &req_opt);
-}
-
-int ucma_set_ib_option(struct rdma_cm_id *id, int optname,
-		       void *optval, int optlen)
-{
-	switch (optname) {
-	case IB_PATH_OPTIONS:
-		return -EINVAL;
-	case IB_CM_REQ_OPTIONS:
-		return ucma_set_req_opt(id, optval, optlen);
-	default:
-		return -EINVAL;
-	}
-}
--- linux-2.6.18.noarch/drivers/infiniband/core/ucma_ib.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/ucma_ib.h
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2006 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- *
- */
-
-#if !defined(UCMA_IB_H)
-#define UCMA_IB_H
-
-#include <rdma/rdma_cm.h>
-
-int ucma_get_ib_option(struct rdma_cm_id *id, int optname,
-		       void *optval, int *optlen);
-
-int ucma_set_ib_option(struct rdma_cm_id *id, int optname,
-		       void *optval, int optlen);
-
-#endif /* UCMA_IB_H */
--- linux-2.6.18.noarch/drivers/infiniband/core/ucm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/ucm.c
@@ -161,12 +161,14 @@ static void ib_ucm_cleanup_events(struct
 				    struct ib_ucm_event, ctx_list);
 		list_del(&uevent->file_list);
 		list_del(&uevent->ctx_list);
+		mutex_unlock(&ctx->file->file_mutex);
 
 		/* clear incoming connections. */
 		if (ib_ucm_new_cm_id(uevent->resp.event))
 			ib_destroy_cm_id(uevent->cm_id);
 
 		kfree(uevent);
+		mutex_lock(&ctx->file->file_mutex);
 	}
 	mutex_unlock(&ctx->file->file_mutex);
 }
@@ -309,9 +311,9 @@ static int ib_ucm_event_process(struct i
 		info	      = evt->param.apr_rcvd.apr_info;
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
-		uvt->resp.u.sidr_req_resp.pkey = 
+		uvt->resp.u.sidr_req_resp.pkey =
 					evt->param.sidr_req_rcvd.pkey;
-		uvt->resp.u.sidr_req_resp.port = 
+		uvt->resp.u.sidr_req_resp.port =
 					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
@@ -328,20 +330,18 @@ static int ib_ucm_event_process(struct i
 	}
 
 	if (uvt->data_len) {
-		uvt->data = kmalloc(uvt->data_len, GFP_KERNEL);
+		uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL);
 		if (!uvt->data)
 			goto err1;
 
-		memcpy(uvt->data, evt->private_data, uvt->data_len);
 		uvt->resp.present |= IB_UCM_PRES_DATA;
 	}
 
 	if (uvt->info_len) {
-		uvt->info = kmalloc(uvt->info_len, GFP_KERNEL);
+		uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL);
 		if (!uvt->info)
 			goto err2;
 
-		memcpy(uvt->info, info, uvt->info_len);
 		uvt->resp.present |= IB_UCM_PRES_INFO;
 	}
 	return 0;
@@ -685,11 +685,11 @@ out:
 	return result;
 }
 
-static ssize_t ib_ucm_establish(struct ib_ucm_file *file,
-				const char __user *inbuf,
-				int in_len, int out_len)
+static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
+			     const char __user *inbuf,
+			     int in_len, int out_len)
 {
-	struct ib_ucm_establish cmd;
+	struct ib_ucm_notify cmd;
 	struct ib_ucm_context *ctx;
 	int result;
 
@@ -700,7 +700,7 @@ static ssize_t ib_ucm_establish(struct i
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	result = ib_cm_establish(ctx->cm_id);
+	result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
 	ib_ucm_ctx_put(ctx);
 	return result;
 }
@@ -1107,7 +1107,7 @@ static ssize_t (*ucm_cmd_table[])(struct
 	[IB_USER_CM_CMD_DESTROY_ID]    = ib_ucm_destroy_id,
 	[IB_USER_CM_CMD_ATTR_ID]       = ib_ucm_attr_id,
 	[IB_USER_CM_CMD_LISTEN]        = ib_ucm_listen,
-	[IB_USER_CM_CMD_ESTABLISH]     = ib_ucm_establish,
+	[IB_USER_CM_CMD_NOTIFY]        = ib_ucm_notify,
 	[IB_USER_CM_CMD_SEND_REQ]      = ib_ucm_send_req,
 	[IB_USER_CM_CMD_SEND_REP]      = ib_ucm_send_rep,
 	[IB_USER_CM_CMD_SEND_RTU]      = ib_ucm_send_rtu,
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_ucm_device *dev;
-	
+
 	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_dev
 {
 	struct ib_ucm_device *ucm_dev;
 
-	if (!device->alloc_ucontext)
+	if (!device->alloc_ucontext ||
+	    rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
--- linux-2.6.18.noarch/drivers/infiniband/core/user_mad.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_de
 	struct ib_umad_device *umad_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
--- linux-2.6.18.noarch/drivers/infiniband/core/uverbs_cmd.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/uverbs_cmd.c
@@ -37,7 +37,6 @@
 
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/lockdep.h>
 
 #include <asm/uaccess.h>
 
@@ -156,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-					struct ib_ucontext *context)
+					struct ib_ucontext *context, int nested)
 {
 	struct ib_uobject *uobj;
 
@@ -164,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(
 	if (!uobj)
 		return NULL;
 
-	down_read(&uobj->mutex);
+	if (nested)
+		down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+	else
+		down_read(&uobj->mutex);
 	if (!uobj->live) {
 		put_uobj_read(uobj);
 		return NULL;
@@ -191,17 +193,18 @@ static struct ib_uobject *idr_write_uobj
 	return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+			  int nested)
 {
 	struct ib_uobject *uobj;
 
-	uobj = idr_read_uobj(idr, id, context);
+	uobj = idr_read_uobj(idr, id, context, nested);
 	return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -209,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd
 	put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -221,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -231,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -241,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -838,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uv
 err_copy:
 	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
 	ib_destroy_cq(cq);
 
@@ -868,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uv
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;
 
@@ -876,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uv
 	if (ret)
 		goto out;
 
-	memset(&resp, 0, sizeof resp);
 	resp.cqe = cq->cqe;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
+			 &resp, sizeof resp.cqe))
 		ret = -EFAULT;
 
 out:
@@ -895,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uver
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@@ -916,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uver
 		goto out_wc;
 	}
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj) {
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq) {
 		ret = -EINVAL;
 		goto out;
 	}
-	cq = uobj->object;
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
@@ -934,7 +933,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uver
 		resp->wc[i].vendor_err 	   = wc[i].vendor_err;
 		resp->wc[i].byte_len 	   = wc[i].byte_len;
 		resp->wc[i].imm_data 	   = (__u32 __force) wc[i].imm_data;
-		resp->wc[i].qp_num 	   = wc[i].qp_num;
+		resp->wc[i].qp_num 	   = wc[i].qp->qp_num;
 		resp->wc[i].src_qp 	   = wc[i].src_qp;
 		resp->wc[i].wc_flags 	   = wc[i].wc_flags;
 		resp->wc[i].pkey_index 	   = wc[i].pkey_index;
@@ -960,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct i
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj)
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq)
 		return -EINVAL;
-	cq = uobj->object;
 
 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	return in_len;
 }
@@ -1065,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
 
 	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
@@ -1217,7 +1214,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uve
 	resp.qp_access_flags        = attr->qp_access_flags;
 	resp.pkey_index             = attr->pkey_index;
 	resp.alt_pkey_index         = attr->alt_pkey_index;
-	resp.en_sqd_async_notify    = attr->en_sqd_async_notify;
+	resp.sq_draining            = attr->sq_draining;
 	resp.max_rd_atomic          = attr->max_rd_atomic;
 	resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
 	resp.min_rnr_timer          = attr->min_rnr_timer;
@@ -1275,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uv
 			    int out_len)
 {
 	struct ib_uverbs_modify_qp cmd;
+	struct ib_udata            udata;
 	struct ib_qp              *qp;
 	struct ib_qp_attr         *attr;
 	int                        ret;
@@ -1282,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uv
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
@@ -1338,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uv
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
-	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
 	put_qp_read(qp);
 
@@ -1675,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uv
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -1725,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct i
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -2056,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_u
 			     int out_len)
 {
 	struct ib_uverbs_modify_srq cmd;
+	struct ib_udata             udata;
 	struct ib_srq              *srq;
 	struct ib_srq_attr          attr;
 	int                         ret;
@@ -2063,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_u
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
 	if (!srq)
 		return -EINVAL;
@@ -2070,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_u
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;
 
-	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+	ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
 	put_srq_read(srq);
 
--- linux-2.6.18.noarch/drivers/infiniband/core/uverbs_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/uverbs_main.c
@@ -64,6 +64,12 @@ enum {
 
 #define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
 
+#ifdef __ia64__
+/* workaround for a bug in the HP chipset that would cause a kernel
+   panic when DMA resources are exhausted */
+int dma_map_sg_hp_wa = 0;
+#endif
+
 static struct class *uverbs_class;
 
 DEFINE_SPINLOCK(ib_uverbs_idr_lock);
@@ -842,6 +848,11 @@ static int __init ib_uverbs_init(void)
 
 	spin_lock_init(&map_lock);
 
+#ifdef __ia64__
+	if (ia64_platform_is("hpzx1"))
+		dma_map_sg_hp_wa = 1;
+#endif
+
 	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
 				     "infiniband_verbs");
 	if (ret) {
--- linux-2.6.18.noarch/drivers/infiniband/core/uverbs_marshall.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/uverbs_marshall.c
@@ -32,8 +32,8 @@
 
 #include <rdma/ib_marshall.h>
 
-static void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
-				    struct ib_ah_attr *src)
+void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
+			     struct ib_ah_attr *src)
 {
 	memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
 	dst->grh.flow_label        = src->grh.flow_label;
@@ -47,6 +47,7 @@ static void ib_copy_ah_attr_to_user(stru
 	dst->is_global             = src->ah_flags & IB_AH_GRH ? 1 : 0;
 	dst->port_num 	    	   = src->port_num;
 }
+EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
 
 void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
 			     struct ib_qp_attr *src)
--- linux-2.6.18.noarch/drivers/infiniband/core/uverbs_mem.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/uverbs_mem.c
@@ -39,6 +39,58 @@
 
 #include "uverbs.h"
 
+#ifdef __ia64__
+extern int dma_map_sg_hp_wa;
+
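+/*
+ * With the hp zx1 workaround enabled (dma_map_sg_hp_wa, set in
+ * uverbs_main.c), map each scatterlist entry individually so that a
+ * failed mapping can be unwound one entry at a time.
+ */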
+static int dma_map_sg_ia64(struct ib_device *ibdev,
+			   struct scatterlist *sg,
+			   int nents,
+			   enum dma_data_direction dir)
+{
+	int i, rc, j, lents = 0;
+	struct device *dev;
+
+	if (!dma_map_sg_hp_wa)
+		return ib_dma_map_sg(ibdev, sg, nents, dir);
+
+	dev = ibdev->dma_device;
+	for (i = 0; i < nents; ++i) {
+		rc = dma_map_sg(dev, sg + i, 1, dir);
+		if (rc <= 0) {
+			for (j = 0; j < i; ++j)
+				dma_unmap_sg(dev, sg + j, 1, dir);
+
+			return 0;
+		}
+		lents += rc;
+	}
+
+	return lents;
+}
+
+static void dma_unmap_sg_ia64(struct ib_device *ibdev,
+			      struct scatterlist *sg,
+			      int nents,
+			      enum dma_data_direction dir)
+{
+	int i;
+	struct device *dev;
+
+	if (!dma_map_sg_hp_wa)
+		return ib_dma_unmap_sg(ibdev, sg, nents, dir);
+
+	dev = ibdev->dma_device;
+	for (i = 0; i < nents; ++i)
+		dma_unmap_sg(dev, sg + i, 1, dir);
+}
+
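+/* Route the ib_dma_{map,unmap}_sg calls below through the ia64 wrappers. */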
+#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir)
+#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir)
+
+#endif
+
 struct ib_umem_account_work {
 	struct work_struct work;
 	struct mm_struct  *mm;
@@ -52,8 +104,8 @@ static void __ib_umem_release(struct ib_
 	int i;
 
 	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
-		dma_unmap_sg(dev->dma_device, chunk->page_list,
-			     chunk->nents, DMA_BIDIRECTIONAL);
+		ib_dma_unmap_sg(dev, chunk->page_list,
+				chunk->nents, DMA_BIDIRECTIONAL);
 		for (i = 0; i < chunk->nents; ++i) {
 			if (umem->writable && dirty)
 				set_page_dirty_lock(chunk->page_list[i].page);
@@ -136,10 +188,10 @@ int ib_umem_get(struct ib_device *dev, s
 				chunk->page_list[i].length = PAGE_SIZE;
 			}
 
-			chunk->nmap = dma_map_sg(dev->dma_device,
-						 &chunk->page_list[0],
-						 chunk->nents,
-						 DMA_BIDIRECTIONAL);
+			chunk->nmap = ib_dma_map_sg(dev,
+						    &chunk->page_list[0],
+						    chunk->nents,
+						    DMA_BIDIRECTIONAL);
 			if (chunk->nmap <= 0) {
 				for (i = 0; i < chunk->nents; ++i)
 					put_page(chunk->page_list[i].page);
--- linux-2.6.18.noarch/drivers/infiniband/core/verbs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/core/verbs.c
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+	switch (node_type) {
+	case RDMA_NODE_IB_CA:
+	case RDMA_NODE_IB_SWITCH:
+	case RDMA_NODE_IB_ROUTER:
+		return RDMA_TRANSPORT_IB;
+	case RDMA_NODE_RNIC:
+		return RDMA_TRANSPORT_IWARP;
+	default:
+		BUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_ae.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{
+	switch (c2_status) {
+	case C2_CONN_STATUS_SUCCESS:
+		return 0;
+	case C2_CONN_STATUS_REJECTED:
+		return -ENETRESET;
+	case C2_CONN_STATUS_REFUSED:
+		return -ECONNREFUSED;
+	case C2_CONN_STATUS_TIMEDOUT:
+		return -ETIMEDOUT;
+	case C2_CONN_STATUS_NETUNREACH:
+		return -ENETUNREACH;
+	case C2_CONN_STATUS_HOSTUNREACH:
+		return -EHOSTUNREACH;
+	case C2_CONN_STATUS_INVALID_RNIC:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP:
+		return -EINVAL;
+	case C2_CONN_STATUS_INVALID_QP_STATE:
+		return -EINVAL;
+	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	default:
+		printk(KERN_ERR PFX
+		       "%s - Unable to convert CM status: %d\n",
+		       __FUNCTION__, c2_status);
+		return -EIO;
+	}
+}
+
+static const char* to_event_str(int event)
+{
+	static const char* event_str[] = {
+		"CCAE_REMOTE_SHUTDOWN",
+		"CCAE_ACTIVE_CONNECT_RESULTS",
+		"CCAE_CONNECTION_REQUEST",
+		"CCAE_LLP_CLOSE_COMPLETE",
+		"CCAE_TERMINATE_MESSAGE_RECEIVED",
+		"CCAE_LLP_CONNECTION_RESET",
+		"CCAE_LLP_CONNECTION_LOST",
+		"CCAE_LLP_SEGMENT_SIZE_INVALID",
+		"CCAE_LLP_INVALID_CRC",
+		"CCAE_LLP_BAD_FPDU",
+		"CCAE_INVALID_DDP_VERSION",
+		"CCAE_INVALID_RDMA_VERSION",
+		"CCAE_UNEXPECTED_OPCODE",
+		"CCAE_INVALID_DDP_QUEUE_NUMBER",
+		"CCAE_RDMA_READ_NOT_ENABLED",
+		"CCAE_RDMA_WRITE_NOT_ENABLED",
+		"CCAE_RDMA_READ_TOO_SMALL",
+		"CCAE_NO_L_BIT",
+		"CCAE_TAGGED_INVALID_STAG",
+		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_TAGGED_INVALID_PD",
+		"CCAE_WRAP_ERROR",
+		"CCAE_BAD_CLOSE",
+		"CCAE_BAD_LLP_CLOSE",
+		"CCAE_INVALID_MSN_RANGE",
+		"CCAE_INVALID_MSN_GAP",
+		"CCAE_IRRQ_OVERFLOW",
+		"CCAE_IRRQ_MSN_GAP",
+		"CCAE_IRRQ_MSN_RANGE",
+		"CCAE_IRRQ_INVALID_STAG",
+		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_IRRQ_INVALID_PD",
+		"CCAE_IRRQ_WRAP_ERROR",
+		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+		"CCAE_CQ_RQ_COMPLETION_ERROR",
+		"CCAE_QP_SRQ_WQE_ERROR",
+		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+		"CCAE_CQ_OVERFLOW",
+		"CCAE_CQ_OPERATION_ERROR",
+		"CCAE_SRQ_LIMIT_REACHED",
+		"CCAE_QP_RQ_LIMIT_REACHED",
+		"CCAE_SRQ_CATASTROPHIC_ERROR",
+		"CCAE_RNIC_CATASTROPHIC_ERROR"
+	};
+
+	if (event < CCAE_REMOTE_SHUTDOWN ||
+	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
+		return "<invalid event>";
+
+	event -= CCAE_REMOTE_SHUTDOWN;
+	return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{
+	switch (state) {
+	case C2_QP_STATE_IDLE:
+		return "C2_QP_STATE_IDLE";
+	case C2_QP_STATE_CONNECTING:
+		return "C2_QP_STATE_CONNECTING";
+	case C2_QP_STATE_RTS:
+		return "C2_QP_STATE_RTS";
+	case C2_QP_STATE_CLOSING:
+		return "C2_QP_STATE_CLOSING";
+	case C2_QP_STATE_TERMINATE:
+		return "C2_QP_STATE_TERMINATE";
+	case C2_QP_STATE_ERROR:
+		return "C2_QP_STATE_ERROR";
+	default:
+		return "<invalid QP state>";
+	}
+}
+
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_mq *mq = c2dev->qptr_array[mq_index];
+	union c2wr *wr;
+	void *resource_user_context;
+	struct iw_cm_event cm_event;
+	struct ib_event ib_event;
+	enum c2_resource_indicator resource_indicator;
+	enum c2_event_id event_id;
+	unsigned long flags;
+	int status;
+
+	/*
+	 * retrieve the message
+	 */
+	wr = c2_mq_consume(mq);
+	if (!wr)
+		return;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+	memset(&cm_event, 0, sizeof(cm_event));
+
+	event_id = c2_wr_get_id(wr);
+	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+	resource_user_context =
+	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+	pr_debug("event received c2_dev=%p, event_id=%d, "
+		"resource_indicator=%d, user_context=%p, status = %d\n",
+		c2dev, event_id, resource_indicator, resource_user_context,
+		status);
+
+	switch (resource_indicator) {
+	case C2_RES_IND_QP:{
+
+		struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+		struct iw_cm_id *cm_id = qp->cm_id;
+		struct c2wr_ae_active_connect_results *res;
+
+		if (!cm_id) {
+			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+				qp);
+			goto ignore_it;
+		}
+		pr_debug("%s: event = %s, user_context=%llx, "
+			"resource_type=%x, "
+			"resource=%x, qp_state=%s\n",
+			__FUNCTION__,
+			to_event_str(event_id),
+			(unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context),
+			be32_to_cpu(wr->ae.ae_generic.resource_type),
+			be32_to_cpu(wr->ae.ae_generic.resource),
+			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+		switch (event_id) {
+		case CCAE_ACTIVE_CONNECT_RESULTS:
+			res = &wr->ae.ae_active_connect_results;
+			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+			cm_event.local_addr.sin_addr.s_addr = res->laddr;
+			cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+			cm_event.local_addr.sin_port = res->lport;
+			cm_event.remote_addr.sin_port =	res->rport;
+			if (status == 0) {
+				cm_event.private_data_len =
+					be32_to_cpu(res->private_data_length);
+				cm_event.private_data = res->private_data;
+			} else {
+				spin_lock_irqsave(&qp->lock, flags);
+				if (qp->cm_id) {
+					qp->cm_id->rem_ref(qp->cm_id);
+					qp->cm_id = NULL;
+				}
+				spin_unlock_irqrestore(&qp->lock, flags);
+				cm_event.private_data_len = 0;
+				cm_event.private_data = NULL;
+			}
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		case CCAE_TERMINATE_MESSAGE_RECEIVED:
+		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+			ib_event.device = &c2dev->ibdev;
+			ib_event.element.qp = &qp->ibqp;
+			ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&ib_event,
+						       qp->ibqp.
+						       qp_context);
+			break;
+		case CCAE_BAD_CLOSE:
+		case CCAE_LLP_CLOSE_COMPLETE:
+		case CCAE_LLP_CONNECTION_RESET:
+		case CCAE_LLP_CONNECTION_LOST:
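+			/* 0x6b6b6b6b is the slab poison pattern: trap a freed cm_id. */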
+			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id) {
+				qp->cm_id->rem_ref(qp->cm_id);
+				qp->cm_id = NULL;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = 0;
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		default:
+			BUG_ON(1);
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__FUNCTION__, __LINE__,
+				event_id, qp, cm_id);
+			break;
+		}
+		break;
+	}
+
+	case C2_RES_IND_EP:{
+
+		struct c2wr_ae_connection_request *req =
+			&wr->ae.ae_connection_request;
+		struct iw_cm_id *cm_id =
+			(struct iw_cm_id *)resource_user_context;
+
+		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+		if (event_id != CCAE_CONNECTION_REQUEST) {
+			pr_debug("%s: Invalid event_id: %d\n",
+				__FUNCTION__, event_id);
+			break;
+		}
+		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+		cm_event.local_addr.sin_addr.s_addr = req->laddr;
+		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+		cm_event.local_addr.sin_port = req->lport;
+		cm_event.remote_addr.sin_port = req->rport;
+		cm_event.private_data_len =
+			be32_to_cpu(req->private_data_length);
+		cm_event.private_data = req->private_data;
+
+		if (cm_id->event_handler)
+			cm_id->event_handler(cm_id, &cm_event);
+		break;
+	}
+
+	case C2_RES_IND_CQ:{
+		struct c2_cq *cq =
+		    (struct c2_cq *) resource_user_context;
+
+		pr_debug("IB_EVENT_CQ_ERR\n");
+		ib_event.device = &c2dev->ibdev;
+		ib_event.element.cq = &cq->ibcq;
+		ib_event.event = IB_EVENT_CQ_ERR;
+
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_event,
+					       cq->ibcq.cq_context);
+		break;
+	}
+
+	default:
+		printk(KERN_ERR PFX "Bad resource indicator = %d\n",
+		       resource_indicator);
+		break;
+	}
+
+ ignore_it:
+	c2_mq_free(mq);
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_ae.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {
+	CCAE_REMOTE_SHUTDOWN = 0x80,
+	CCAE_ACTIVE_CONNECT_RESULTS,
+	CCAE_CONNECTION_REQUEST,
+	CCAE_LLP_CLOSE_COMPLETE,
+	CCAE_TERMINATE_MESSAGE_RECEIVED,
+	CCAE_LLP_CONNECTION_RESET,
+	CCAE_LLP_CONNECTION_LOST,
+	CCAE_LLP_SEGMENT_SIZE_INVALID,
+	CCAE_LLP_INVALID_CRC,
+	CCAE_LLP_BAD_FPDU,
+	CCAE_INVALID_DDP_VERSION,
+	CCAE_INVALID_RDMA_VERSION,
+	CCAE_UNEXPECTED_OPCODE,
+	CCAE_INVALID_DDP_QUEUE_NUMBER,
+	CCAE_RDMA_READ_NOT_ENABLED,
+	CCAE_RDMA_WRITE_NOT_ENABLED,
+	CCAE_RDMA_READ_TOO_SMALL,
+	CCAE_NO_L_BIT,
+	CCAE_TAGGED_INVALID_STAG,
+	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+	CCAE_TAGGED_INVALID_PD,
+	CCAE_WRAP_ERROR,
+	CCAE_BAD_CLOSE,
+	CCAE_BAD_LLP_CLOSE,
+	CCAE_INVALID_MSN_RANGE,
+	CCAE_INVALID_MSN_GAP,
+	CCAE_IRRQ_OVERFLOW,
+	CCAE_IRRQ_MSN_GAP,
+	CCAE_IRRQ_MSN_RANGE,
+	CCAE_IRRQ_INVALID_STAG,
+	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+	CCAE_IRRQ_INVALID_PD,
+	CCAE_IRRQ_WRAP_ERROR,
+	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+	CCAE_CQ_RQ_COMPLETION_ERROR,
+	CCAE_QP_SRQ_WQE_ERROR,
+	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+	CCAE_CQ_OVERFLOW,
+	CCAE_CQ_OPERATION_ERROR,
+	CCAE_SRQ_LIMIT_REACHED,
+	CCAE_QP_RQ_LIMIT_REACHED,
+	CCAE_SRQ_CATASTROPHIC_ERROR,
+	CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING If you add more id's, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+	C2_RES_IND_QP = 1,
+	C2_RES_IND_EP,
+	C2_RES_IND_CQ,
+	C2_RES_IND_SRQ,
+};
+
+#endif /* _C2_AE_H_ */
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+			       struct sp_chunk **head)
+{
+	int i;
+	struct sp_chunk *new_head;
+	dma_addr_t dma_addr;
+
+	new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
+				      &dma_addr, gfp_mask);
+	if (new_head == NULL)
+		return -ENOMEM;
+
+	new_head->dma_addr = dma_addr;
+	pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+
+	new_head->next = NULL;
+	new_head->head = 0;
+
+	/* build list where each index is the next free slot */
+	for (i = 0;
+	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+		  sizeof(u16)) / sizeof(u16) - 1;
+	     i++) {
+		new_head->shared_ptr[i] = i + 1;
+	}
+	/* terminate list */
+	new_head->shared_ptr[i] = 0xFFFF;
+
+	*head = new_head;
+	return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+		      struct sp_chunk **root)
+{
+	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+	struct sp_chunk *next;
+
+	while (root) {
+		next = root->next;
+		dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
+				  pci_unmap_addr(root, mapping));
+		root = next;
+	}
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+	u16 mqsp;
+
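+	/* Walk the chunk list for a free slot, allocating a new chunk if all are full. */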
+	while (head) {
+		mqsp = head->head;
+		if (mqsp != 0xFFFF) {
+			head->head = head->shared_ptr[mqsp];
+			break;
+		} else if (head->next == NULL) {
+			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+			    0) {
+				head = head->next;
+				mqsp = head->head;
+				head->head = head->shared_ptr[mqsp];
+				break;
+			} else
+				return NULL;
+		} else
+			head = head->next;
+	}
+	if (head) {
+		*dma_addr = head->dma_addr +
+			    ((unsigned long) &(head->shared_ptr[mqsp]) -
+			     (unsigned long) head);
+		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
+		return &(head->shared_ptr[mqsp]);
+	}
+	return NULL;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{
+	struct sp_chunk *head;
+	u16 idx;
+
+	/* The chunk containing this ptr begins at the page boundary */
+	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+	/* Link head to new mqsp */
+	*mqsp = head->head;
+
+	/* Compute the shared_ptr index */
+	idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+	/* Point this index at the head */
+	head->shared_ptr[idx] = head->head;
+
+	/* Point head at this index */
+	head->head = idx;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1256 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;		/* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+	{ PCI_DEVICE(0x18b8, 0xb001) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+		"IRQ %u\n", netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+		netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+	struct net_device *netdev = c2_port->netdev;
+
+	if (netdev->mtu > RX_BUF_SIZE)
+		c2_port->rx_buf_size =
+		    netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+		    NET_IP_ALIGN;
+	else
+		c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
+/*
+ * Allocate TX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+	struct c2_tx_desc *tx_desc;
+	struct c2_txp_desc __iomem *txp_desc;
+	struct c2_element *elem;
+	int i;
+
+	tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+	if (!tx_ring->start)
+		return -ENOMEM;
+
+	elem = tx_ring->start;
+	tx_desc = vaddr;
+	txp_desc = mmio_txp_ring;
+	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+		tx_desc->len = 0;
+		tx_desc->status = 0;
+
+		/* Set TXP_HTXD_UNINIT */
+		__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+			     (void __iomem *) txp_desc + C2_TXP_ADDR);
+		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+		__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = tx_desc;
+		elem->hw_desc = txp_desc;
+
+		if (i == tx_ring->count - 1) {
+			elem->next = tx_ring->start;
+			tx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			tx_desc->next_offset =
+			    base + (i + 1) * sizeof(*tx_desc);
+		}
+	}
+
+	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+	return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_desc __iomem *rxp_desc;
+	struct c2_element *elem;
+	int i;
+
+	rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+	if (!rx_ring->start)
+		return -ENOMEM;
+
+	elem = rx_ring->start;
+	rx_desc = vaddr;
+	rxp_desc = mmio_rxp_ring;
+	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+		rx_desc->len = 0;
+		rx_desc->status = 0;
+
+		/* Set RXP_HRXD_UNINIT */
+		__raw_writew(cpu_to_be16(RXP_HRXD_OK),
+		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = rx_desc;
+		elem->hw_desc = rxp_desc;
+
+		if (i == rx_ring->count - 1) {
+			elem->next = rx_ring->start;
+			rx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			rx_desc->next_offset =
+			    base + (i + 1) * sizeof(*rx_desc);
+		}
+	}
+
+	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+	return 0;
+}
+
+/* Setup buffer for receiving */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	struct c2_rxp_hdr *rxp_hdr;
+
+	skb = dev_alloc_skb(c2_port->rx_buf_size);
+	if (unlikely(!skb)) {
+		pr_debug("%s: out of memory for receive\n",
+			c2_port->netdev->name);
+		return -ENOMEM;
+	}
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(skb->data, 0, sizeof(*rxp_hdr));
+
+	skb->dev = c2_port->netdev;
+
+	maplen = c2_port->rx_buf_size;
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen,
+			   PCI_DMA_FROMDEVICE);
+
+	/* Set the sk_buff RXP_header to RXP_HRXD_READY */
+	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+	rxp_hdr->flags = RXP_HRXD_READY;
+
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+	rx_desc->len = maplen;
+
+	return 0;
+}
+
+/*
+ * Allocate buffers for the Rx ring
+ * For receive:  rx_ring.to_clean is next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	int ret = 0;
+
+	elem = rx_ring->start;
+	do {
+		if (c2_rx_alloc(c2_port, elem)) {
+			ret = 1;
+			break;
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+
+	rx_ring->to_clean = rx_ring->start;
+	return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+
+	elem = rx_ring->start;
+	do {
+		rx_desc = elem->ht_desc;
+		rx_desc->len = 0;
+
+		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     elem->hw_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     elem->hw_desc + C2_RXP_FLAGS);
+
+		if (elem->skb) {
+			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+					 elem->maplen, PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(elem->skb);
+			elem->skb = NULL;
+		}
+	} while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+	struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+	tx_desc->len = 0;
+
+	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+			 PCI_DMA_TODEVICE);
+
+	if (elem->skb) {
+		dev_kfree_skb_any(elem->skb);
+		elem->skb = NULL;
+	}
+
+	return 0;
+}
+
+/* Free all buffers in TX ring, assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+	int retry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	elem = tx_ring->start;
+
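+	/*
+	 * Sweep the ring: descriptors still marked READY are forced to DONE
+	 * and counted as dropped, then the sweep is retried.
+	 */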
+	do {
+		retry = 0;
+		do {
+			txp_htxd.flags =
+			    readw(elem->hw_desc + C2_TXP_FLAGS);
+
+			if (txp_htxd.flags == TXP_HTXD_READY) {
+				retry = 1;
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(0,
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+					     elem->hw_desc + C2_TXP_FLAGS);
+				c2_port->netstats.tx_dropped++;
+				break;
+			} else {
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+					     elem->hw_desc + C2_TXP_FLAGS);
+			}
+
+			c2_tx_free(c2_port->c2dev, elem);
+
+		} while ((elem = elem->next) != tx_ring->start);
+	} while (retry);
+
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(c2_port->netdev);
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+
+	spin_lock(&c2_port->tx_lock);
+
+	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+	     elem = elem->next) {
+		txp_htxd.flags =
+		    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+		if (txp_htxd.flags != TXP_HTXD_DONE)
+			break;
+
+		if (netif_msg_tx_done(c2_port)) {
+			/* PCI reads are expensive in fast path */
+			txp_htxd.len =
+			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+			pr_debug("%s: tx done slot %3Zu status 0x%x len "
+				"%5u bytes\n",
+				netdev->name, elem - tx_ring->start,
+				txp_htxd.flags, txp_htxd.len);
+		}
+
+		c2_tx_free(c2dev, elem);
+		++(c2_port->tx_avail);
+	}
+
+	tx_ring->to_clean = elem;
+
+	if (netif_queue_stopped(netdev)
+	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(netdev);
+
+	spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+	if (rxp_hdr->status != RXP_HRXD_OK ||
+	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+		pr_debug("BAD RXP_HRXD\n");
+		pr_debug("  rx_desc : %p\n", rx_desc);
+		pr_debug("    index : %Zu\n",
+			elem - c2_port->rx_ring.start);
+		pr_debug("    len   : %u\n", rx_desc->len);
+		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+			(void *) __pa((unsigned long) rxp_hdr));
+		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+		pr_debug("    status: 0x%x\n", rxp_hdr->status);
+		pr_debug("    len   : %u\n", rxp_hdr->len);
+		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+	}
+
+	/* Setup the skb for reuse since we're dropping this pkt */
+	elem->skb->tail = elem->skb->data = elem->skb->head;
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+	/* Write the descriptor to the adapter's rx ring */
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+	__raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	pr_debug("packet dropped\n");
+	c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen, buflen;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+
+	/* Begin where we left off */
+	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+	     elem = elem->next) {
+		rx_desc = elem->ht_desc;
+		mapaddr = elem->mapaddr;
+		maplen = elem->maplen;
+		skb = elem->skb;
+		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+		if (rxp_hdr->flags != RXP_HRXD_DONE)
+			break;
+		buflen = rxp_hdr->len;
+
+		/* Sanity check the RXP header */
+		if (rxp_hdr->status != RXP_HRXD_OK ||
+		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/*
+		 * Allocate and map a new skb for replenishing the host
+		 * RX desc
+		 */
+		if (c2_rx_alloc(c2_port, elem)) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/* Unmap the old skb */
+		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+				 PCI_DMA_FROMDEVICE);
+
+		prefetch(skb->data);
+
+		/*
+		 * Skip past the leading 8 bytes comprising the
+		 * "struct c2_rxp_hdr", prepended by the adapter
+		 * to the usual Ethernet header ("struct ethhdr"),
+		 * to the start of the raw Ethernet packet.
+		 *
+		 * Fix up the various fields in the sk_buff before
+		 * passing it up to netif_rx(). The transfer size
+		 * (in bytes) specified by the adapter len field of
+		 * the "struct rxp_hdr_t" does NOT include the
+		 * "sizeof(struct c2_rxp_hdr)".
+		 */
+		skb->data += sizeof(*rxp_hdr);
+		skb->tail = skb->data + buflen;
+		skb->len = buflen;
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		netif_rx(skb);
+
+		netdev->last_rx = jiffies;
+		c2_port->netstats.rx_packets++;
+		c2_port->netstats.rx_bytes += buflen;
+	}
+
+	/* Save where we left off */
+	rx_ring->to_clean = elem;
+	c2dev->cur_rx = elem - rx_ring->start;
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int netisr0, dmaisr;
+	int handled = 0;
+	struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+	/* Process CCILNET interrupts */
+	netisr0 = readl(c2dev->regs + C2_NISR0);
+	if (netisr0) {
+
+		/*
+		 * There is an issue with the firmware that always
+		 * provides the status of RX for both TX & RX
+		 * interrupts.  So process both queues here.
+		 */
+		c2_rx_interrupt(c2dev->netdev);
+		c2_tx_interrupt(c2dev->netdev);
+
+		/* Clear the interrupt */
+		writel(netisr0, c2dev->regs + C2_NISR0);
+		handled++;
+	}
+
+	/* Process RNIC interrupts */
+	dmaisr = readl(c2dev->regs + C2_DISR);
+	if (dmaisr) {
+		writel(dmaisr, c2dev->regs + C2_DISR);
+		c2_rnic_interrupt(c2dev);
+		handled++;
+	}
+
+	if (handled) {
+		return IRQ_HANDLED;
+	} else {
+		return IRQ_NONE;
+	}
+}
+
+static int c2_up(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_element *elem;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct in_device *in_dev;
+	size_t rx_size, tx_size;
+	int ret, i;
+	unsigned int netimr0;
+
+	if (netif_msg_ifup(c2_port))
+		pr_debug("%s: enabling interface\n", netdev->name);
+
+	/* Set the Rx buffer size based on MTU */
+	c2_set_rxbufsize(c2_port);
+
+	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+	c2_port->mem_size = tx_size + rx_size;
+	c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+					    &c2_port->dma);
+	if (c2_port->mem == NULL) {
+		pr_debug("Unable to allocate memory for "
+			"host descriptor rings\n");
+		return -ENOMEM;
+	}
+
+	memset(c2_port->mem, 0, c2_port->mem_size);
+
+	/* Create the Rx host descriptor ring */
+	if ((ret =
+	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+			      c2dev->mmio_rxp_ring))) {
+		pr_debug("Unable to create RX ring\n");
+		goto bail0;
+	}
+
+	/* Allocate Rx buffers for the host descriptor ring */
+	if (c2_rx_fill(c2_port)) {
+		pr_debug("Unable to fill RX ring\n");
+		goto bail1;
+	}
+
+	/* Create the Tx host descriptor ring */
+	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+				    c2_port->dma + rx_size,
+				    c2dev->mmio_txp_ring))) {
+		pr_debug("Unable to create TX ring\n");
+		goto bail1;
+	}
+
+	/* Set the TX pointer to where we left off */
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+	    c2_port->tx_ring.start + c2dev->cur_tx;
+
+	/* missing: Initialize MAC */
+
+	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+	     i++, elem++) {
+		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+		rxp_hdr->flags = 0;
+		__raw_writew(cpu_to_be16(RXP_HRXD_READY),
+			     elem->hw_desc + C2_RXP_FLAGS);
+	}
+
+	/* Enable network packets */
+	netif_start_queue(netdev);
+
+	/* Enable IRQ */
+	writel(0, c2dev->regs + C2_IDIS);
+	netimr0 = readl(c2dev->regs + C2_NIMR0);
+	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+	writel(netimr0, c2dev->regs + C2_NIMR0);
+
+	/* Tell the stack to ignore arp requests for ipaddrs bound to
+	 * other interfaces.  This is needed to prevent the host stack
+	 * from responding to arp requests to the ipaddr bound on the
+	 * rdma interface.
+	 */
+	in_dev = in_dev_get(netdev);
+	in_dev->cnf.arp_ignore = 1;
+	in_dev_put(in_dev);
+
+	return 0;
+
+      bail1:
+	c2_rx_clean(c2_port);
+	kfree(c2_port->rx_ring.start);
+
+      bail0:
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+
+	if (netif_msg_ifdown(c2_port))
+		pr_debug("%s: disabling interface\n",
+			netdev->name);
+
+	/* Wait for all the queued packets to get sent */
+	c2_tx_interrupt(netdev);
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Disable IRQs by clearing the interrupt mask */
+	writel(1, c2dev->regs + C2_IDIS);
+	writel(0, c2dev->regs + C2_NIMR0);
+
+	/* missing: Stop transmitter */
+
+	/* missing: Stop receiver */
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* missing: Turn off LEDs here */
+
+	/* Free all buffers in the host descriptor rings */
+	c2_tx_clean(c2_port);
+	c2_rx_clean(c2_port);
+
+	/* Free the host descriptor rings */
+	kfree(c2_port->rx_ring.start);
+	kfree(c2_port->tx_ring.start);
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	unsigned int cur_rx = c2dev->cur_rx;
+
+	/* Tell the hardware to quiesce */
+	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+	/*
+	 * The hardware will reset the C2_PCI_HRX_QUI bit once
+	 * the RXP is quiesced.  Wait 2 seconds for this.
+	 */
+	ssleep(2);
+
+	cur_rx = C2_GET_CUR_RX(c2dev);
+
+	if (cur_rx & C2_PCI_HRX_QUI)
+		pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+	cur_rx &= ~C2_PCI_HRX_QUI;
+
+	c2dev->cur_rx = cur_rx;
+
+	pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+		netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+		pr_debug("%s: Tx ring full when queue awake!\n",
+			netdev->name);
+		return NETDEV_TX_BUSY;
+	}
+
+	maplen = skb_headlen(skb);
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+	elem = tx_ring->to_use;
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+
+	/* Tell HW to xmit */
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+	__raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+	__raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+	c2_port->netstats.tx_packets++;
+	c2_port->netstats.tx_bytes += maplen;
+
+	/* Loop thru additional data fragments and queue them */
+	if (skb_shinfo(skb)->nr_frags) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			maplen = frag->size;
+			mapaddr =
+			    pci_map_page(c2dev->pcidev, frag->page,
+					 frag->page_offset, maplen,
+					 PCI_DMA_TODEVICE);
+
+			elem = elem->next;
+			elem->skb = NULL;
+			elem->mapaddr = mapaddr;
+			elem->maplen = maplen;
+
+			/* Tell HW to xmit */
+			__raw_writeq(cpu_to_be64(mapaddr),
+				     elem->hw_desc + C2_TXP_ADDR);
+			__raw_writew(cpu_to_be16(maplen),
+				     elem->hw_desc + C2_TXP_LEN);
+			__raw_writew(cpu_to_be16(TXP_HTXD_READY),
+				     elem->hw_desc + C2_TXP_FLAGS);
+
+			c2_port->netstats.tx_packets++;
+			c2_port->netstats.tx_bytes += maplen;
+		}
+	}
+
+	tx_ring->to_use = elem->next;
+	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+		netif_stop_queue(netdev);
+		if (netif_msg_tx_queued(c2_port))
+			pr_debug("%s: transmit queue full\n",
+				netdev->name);
+	}
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	if (netif_msg_timer(c2_port))
+		pr_debug("%s: tx timeout\n", netdev->name);
+
+	c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev)) {
+		c2_down(netdev);
+
+		c2_up(netdev);
+	}
+
+	return ret;
+}
+
+/* Initialize network device */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+				     void __iomem * mmio_addr)
+{
+	struct c2_port *c2_port = NULL;
+	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+	if (!netdev) {
+		pr_debug("c2_port etherdev alloc failed\n");
+		return NULL;
+	}
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	netdev->open = c2_up;
+	netdev->stop = c2_down;
+	netdev->hard_start_xmit = c2_xmit_frame;
+	netdev->get_stats = c2_get_stats;
+	netdev->tx_timeout = c2_tx_timeout;
+	netdev->change_mtu = c2_change_mtu;
+	netdev->watchdog_timeo = C2_TX_TIMEOUT;
+	netdev->irq = c2dev->pcidev->irq;
+
+	c2_port = netdev_priv(netdev);
+	c2_port->netdev = netdev;
+	c2_port->c2dev = c2dev;
+	c2_port->msg_enable = netif_msg_init(debug, default_msg);
+	c2_port->tx_ring.count = C2_NUM_TX_DESC;
+	c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+	spin_lock_init(&c2_port->tx_lock);
+
+	/* Copy our 48-bit ethernet hardware address */
+	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+	/* Validate the MAC address */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		pr_debug("Invalid MAC Address\n");
+		c2_print_macaddr(netdev);
+		free_netdev(netdev);
+		return NULL;
+	}
+
+	c2dev->netdev = netdev;
+
+	return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+			      const struct pci_device_id *ent)
+{
+	int ret = 0, i;
+	unsigned long reg0_start, reg0_flags, reg0_len;
+	unsigned long reg2_start, reg2_flags, reg2_len;
+	unsigned long reg4_start, reg4_flags, reg4_len;
+	unsigned kva_map_size;
+	struct net_device *netdev = NULL;
+	struct c2_dev *c2dev = NULL;
+	void __iomem *mmio_regs = NULL;
+
+	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+		DRV_VERSION);
+
+	/* Enable PCI device */
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+			pci_name(pcidev));
+		goto bail0;
+	}
+
+	reg0_start = pci_resource_start(pcidev, BAR_0);
+	reg0_len = pci_resource_len(pcidev, BAR_0);
+	reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+	reg2_start = pci_resource_start(pcidev, BAR_2);
+	reg2_len = pci_resource_len(pcidev, BAR_2);
+	reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+	reg4_start = pci_resource_start(pcidev, BAR_4);
+	reg4_len = pci_resource_len(pcidev, BAR_4);
+	reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(reg0_flags & IORESOURCE_MEM) ||
+	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Check for weird/broken PCI region reporting */
+	if ((reg0_len < C2_REG0_SIZE) ||
+	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to request regions\n",
+			pci_name(pcidev));
+		goto bail1;
+	}
+
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	/* Enables bus-mastering on the device */
+	pci_set_master(pcidev);
+
+	/* Remap the adapter PCI registers in BAR4 */
+	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				    sizeof(struct c2_adapter_pci_regs));
+	if (mmio_regs == 0UL) {
+		printk(KERN_ERR PFX
+			"Unable to remap adapter PCI registers in BAR4\n");
+		ret = -EIO;
+		goto bail2;
+	}
+
+	/* Validate PCI regs magic */
+	for (i = 0; i < sizeof(c2_magic); i++) {
+		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+			       "utility to update your boot loader\n",
+				i + 1, sizeof(c2_magic),
+				readb(mmio_regs + C2_REGS_MAGIC + i),
+				c2_magic[i]);
+			printk(KERN_ERR PFX "Adapter not claimed\n");
+			iounmap(mmio_regs);
+			ret = -EIO;
+			goto bail2;
+		}
+	}
+
+	/* Validate the adapter version */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+		printk(KERN_ERR PFX "Version mismatch "
+			"[fw=%u, c2=%u], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+			C2_VERSION);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Validate the adapter IVN */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+		printk(KERN_ERR PFX "Downlevel firmware level. You should be using "
+		       "the OpenIB device support kit. "
+		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+			C2_IVN);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Allocate hardware structure */
+	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+	if (!c2dev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+			pci_name(pcidev));
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	memset(c2dev, 0, sizeof(*c2dev));
+	spin_lock_init(&c2dev->lock);
+	c2dev->pcidev = pcidev;
+	c2dev->cur_tx = 0;
+
+	/* Get the last RX index */
+	c2dev->cur_rx =
+	    (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+	     0xffffc000) / sizeof(struct c2_rxp_desc);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+			pci_name(pcidev), pcidev->irq);
+		iounmap(mmio_regs);
+		goto bail3;
+	}
+
+	/* Set driver specific data */
+	pci_set_drvdata(pcidev, c2dev);
+
+	/* Initialize network device */
+	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+		iounmap(mmio_regs);
+		goto bail4;
+	}
+
+	/* Save off the actual size prior to unmapping mmio_regs */
+	kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+	/* Unmap the adapter PCI registers in BAR4 */
+	iounmap(mmio_regs);
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+			ret);
+		goto bail5;
+	}
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Remap the adapter HRXDQ PA space to kernel VA space */
+	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+					       C2_RXP_HRXDQ_SIZE);
+	if (c2dev->mmio_rxp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+		ret = -EIO;
+		goto bail6;
+	}
+
+	/* Remap the adapter HTXDQ PA space to kernel VA space */
+	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+					       C2_TXP_HTXDQ_SIZE);
+	if (c2dev->mmio_txp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+		ret = -EIO;
+		goto bail7;
+	}
+
+	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
+	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+	if (c2dev->regs == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail8;
+	}
+
+	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
+	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				     kva_map_size);
+	if (c2dev->kva == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR4\n");
+		ret = -EIO;
+		goto bail9;
+	}
+
+	/* Print out the MAC address */
+	c2_print_macaddr(netdev);
+
+	ret = c2_rnic_init(c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+		goto bail10;
+	}
+
+	if (c2_register_device(c2dev))
+		goto bail10;
+
+	return 0;
+
+ bail10:
+	iounmap(c2dev->kva);
+
+ bail9:
+	iounmap(c2dev->regs);
+
+ bail8:
+	iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+	iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+	unregister_netdev(netdev);
+
+ bail5:
+	free_netdev(netdev);
+
+ bail4:
+	free_irq(pcidev->irq, c2dev);
+
+ bail3:
+	ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+	pci_release_regions(pcidev);
+
+ bail1:
+	pci_disable_device(pcidev);
+
+ bail0:
+	return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+	struct net_device *netdev = c2dev->netdev;
+
+	/* Unregister with OpenIB */
+	c2_unregister_device(c2dev);
+
+	/* Clean up the RNIC resources */
+	c2_rnic_term(c2dev);
+
+	/* Remove network device from the kernel */
+	unregister_netdev(netdev);
+
+	/* Free network device */
+	free_netdev(netdev);
+
+	/* Free the interrupt line */
+	free_irq(pcidev->irq, c2dev);
+
+	/* missing: Turn LEDs off here */
+
+	/* Unmap adapter PA space */
+	iounmap(c2dev->kva);
+	iounmap(c2dev->regs);
+	iounmap(c2dev->mmio_txp_ring);
+	iounmap(c2dev->mmio_rxp_ring);
+
+	/* Free the hardware structure */
+	ib_dealloc_device(&c2dev->ibdev);
+
+	/* Release reserved PCI I/O and memory resources */
+	pci_release_regions(pcidev);
+
+	/* Disable PCI device */
+	pci_disable_device(pcidev);
+
+	/* Clear driver specific data */
+	pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = c2_pci_table,
+	.probe = c2_probe,
+	.remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+	return pci_register_driver(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+	pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_cm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct ib_qp *ibqp;
+	struct c2_qp *qp;
+	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
+	struct c2_vq_req *vq_req;
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Associate QP <--> CM_ID */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/*
+	 * only support up to the max private_data length
+	 */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail0;
+	}
+	/*
+	 * Set the rdma read limits
+	 */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Create and send a WR_QP_CONNECT...
+	 */
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	c2_wr_set_id(wr, CCWR_QP_CONNECT);
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->qp_handle = qp->adapter_handle;
+
+	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+	wr->remote_port = cm_id->remote_addr.sin_port;
+
+	/*
+	 * Move any private data from the caller's buf into
+	 * the WR.
+	 */
+	if (iw_param->private_data) {
+		wr->private_data_length =
+			cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0], iw_param->private_data,
+		       iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/*
+	 * Send WR to adapter.  NOTE: There is no synch reply from
+	 * the adapter.
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	vq_req_free(c2dev, vq_req);
+
+ bail1:
+	kfree(wr);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_create_req wr;
+	struct c2wr_ep_listen_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+	wr.local_port = cm_id->local_addr.sin_port;
+	wr.backlog = cpu_to_be32(backlog);
+	wr.user_context = (u64) (unsigned long) cm_id;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply =
+	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+	/*
+	 * Keep the adapter handle. Used in subsequent destroy
+	 */
+	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_destroy_req wr;
+	struct c2wr_ep_listen_destroy_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct c2_qp *qp;
+	struct ib_qp *ibqp;
+	struct c2wr_cr_accept_req *wr;	/* variable length WR */
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
+	int err;
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Set the RDMA read limits */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/* Allocate verbs request. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	vq_req->qp = qp;
+	vq_req->cm_id = cm_id;
+	vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Build the WR */
+	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+	wr->qp_handle = qp->adapter_handle;
+
+	/* Replace the cr_handle with the QP after accept */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	cm_id->provider_data = qp;
+
+	/* Validate private_data length */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail1;
+	}
+
+	if (iw_param->private_data) {
+		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0],
+		       iw_param->private_data, iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/* Reference the request struct.  Dereferenced in the int handler. */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/* Wait for reply from adapter */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	/* Check that reply is present */
+	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+	if (!err)
+		c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail1:
+	kfree(wr);
+	vq_req_free(c2dev, vq_req);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, release reference on QP and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_cr_reject_req wr;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_reject_rep *reply;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_CR_REJECT);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	err = c2_errno(reply);
+	/*
+	 * free vq stuff
+	 */
+	vq_repbuf_free(c2dev, reply);
+
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
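Every c2_llp_* routine in c2_cm.c above follows the same verbs-queue (VQ) round trip: allocate a c2_vq_req, build a WR whose hdr.context points back at it, take an extra reference that the interrupt handler drops, post it with vq_send_wr(), sleep in vq_wait_for_reply(), then translate the reply with c2_errno() and free both the reply buffer and the request. Condensed into one sketch (illustrative only, using the listen-create WR as the example; it relies on the driver's own c2.h/c2_vq.h headers and is not part of the patch):

static int c2_vq_round_trip_example(struct c2_dev *c2dev)
{
	struct c2wr_ep_listen_create_req wr;	/* any fixed-size request WR */
	struct c2wr_ep_listen_create_rep *reply;
	struct c2_vq_req *vq_req;
	int err;

	vq_req = vq_req_alloc(c2dev);		/* 1. allocate the wait object */
	if (!vq_req)
		return -ENOMEM;

	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
	wr.hdr.context = (unsigned long) vq_req; /* 2. reply is demuxed on this */
	wr.rnic_handle = c2dev->adapter_handle;

	vq_req_get(c2dev, vq_req);		/* 3. ref dropped in handle_vq() */
	err = vq_send_wr(c2dev, (union c2wr *) &wr);
	if (err) {
		vq_req_put(c2dev, vq_req);	/* nothing will wake us: drop it ourselves */
		goto out;
	}

	err = vq_wait_for_reply(c2dev, vq_req);	/* 4. sleep until handle_vq() wakes us */
	if (err)
		goto out;

	reply = (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
	err = reply ? c2_errno(reply) : -ENOMEM; /* 5. map adapter status to errno */
	if (reply)
		vq_repbuf_free(c2dev, reply);
 out:
	vq_req_free(c2dev, vq_req);
	return err;
}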
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_cq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+	struct c2_cq *cq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+	cq = c2dev->qptr_array[cqn];
+	if (!cq) {
+		spin_unlock_irqrestore(&c2dev->lock, flags);
+		return NULL;
+	}
+	atomic_inc(&cq->refcount);
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+	return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+	if (atomic_dec_and_test(&cq->refcount))
+		wake_up(&cq->wait);
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_cq *cq;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq) {
+		printk(KERN_DEBUG "discarding events on destroyed CQN=%d\n", mq_index);
+		return;
+	}
+
+	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+	c2_cq_put(cq);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+	struct c2_cq *cq;
+	struct c2_mq *q;
+
+	cq = c2_cq_get(c2dev, mq_index);
+	if (!cq)
+		return;
+
+	spin_lock_irq(&cq->lock);
+	q = &cq->mq;
+	if (q && !c2_mq_empty(q)) {
+		u16 priv = q->priv;
+		struct c2wr_ce *msg;
+
+		while (priv != be16_to_cpu(*q->shared)) {
+			msg = (struct c2wr_ce *)
+				(q->msg_pool.host + priv * q->msg_size);
+			if (msg->qp_user_context == (u64) (unsigned long) qp) {
+				msg->qp_user_context = (u64) 0;
+			}
+			priv = (priv + 1) % q->q_size;
+		}
+	}
+	spin_unlock_irq(&cq->lock);
+	c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+	switch (status) {
+	case C2_OK:
+		return IB_WC_SUCCESS;
+	case CCERR_FLUSHED:
+		return IB_WC_WR_FLUSH_ERR;
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return IB_WC_LOC_PROT_ERR;
+	case CCERR_ACCESS_VIOLATION:
+		return IB_WC_LOC_ACCESS_ERR;
+	case CCERR_TOTAL_LENGTH_TOO_BIG:
+		return IB_WC_LOC_LEN_ERR;
+	case CCERR_INVALID_WINDOW:
+		return IB_WC_MW_BIND_ERR;
+	default:
+		return IB_WC_GENERAL_ERR;
+	}
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+			      struct c2_cq *cq, struct ib_wc *entry)
+{
+	struct c2wr_ce *ce;
+	struct c2_qp *qp;
+	int is_recv = 0;
+
+	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	if (!ce) {
+		return -EAGAIN;
+	}
+
+	/*
+	 * if the qp returned is null then this qp has already
+	 * been freed and we are unable to process the completion.
+	 * try pulling the next message
+	 */
+	while ((qp =
+		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+		c2_mq_free(&cq->mq);
+		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		if (!ce)
+			return -EAGAIN;
+	}
+
+	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+	entry->wr_id = ce->hdr.context;
+	entry->qp = &qp->ibqp;
+	entry->wc_flags = 0;
+	entry->slid = 0;
+	entry->sl = 0;
+	entry->src_qp = 0;
+	entry->dlid_path_bits = 0;
+	entry->pkey_index = 0;
+
+	switch (c2_wr_get_id(ce)) {
+	case C2_WR_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case C2_WR_TYPE_RDMA_WRITE:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case C2_WR_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case C2_WR_TYPE_BIND_MW:
+		entry->opcode = IB_WC_BIND_MW;
+		break;
+	case C2_WR_TYPE_RECV:
+		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+		entry->opcode = IB_WC_RECV;
+		is_recv = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* consume the WQEs */
+	if (is_recv)
+		c2_mq_lconsume(&qp->rq_mq, 1);
+	else
+		c2_mq_lconsume(&qp->sq_mq,
+			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+	/* free the message */
+	c2_mq_free(&cq->mq);
+
+	return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct c2_dev *c2dev = to_c2dev(ibcq->device);
+	struct c2_cq *cq = to_c2cq(ibcq);
+	unsigned long flags;
+	int npolled, err;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+
+		err = c2_poll_one(c2dev, cq, entry + npolled);
+		if (err)
+			break;
+	}
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct c2_mq_shared __iomem *shared;
+	struct c2_cq *cq;
+
+	cq = to_c2cq(ibcq);
+	shared = cq->mq.peer;
+
+	if (notify == IB_CQ_NEXT_COMP)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+	else if (notify == IB_CQ_SOLICITED)
+		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+	else
+		return -EINVAL;
+
+	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+	/*
+	 * Now read back shared->armed to make the PCI
+	 * write synchronous.  This is necessary for
+	 * correct cq notification semantics.
+	 */
+	readb(&shared->armed);
+
+	return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+	dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
+			  mq->msg_pool.host, pci_unmap_addr(mq, mapping));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+			   int msg_size)
+{
+	u8 *pool_start;
+
+	pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
+					&mq->host_dma, GFP_KERNEL);
+	if (!pool_start)
+		return -ENOMEM;
+
+	c2_mq_rep_init(mq,
+		       0,		/* index (currently unknown) */
+		       q_size,
+		       msg_size,
+		       pool_start,
+		       NULL,	/* peer (currently unknown) */
+		       C2_MQ_HOST_TARGET);
+
+	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+	return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+	       struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+	struct c2wr_cq_create_req wr;
+	struct c2wr_cq_create_rep *reply;
+	unsigned long peer_pa;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	might_sleep();
+
+	cq->ibcq.cqe = entries - 1;
+	cq->is_kernel = !ctx;
+
+	/* Allocate a shared pointer */
+	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+				      &cq->mq.shared_dma, GFP_KERNEL);
+	if (!cq->mq.shared)
+		return -ENOMEM;
+
+	/* Allocate pages for the message pool */
+	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+	if (err)
+		goto bail0;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+	wr.depth = cpu_to_be32(cq->mq.q_size);
+	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+	wr.user_context = (u64) (unsigned long) (cq);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail3;
+
+	cq->adapter_handle = reply->cq_handle;
+	cq->mq.index = be32_to_cpu(reply->mq_index);
+
+	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+	if (!cq->mq.peer) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->refcount, 1);
+	init_waitqueue_head(&cq->wait);
+
+	/*
+	 * Use the MQ index allocated by the adapter to
+	 * store the CQ in the qptr_array
+	 */
+	cq->cqn = cq->mq.index;
+	c2dev->qptr_array[cq->cqn] = cq;
+
+	return 0;
+
+      bail3:
+	vq_repbuf_free(c2dev, reply);
+      bail2:
+	vq_req_free(c2dev, vq_req);
+      bail1:
+	c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+	c2_free_mqsp(cq->mq.shared);
+
+	return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+	int err;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cq_destroy_req wr;
+	struct c2wr_cq_destroy_rep *reply;
+
+	might_sleep();
+
+	/* Clear CQ from the qptr array */
+	spin_lock_irq(&c2dev->lock);
+	c2dev->qptr_array[cq->mq.index] = NULL;
+	atomic_dec(&cq->refcount);
+	spin_unlock_irq(&c2dev->lock);
+
+	wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		goto bail0;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.cq_handle = cq->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	vq_req_free(c2dev, vq_req);
+      bail0:
+	if (cq->is_kernel) {
+		c2_free_cq_buf(c2dev, &cq->mq);
+	}
+
+	return;
+}
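c2_poll_cq() and c2_arm_cq() above become the device's ib_poll_cq and ib_req_notify_cq entry points (hooked up by c2_register_device() elsewhere in this patch). For context, a kernel consumer typically arms the CQ and then drains it until empty, re-polling after the arm to close the race; a rough sketch against the verbs API of this kernel, with the consumer-side names being illustrative and not added by the patch:

static void example_drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc;

	/* Ask for an interrupt on the next completion, then drain anything
	 * that raced in before the arm took effect. */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status != IB_WC_SUCCESS)
			pr_debug("wr_id %llu failed with status %d\n",
				 (unsigned long long) wc.wr_id, wc.status);
		/* ... complete the work request identified by wc.wr_id ... */
	}
}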
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT	    (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+	C2_REGS_MAGIC = 0x0000,
+	C2_REGS_VERS = 0x0008,
+	C2_REGS_IVN = 0x000C,
+	C2_REGS_PCI_WINSIZE = 0x0010,
+	C2_REGS_Q0_QSIZE = 0x0014,
+	C2_REGS_Q0_MSGSIZE = 0x0018,
+	C2_REGS_Q0_POOLSTART = 0x001C,
+	C2_REGS_Q0_SHARED = 0x0020,
+	C2_REGS_Q1_QSIZE = 0x0024,
+	C2_REGS_Q1_MSGSIZE = 0x0028,
+	C2_REGS_Q1_SHARED = 0x0030,
+	C2_REGS_Q2_QSIZE = 0x0034,
+	C2_REGS_Q2_MSGSIZE = 0x0038,
+	C2_REGS_Q2_SHARED = 0x0040,
+	C2_REGS_ENADDR = 0x004C,
+	C2_REGS_RDMA_ENADDR = 0x0054,
+	C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+	char reg_magic[8];
+	u32 version;
+	u32 ivn;
+	u32 pci_window_size;
+	u32 q0_q_size;
+	u32 q0_msg_size;
+	u32 q0_pool_start;
+	u32 q0_shared;
+	u32 q1_q_size;
+	u32 q1_msg_size;
+	u32 q1_pool_start;
+	u32 q1_shared;
+	u32 q2_q_size;
+	u32 q2_msg_size;
+	u32 q2_pool_start;
+	u32 q2_shared;
+	u32 log_start;
+	u32 log_size;
+	u8 host_enaddr[8];
+	u8 rdma_enaddr[8];
+	u32 crash_entry;
+	u32 crash_ready[2];
+	u32 fw_txd_cur;
+	u32 fw_hrxd_cur;
+	u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+	C2_HISR = 0x0000,
+	C2_DISR = 0x0004,
+	C2_HIMR = 0x0008,
+	C2_DIMR = 0x000C,
+	C2_NISR0 = 0x0010,
+	C2_NISR1 = 0x0014,
+	C2_NIMR0 = 0x0018,
+	C2_NIMR1 = 0x001C,
+	C2_IDIS = 0x0020,
+};
+
+enum {
+	C2_PCI_HRX_INT = 1 << 8,
+	C2_PCI_HTX_INT = 1 << 17,
+	C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+	u32 hostisr;
+	u32 dmaisr;
+	u32 hostimr;
+	u32 dmaimr;
+	u32 netisr0;
+	u32 netisr1;
+	u32 netimr0;
+	u32 netimr1;
+	u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+	TXP_HTXD_DONE = 0,
+	TXP_HTXD_READY = 1 << 0,
+	TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+	RXP_HRXD_UNINIT = 0,
+	RXP_HRXD_READY = 1 << 0,
+	RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+	RXP_HRXD_ZERO = 0,
+	RXP_HRXD_OK = 1 << 0,
+	RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+	C2_TXP_FLAGS = 0x0000,
+	C2_TXP_LEN = 0x0002,
+	C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+	C2_RXP_FLAGS = 0x0000,
+	C2_RXP_STATUS = 0x0002,
+	C2_RXP_COUNT = 0x0004,
+	C2_RXP_LEN = 0x0006,
+	C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+	u16 flags;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+	u16 flags;
+	u16 status;
+	u16 count;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+	u16 flags;
+	u16 status;
+	u16 len;
+	u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_array {
+	struct {
+		void **page;
+		int used;
+	} *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+	struct sp_chunk *next;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 head;
+	u16 shared_ptr[0];
+};
+
+struct c2_pd_table {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+struct c2_qp_table {
+	struct idr idr;
+	spinlock_t lock;
+	int last;
+};
+
+struct c2_element {
+	struct c2_element *next;
+	void *ht_desc;		/* host     descriptor */
+	void __iomem *hw_desc;	/* hardware descriptor */
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+};
+
+struct c2_ring {
+	struct c2_element *to_clean;
+	struct c2_element *to_use;
+	struct c2_element *start;
+	unsigned long count;
+};
+
+struct c2_dev {
+	struct ib_device ibdev;
+	void __iomem *regs;
+	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+	void __iomem *mmio_rxp_ring;
+	spinlock_t lock;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	struct net_device *pseudo_netdev;
+	unsigned int cur_tx;
+	unsigned int cur_rx;
+	u32 adapter_handle;
+	int device_cap_flags;
+	void __iomem *kva;	/* KVA device memory */
+	unsigned long pa;	/* PA device memory */
+	void **qptr_array;
+
+	struct kmem_cache *host_msg_cache;
+
+	struct list_head cca_link;		/* adapter list */
+	struct list_head eh_wakeup_list;	/* event wakeup list */
+	wait_queue_head_t req_vq_wo;
+
+	/* Cached RNIC properties */
+	struct ib_device_attr props;
+
+	struct c2_pd_table pd_table;
+	struct c2_qp_table qp_table;
+	int ports;		/* num of GigE ports */
+	int devnum;
+	spinlock_t vqlock;	/* sync vbs req MQ */
+
+	/* Verbs Queues */
+	struct c2_mq req_vq;	/* Verbs Request MQ */
+	struct c2_mq rep_vq;	/* Verbs Reply MQ */
+	struct c2_mq aeq;	/* Async Events MQ */
+
+	/* Kernel client MQs */
+	struct sp_chunk *kern_mqsp_pool;
+
+	/* Device updates these values when posting messages to a host
+	 * target queue */
+	u16 req_vq_shared;
+	u16 rep_vq_shared;
+	u16 aeq_shared;
+	u16 irq_claimed;
+
+	/*
+	 * Shared host target pages for user-accessible MQs.
+	 */
+	int hthead;		/* index of first free entry */
+	void *htpages;		/* kernel vaddr */
+	int htlen;		/* length of htpages memory */
+	void *htuva;		/* user mapped vaddr */
+	spinlock_t htlock;	/* serialize allocation */
+
+	u64 adapter_hint_uva;	/* access to the activity FIFO */
+
+	//	spinlock_t aeq_lock;
+	//	spinlock_t rnic_lock;
+
+	u16 *hint_count;
+	dma_addr_t hint_count_dma;
+	u16 hints_read;
+
+	int init;		/* TRUE if it's ready */
+	char ae_cache_name[16];
+	char vq_cache_name[16];
+};
+
+struct c2_port {
+	u32 msg_enable;
+	struct c2_dev *c2dev;
+	struct net_device *netdev;
+
+	spinlock_t tx_lock;
+	u32 tx_avail;
+	struct c2_ring tx_ring;
+	struct c2_ring rx_ring;
+
+	void *mem;		/* PCI memory for host rings */
+	dma_addr_t dma;
+	unsigned long mem_size;
+
+	u32 rx_buf_size;
+
+	struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT	0x100
+#define PCI_BAR0_ADAPTER_HINT	0x2000
+
+/*
+ * CQ arming flags, written into the CQ's shared 'armed' byte by c2_arm_cq().
+ */
+#define CQ_ARMED 	0x01
+#define CQ_WAIT_FOR_DMA	0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+
+/*
+ * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
+ * struct.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+	u64 ret = readl(addr + 4);
+	ret <<= 32;
+	ret |= readl(addr);
+
+	return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+	__raw_writel((u32) (val), addr);
+	__raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+	__raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+	be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{
+	switch (c2_wr_get_result(reply)) {
+	case C2_OK:
+		return 0;
+	case CCERR_NO_BUFS:
+	case CCERR_INSUFFICIENT_RESOURCES:
+	case CCERR_ZERO_RDMA_READ_RESOURCES:
+		return -ENOMEM;
+	case CCERR_MR_IN_USE:
+	case CCERR_QP_IN_USE:
+		return -EBUSY;
+	case CCERR_ADDR_IN_USE:
+		return -EADDRINUSE;
+	case CCERR_ADDR_NOT_AVAIL:
+		return -EADDRNOTAVAIL;
+	case CCERR_CONN_RESET:
+		return -ECONNRESET;
+	case CCERR_NOT_IMPLEMENTED:
+	case CCERR_INVALID_WQE:
+		return -ENOSYS;
+	case CCERR_QP_NOT_PRIVILEGED:
+		return -EPERM;
+	case CCERR_STACK_ERROR:
+		return -EPROTO;
+	case CCERR_ACCESS_VIOLATION:
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return -EFAULT;
+	case CCERR_STAG_STATE_NOT_INVALID:
+	case CCERR_INVALID_ADDRESS:
+	case CCERR_INVALID_CQ:
+	case CCERR_INVALID_EP:
+	case CCERR_INVALID_MODIFIER:
+	case CCERR_INVALID_MTU:
+	case CCERR_INVALID_PD_ID:
+	case CCERR_INVALID_QP:
+	case CCERR_INVALID_RNIC:
+	case CCERR_INVALID_STAG:
+		return -EINVAL;
+	default:
+		return -EAGAIN;
+	}
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+			struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+				 int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+			struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+		      struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 				      int page_size, int pbl_depth, u32 length,
+ 				      u32 off, u64 *va, enum c2_acf acf,
+				      struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+			     struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			  dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
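The hint words read from the activity FIFO pack a queue index and a message count as documented above C2_HINT_MAKE() in c2.h. A quick worked example, with values chosen only for illustration:

	u32 hint = C2_HINT_MAKE(5, 3);		/* mq index 5, 3 new messages     */
						/* hint == 0x00050003             */
	BUG_ON(C2_HINT_GET_INDEX(hint) != 5);	/* (hint & 0x7FFF0000) >> 16 == 5 */
	BUG_ON(C2_HINT_GET_COUNT(hint) != 3);	/* hint & 0x0000FFFF == 3         */

The top bit is deliberately excluded from the index mask; as the comment notes, it carries the full/empty flag depending on which side reads the word.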
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_intr.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+	unsigned int mq_index;
+
+	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+		if (mq_index & 0x80000000) {
+			break;
+		}
+
+		c2dev->hints_read++;
+		handle_mq(c2dev, mq_index);
+	}
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+	if (c2dev->qptr_array[mq_index] == NULL) {
+		pr_debug("handle_mq: stray activity for mq_index=%d\n",
+			mq_index);
+		return;
+	}
+
+	switch (mq_index) {
+	case (0):
+		/*
+		 * An index of 0 in the activity queue
+		 * indicates the req vq now has messages
+		 * available...
+		 *
+		 * Wake up any waiters waiting on req VQ
+		 * message availability.
+		 */
+		wake_up(&c2dev->req_vq_wo);
+		break;
+	case (1):
+		handle_vq(c2dev, mq_index);
+		break;
+	case (2):
+		/* We have to purge the VQ in case there are pending
+		 * accept reply requests that would result in the
+		 * generation of an ESTABLISHED event. If we don't
+		 * generate these first, a CLOSE event could end up
+		 * being delivered before the ESTABLISHED event.
+		 */
+		handle_vq(c2dev, 1);
+
+		c2_ae_event(c2dev, mq_index);
+		break;
+	default:
+		/* There is no event synchronization between CQ events
+		 * and AE or CM events. In fact, CQE could be
+		 * delivered for all of the I/O up to and including the
+		 * FLUSH for a peer disconnect prior to the ESTABLISHED
+		 * event being delivered to the app. The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CQ events are delivered in interrupt context.
+		 */
+		c2_cq_event(c2dev, mq_index);
+		break;
+	}
+
+	return;
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+	void *adapter_msg, *reply_msg;
+	struct c2wr_hdr *host_msg;
+	struct c2wr_hdr tmp;
+	struct c2_mq *reply_vq;
+	struct c2_vq_req *req;
+	struct iw_cm_event cm_event;
+	int err;
+
+	reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+	/*
+	 * get next msg from mq_index into adapter_msg.
+	 * don't free it yet.
+	 */
+	adapter_msg = c2_mq_consume(reply_vq);
+	if (adapter_msg == NULL) {
+		return;
+	}
+
+	host_msg = vq_repbuf_alloc(c2dev);
+
+	/*
+	 * If we can't get a host buffer, then we'll still
+	 * wake up the waiter; we just won't give it the msg.
+	 * It is assumed the waiter will deal with this...
+	 */
+	if (!host_msg) {
+		pr_debug("handle_vq: no repbufs!\n");
+
+		/*
+		 * just copy the WR header into a local variable.
+		 * this allows us to still demux on the context
+		 */
+		host_msg = &tmp;
+		memcpy(host_msg, adapter_msg, sizeof(tmp));
+		reply_msg = NULL;
+	} else {
+		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+		reply_msg = host_msg;
+	}
+
+	/*
+	 * consume the msg from the MQ
+	 */
+	c2_mq_free(reply_vq);
+
+	/*
+	 * wakeup the waiter.
+	 */
+	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+	if (req == NULL) {
+		/*
+		 * We should never get here, as the adapter should
+		 * never send us a reply that we're not expecting.
+		 */
+		vq_repbuf_free(c2dev, host_msg);
+		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+		return;
+	}
+
+	err = c2_errno(reply_msg);
+	if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		c2_set_qp_state(req->qp,
+				C2_QP_STATE_RTS);
+	case IW_CM_EVENT_CLOSE:
+
+		/*
+		 * Move the QP to RTS if this is
+		 * the established event
+		 */
+		cm_event.event = req->event;
+		cm_event.status = 0;
+		cm_event.local_addr = req->cm_id->local_addr;
+		cm_event.remote_addr = req->cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+		req->cm_id->event_handler(req->cm_id, &cm_event);
+		break;
+	default:
+		break;
+	}
+
+	req->reply_msg = (u64) (unsigned long) (reply_msg);
+	atomic_set(&req->reply_ready, 1);
+	wake_up(&req->wait_object);
+
+	/*
+	 * If the request was cancelled, then this put will
+	 * free the vq_req memory...and reply_msg!!!
+	 */
+	vq_req_put(c2dev, req);
+}
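handle_vq() above is the waker half of the VQ round trip: it stores the reply in req->reply_msg, sets req->reply_ready and wakes req->wait_object. The waiter half, vq_wait_for_reply(), lives in c2_vq.c (not shown in this hunk) and presumably pairs with it in the usual wait_event style; a simplified model of the handshake follows, with the timeout value purely illustrative:

/* Waker side, matching what handle_vq() does above. */
static void example_signal_reply(struct c2_vq_req *req, u64 reply)
{
	req->reply_msg = reply;
	atomic_set(&req->reply_ready, 1);
	wake_up(&req->wait_object);
}

/* Waiter side -- a sketch of what vq_wait_for_reply() is expected to do. */
static int example_wait_for_reply(struct c2_vq_req *req)
{
	if (!wait_event_timeout(req->wait_object,
				atomic_read(&req->reply_ready), 60 * HZ))
		return -ETIMEDOUT;	/* 60s is an invented value */
	return 0;
}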
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_mm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *	  Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+		  unsigned long va, u32 pbl_depth,
+		  struct c2_vq_req *vq_req, int pbl_type)
+{
+	u32 pbe_count;		/* amt that fits in a PBL msg */
+	u32 count;		/* amt in this PBL MSG. */
+	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
+	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
+ 	int err, pbl_virt, pbl_index, i;
+
+	switch (pbl_type) {
+	case PBL_VIRT:
+		pbl_virt = 1;
+		break;
+	case PBL_PHYS:
+		pbl_virt = 0;
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		return -ENOMEM;
+	}
+	c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+	/*
+	 * Only the last PBL message will generate a reply from the verbs,
+	 * so we set the context to 0 indicating there is no kernel verbs
+	 * handler blocked awaiting this reply.
+	 */
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->stag_index = stag_index;	/* already swapped */
+	wr->flags = 0;
+	pbl_index = 0;
+	while (pbl_depth) {
+		count = min(pbe_count, pbl_depth);
+		wr->addrs_length = cpu_to_be32(count);
+
+		/*
+		 *  If this is the last message, then reference the
+		 *  vq request struct because we're going to wait for a reply.
+		 *  Also mark this PBL msg as the last one.
+		 */
+		if (count == pbl_depth) {
+			/*
+			 * reference the request struct.  dereferenced in the
+			 * int handler.
+			 */
+			vq_req_get(c2dev, vq_req);
+			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+			/*
+			 * This is the last PBL message.
+			 * Set the context to our VQ Request Object so we can
+			 * wait for the reply.
+			 */
+			wr->hdr.context = (unsigned long) vq_req;
+		}
+
+		/*
+		 * If pbl_virt is set then va is a virtual address
+		 * that describes a virtually contiguous memory
+		 * allocation. The wr needs the start of each virtual page
+		 * to be converted to the corresponding physical address
+		 * of the page. If pbl_virt is not set then va is an array
+		 * of physical addresses and there is no conversion to do.
+		 * Just fill in the wr with what is in the array.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pbl_virt) {
+				va += PAGE_SIZE;
+			} else {
+ 				wr->paddrs[i] =
+				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
+			}
+		}
+
+		/*
+		 * Send WR to adapter
+		 */
+		err = vq_send_wr(c2dev, (union c2wr *) wr);
+		if (err) {
+			if (count <= pbe_count) {
+				vq_req_put(c2dev, vq_req);
+			}
+			goto bail0;
+		}
+		pbl_depth -= count;
+		pbl_index += count;
+	}
+
+	/*
+	 *  Now wait for the reply...
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	kfree(wr);
+	return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 			   int page_size, int pbl_depth, u32 length,
+ 			   u32 offset, u64 *va, enum c2_acf acf,
+			   struct c2_mr *mr)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_nsmr_register_req *wr;
+	struct c2wr_nsmr_register_rep *reply;
+	u16 flags;
+	int i, pbe_count, count;
+	int err;
+
+	if (!va || !length || !addr_list || !pbl_depth)
+		return -EINTR;
+
+	/*
+	 * Verify PBL depth is within rnic max
+	 */
+	if (pbl_depth > C2_PBL_MAX_DEPTH) {
+		return -EINTR;
+	}
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/*
+	 * build the WR
+	 */
+	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+
+	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+	/*
+	 * compute how many pbes can fit in the message
+	 */
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+	if (pbl_depth <= pbe_count) {
+		flags |= MEM_PBL_COMPLETE;
+	}
+	wr->flags = cpu_to_be16(flags);
+	wr->stag_key = 0;	//stag_key;
+	wr->va = cpu_to_be64(*va);
+	wr->pd_id = mr->pd->pd_id;
+	wr->pbe_size = cpu_to_be32(page_size);
+	wr->length = cpu_to_be32(length);
+	wr->pbl_depth = cpu_to_be32(pbl_depth);
+	wr->fbo = cpu_to_be32(offset);
+	count = min(pbl_depth, pbe_count);
+	wr->addrs_length = cpu_to_be32(count);
+
+	/*
+	 * fill out the PBL for this message
+	 */
+	for (i = 0; i < count; i++) {
+		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+	}
+
+	/*
+	 * reference the request struct
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * send the WR to the adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/*
+	 * wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail1;
+	}
+
+	/*
+	 * process reply
+	 */
+	reply =
+	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	if ((err = c2_errno(reply))) {
+		goto bail2;
+	}
+	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
+	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+	vq_repbuf_free(c2dev, reply);
+
+	/*
+	 * if there are still more PBEs we need to send them to
+	 * the adapter and wait for a reply on the final one.
+	 * reuse vq_req for this purpose.
+	 */
+	pbl_depth -= count;
+	if (pbl_depth) {
+
+		vq_req->reply_msg = (unsigned long) NULL;
+		atomic_set(&vq_req->reply_ready, 0);
+		err = send_pbl_messages(c2dev,
+					cpu_to_be32(mr->ibmr.lkey),
+					(unsigned long) &addr_list[i],
+					pbl_depth, vq_req, PBL_PHYS);
+		if (err) {
+			goto bail1;
+		}
+	}
+
+	vq_req_free(c2dev, vq_req);
+	kfree(wr);
+
+	return err;
+
+      bail2:
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+	struct c2_vq_req *vq_req;	/* verbs request object */
+	struct c2wr_stag_dealloc_req wr;	/* work request */
+	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
+	int err;
+
+
+	/*
+	 * allocate verbs request object
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Build the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.stag_index = cpu_to_be32(stag_index);
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
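c2_nsmr_register_phys_kern() above splits a large page-buffer list (PBL) over as many VQ messages as fit: the initial CCWR_NSMR_REGISTER WR carries min(pbe_count, pbl_depth) entries and send_pbl_messages() streams out the rest, with only the last message (flagged MEM_PBL_COMPLETE) producing a reply. With purely illustrative numbers (the real msg_size comes from the adapter at init time, and the register and PBL WR headers differ slightly in size):

	/* Say req_vq.msg_size == 512 and the PBL WR header takes 64 bytes;
	 * then each message carries (512 - 64) / sizeof(u64) == 56 PBEs.  */
	u32 pbe_count = 56;
	u32 pbl_depth = 200;				/* pages to register */
	u32 msgs = (pbl_depth + pbe_count - 1) / pbe_count;
	/* msgs == 4: three messages of 56 PBEs and a final one of 32, and
	 * only that last one sets MEM_PBL_COMPLETE and is waited on. */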
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_mq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (c2_mq_full(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m =
+		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+		m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	if (!c2_mq_full(q)) {
+		q->priv = (q->priv + 1) % q->q_size;
+		q->hint_count++;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (c2_mq_empty(q)) {
+		return NULL;
+	} else {
+#ifdef DEBUG
+		struct c2wr_hdr *m = (struct c2wr_hdr *)
+		    (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+		return m;
+#else
+		return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+	}
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+		{
+			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+			    (q->msg_pool.adapter + q->priv * q->msg_size);
+			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+		}
+#endif
+		q->priv = (q->priv + 1) % q->q_size;
+		/* Update peer's offset. */
+		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+	}
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+	while (wqe_count--) {
+		BUG_ON(c2_mq_empty(q));
+		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+	}
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+	s32 count;
+
+	if (q->type == C2_MQ_HOST_TARGET)
+		count = be16_to_cpu(*q->shared) - q->priv;
+	else
+		count = q->priv - be16_to_cpu(*q->shared);
+
+	if (count < 0)
+		count += q->q_size;
+
+	return (u32) count;
+}
+#endif  /*  0  */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.adapter = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.host = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->priv = 0;
+	q->hint_count = 0;
+	return;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_mq.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+
+	C2_SHARED_ARMED = 0x10,
+	C2_SHARED_NOTIFY = 0x18,
+	C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+	u16 unused1;
+	u8 armed;
+	u8 notification_type;
+	u32 unused2;
+	u16 shared;
+	/* Pad to 64 bytes. */
+	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+	C2_MQ_HOST_TARGET = 1,
+	C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
+struct c2_mq {
+	u32 magic;
+	union {
+		u8 *host;
+		u8 __iomem *adapter;
+	} msg_pool;
+	dma_addr_t host_dma;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 hint_count;
+	u16 priv;
+	struct c2_mq_shared __iomem *peer;
+	u16 *shared;
+	dma_addr_t shared_dma;
+	u32 q_size;
+	u32 msg_size;
+	u32 index;
+	enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+	return q->priv == be16_to_cpu(*q->shared);
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+			   u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif				/* _C2_MQ_H_ */
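
The MQ described above is a single-producer/single-consumer ring: priv is the index owned by the local side and *shared is the index published by the peer, so the queue is empty when the two match and full when advancing the producer once more would collide with the consumer index (one slot is always left unused so full and empty stay distinguishable). The user-space sketch below models only that index arithmetic from c2_mq_empty(), c2_mq_full() and the consumer-side advance in c2_mq_free(); the toy_* names are illustrative, and the byte swapping and iomem writes of the real driver are omitted.

#include <stdio.h>
#include <stdint.h>

struct toy_mq {
	uint16_t priv;    /* index owned by the local side      */
	uint16_t shared;  /* index published by the peer        */
	uint32_t q_size;  /* number of slots in the ring        */
};

static int toy_mq_empty(const struct toy_mq *q)
{
	return q->priv == q->shared;
}

static int toy_mq_full(const struct toy_mq *q)
{
	/* One slot is sacrificed so "full" never looks like "empty". */
	return q->priv == (q->shared + q->q_size - 1) % q->q_size;
}

static void toy_mq_free(struct toy_mq *q)   /* consumer-side advance */
{
	if (!toy_mq_empty(q))
		q->priv = (q->priv + 1) % q->q_size;
}

int main(void)
{
	/* Consumer view: the peer has produced up to slot 3, we are at 0. */
	struct toy_mq q = { .priv = 0, .shared = 3, .q_size = 4 };

	while (!toy_mq_empty(&q)) {              /* drains slots 0, 1, 2 */
		printf("consume slot %u\n", (unsigned) q.priv);
		toy_mq_free(&q);
	}

	/* Producer view: with 4 slots, only 3 may ever be outstanding. */
	q.priv = 2;
	q.shared = 3;
	printf("full=%d\n", toy_mq_full(&q));    /* prints full=1 */
	return 0;
}
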
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_pd.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+	u32 obj;
+	int ret = 0;
+
+	spin_lock(&c2dev->pd_table.lock);
+	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+				 c2dev->pd_table.last);
+	if (obj >= c2dev->pd_table.max)
+		obj = find_first_zero_bit(c2dev->pd_table.table,
+					  c2dev->pd_table.max);
+	if (obj < c2dev->pd_table.max) {
+		pd->pd_id = obj;
+		__set_bit(obj, c2dev->pd_table.table);
+		c2dev->pd_table.last = obj+1;
+		if (c2dev->pd_table.last >= c2dev->pd_table.max)
+			c2dev->pd_table.last = 0;
+	} else
+		ret = -ENOMEM;
+	spin_unlock(&c2dev->pd_table.lock);
+	return ret;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+	spin_lock(&c2dev->pd_table.lock);
+	__clear_bit(pd->pd_id, c2dev->pd_table.table);
+	spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+
+	c2dev->pd_table.last = 0;
+	c2dev->pd_table.max = c2dev->props.max_pd;
+	spin_lock_init(&c2dev->pd_table.lock);
+	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+					sizeof(long), GFP_KERNEL);
+	if (!c2dev->pd_table.table)
+		return -ENOMEM;
+	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+	return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+	kfree(c2dev->pd_table.table);
+}
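
c2_pd_alloc() above uses the usual round-robin bitmap search: start scanning at pd_table.last, wrap to the beginning if nothing is free past that point, and remember the slot after the winner as the next starting position so PD IDs are not reused immediately. Below is a user-space model of that search; the kernel's find_next_zero_bit()/find_first_zero_bit() are replaced by a plain scan and the toy_* names only stand in for the driver's structures.

#include <stdio.h>
#include <string.h>

#define TOY_MAX 8

struct toy_table {
	unsigned char used[TOY_MAX];
	unsigned int last;            /* next search position */
};

static int toy_alloc(struct toy_table *t)
{
	unsigned int i, obj;

	/* Scan from 'last', wrapping once around the table. */
	for (i = 0; i < TOY_MAX; i++) {
		obj = (t->last + i) % TOY_MAX;
		if (!t->used[obj]) {
			t->used[obj] = 1;
			t->last = (obj + 1) % TOY_MAX;
			return (int) obj;
		}
	}
	return -1;                    /* table full (-ENOMEM in the driver) */
}

static void toy_free(struct toy_table *t, int obj)
{
	t->used[obj] = 0;
}

int main(void)
{
	struct toy_table t;
	int i;

	memset(&t, 0, sizeof(t));
	for (i = 0; i < 3; i++)
		printf("alloc -> %d\n", toy_alloc(&t));  /* 0, 1, 2 */

	toy_free(&t, 1);
	printf("next  -> %d\n", toy_alloc(&t));          /* 3, not 1 */
	return 0;
}
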
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_provider.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,875 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+static int c2_query_device(struct ib_device *ibdev,
+			   struct ib_device_attr *props)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	*props = c2dev->props;
+	return 0;
+}
+
+static int c2_query_port(struct ib_device *ibdev,
+			 u8 port, struct ib_port_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	props->max_mtu = IB_MTU_4096;
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = 1;
+	props->active_speed = 1;
+
+	return 0;
+}
+
+static int c2_modify_port(struct ib_device *ibdev,
+			  u8 port, int port_modify_mask,
+			  struct ib_port_modify *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+static int c2_query_pkey(struct ib_device *ibdev,
+			 u8 port, u16 index, u16 * pkey)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	*pkey = 0;
+	return 0;
+}
+
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+			int index, union ib_gid *gid)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+	return 0;
+}
+
+/* Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+					     struct ib_udata *udata)
+{
+	struct c2_ucontext *context;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	return &context->ibucontext;
+}
+
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	kfree(context);
+	return 0;
+}
+
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+				 struct ib_ucontext *context,
+				 struct ib_udata *udata)
+{
+	struct c2_pd *pd;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+	if (err) {
+		kfree(pd);
+		return ERR_PTR(err);
+	}
+
+	if (context) {
+		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+			c2_pd_free(to_c2dev(ibdev), pd);
+			kfree(pd);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+
+	return &pd->ibpd;
+}
+
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+	kfree(pd);
+
+	return 0;
+}
+
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return ERR_PTR(-ENOSYS);
+}
+
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	atomic_inc(&qp->refcount);
+}
+
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+	BUG_ON(!ibqp);
+	qp = to_c2qp(ibqp);
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+	struct c2_dev* c2dev = to_c2dev(device);
+	struct c2_qp *qp;
+
+	qp = c2_find_qpn(c2dev, qpn);
+	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+		__FUNCTION__, qp, qpn, device,
+		(qp?atomic_read(&qp->refcount):0));
+
+	return (qp?&qp->ibqp:NULL);
+}
+
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+				  struct ib_qp_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct c2_qp *qp;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+		if (!qp) {
+			pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+			return ERR_PTR(-ENOMEM);
+		}
+		spin_lock_init(&qp->lock);
+		if (pd->uobject) {
+			/* userspace specific */
+		}
+
+		err = c2_alloc_qp(to_c2dev(pd->device),
+				  to_c2pd(pd), init_attr, qp);
+
+		if (err && pd->uobject) {
+			/* userspace specific */
+		}
+
+		break;
+	default:
+		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+			init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+		break;
+	}
+
+	if (err) {
+		kfree(qp);
+		return ERR_PTR(err);
+	}
+
+	return &qp->ibqp;
+}
+
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct c2_qp *qp = to_c2qp(ib_qp);
+
+	pr_debug("%s:%u qp=%p,qp->state=%d\n",
+		__FUNCTION__, __LINE__,ib_qp,qp->state);
+	c2_free_qp(to_c2dev(ib_qp->device), qp);
+	kfree(qp);
+	return 0;
+}
+
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct c2_cq *cq;
+	int err;
+
+	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+	if (err) {
+		pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+		kfree(cq);
+		return ERR_PTR(err);
+	}
+
+	return &cq->ibcq;
+}
+
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c2_cq *cq = to_c2cq(ib_cq);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	c2_free_cq(to_c2dev(ib_cq->device), cq);
+	kfree(cq);
+
+	return 0;
+}
+
+static inline u32 c2_convert_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+				    struct ib_phys_buf *buffer_list,
+				    int num_phys_buf, int acc, u64 * iova_start)
+{
+	struct c2_mr *mr;
+	u64 *page_list;
+	u32 total_len;
+	int err, i, j, k, page_shift, pbl_depth;
+
+	pbl_depth = 0;
+	total_len = 0;
+
+	page_shift = PAGE_SHIFT;
+	/*
+	 * If there is only one buffer, assume it could be a map of all
+	 * physical memory and use a 32 KB page size (page_shift + 3).
+	 */
+	if (num_phys_buf == 1)
+		page_shift += 3;
+
+	for (i = 0; i < num_phys_buf; i++) {
+
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			pr_debug("Unaligned Memory Buffer: 0x%x\n",
+				(unsigned int) buffer_list[i].addr);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (!buffer_list[i].size) {
+			pr_debug("Invalid Buffer Size\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		total_len += buffer_list[i].size;
+		pbl_depth += ALIGN(buffer_list[i].size,
+				   (1 << page_shift)) >> page_shift;
+	}
+
+	page_list = vmalloc(sizeof(u64) * pbl_depth);
+	if (!page_list) {
+		pr_debug("couldn't vmalloc page_list of size %zd\n",
+			(sizeof(u64) * pbl_depth));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+		int naddrs;
+
+ 		naddrs = ALIGN(buffer_list[i].size,
+			       (1 << page_shift)) >> page_shift;
+		for (k = 0; k < naddrs; k++)
+			page_list[j++] = (buffer_list[i].addr +
+						     (k << page_shift));
+	}
+
+	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		vfree(page_list);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mr->pd = to_c2pd(ib_pd);
+	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+		"*iova_start %llx, first pa %llx, last pa %llx\n",
+		__FUNCTION__, page_shift, pbl_depth, total_len,
+		(unsigned long long) *iova_start,
+	       	(unsigned long long) page_list[0],
+	       	(unsigned long long) page_list[pbl_depth-1]);
+  	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ 					 (1 << page_shift), pbl_depth,
+					 total_len, 0, iova_start,
+					 c2_convert_access(acc), mr);
+	vfree(page_list);
+	if (err) {
+		kfree(mr);
+		return ERR_PTR(err);
+	}
+
+	return &mr->ibmr;
+}
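
c2_reg_phys_mr() above flattens the caller's physical buffer list into one page list: each buffer's length is rounded up to a (1 << page_shift) boundary, the rounded sizes summed give pbl_depth, and every buffer is then expanded into per-page addresses. A small user-space model of that arithmetic follows; the toy names are illustrative and 4 KB pages are assumed.

#include <stdio.h>
#include <stdint.h>

#define TOY_ALIGN(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

struct toy_buf {
	uint64_t addr;
	uint64_t size;
};

int main(void)
{
	const unsigned int page_shift = 12;            /* 4 KiB pages */
	const uint64_t page = 1ULL << page_shift;
	struct toy_buf bufs[] = {
		{ 0x100000, 3 * page },                /* exactly 3 pages  */
		{ 0x200000, page + 1 },                /* rounds up to 2   */
	};
	unsigned int i, k, pbl_depth = 0;

	for (i = 0; i < 2; i++)
		pbl_depth += TOY_ALIGN(bufs[i].size, page) >> page_shift;
	printf("pbl_depth = %u\n", pbl_depth);         /* prints 5 */

	for (i = 0; i < 2; i++) {
		unsigned int naddrs =
			TOY_ALIGN(bufs[i].size, page) >> page_shift;
		for (k = 0; k < naddrs; k++)
			printf("  page %u.%u -> 0x%llx\n", i, k,
			       (unsigned long long)
			       (bufs[i].addr + ((uint64_t) k << page_shift)));
	}
	return 0;
}
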
+
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct ib_phys_buf bl;
+	u64 kva = 0;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* AMSO1100 limit */
+	bl.size = 0xffffffff;
+	bl.addr = 0;
+	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				    int acc, struct ib_udata *udata)
+{
+	u64 *pages;
+	u64 kva = 0;
+	int shift, n, len;
+	int i, j, k;
+	int err = 0;
+	struct ib_umem_chunk *chunk;
+	struct c2_pd *c2pd = to_c2pd(pd);
+	struct c2_mr *c2mr;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	shift = ffs(region->page_size) - 1;
+
+	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+	if (!c2mr)
+		return ERR_PTR(-ENOMEM);
+	c2mr->pd = c2pd;
+
+	n = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		n += chunk->nents;
+
+	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	i = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list) {
+		for (j = 0; j < chunk->nmap; ++j) {
+			len = sg_dma_len(&chunk->page_list[j]) >> shift;
+			for (k = 0; k < len; ++k) {
+				pages[i++] =
+					sg_dma_address(&chunk->page_list[j]) +
+					(region->page_size * k);
+			}
+		}
+	}
+
+	kva = (u64)region->virt_base;
+  	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
+					 pages,
+ 					 region->page_size,
+					 i,
+					 region->length,
+					 region->offset,
+					 &kva,
+					 c2_convert_access(acc),
+					 c2mr);
+	kfree(pages);
+	if (err) {
+		kfree(c2mr);
+		return ERR_PTR(err);
+	}
+	return &c2mr->ibmr;
+
+err:
+	kfree(c2mr);
+	return ERR_PTR(err);
+}
+
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct c2_mr *mr = to_c2mr(ib_mr);
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+	if (err)
+		pr_debug("c2_stag_dealloc failed: %d\n", err);
+	else
+		kfree(mr);
+
+	return err;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x.%x.%x\n",
+		       (int) (dev->props.fw_ver >> 32),
+		       (int) (dev->props.fw_ver >> 16) & 0xffff,
+		       (int) (dev->props.fw_ver & 0xffff));
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "AMSO1100\n");
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *c2_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			int attr_mask, struct ib_udata *udata)
+{
+	int err;
+
+	err =
+	    c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
+			 attr_mask);
+
+	return err;
+}
+
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_process_mad(struct ib_device *ibdev,
+			  int mad_flags,
+			  u8 port_num,
+			  struct ib_wc *in_wc,
+			  struct ib_grh *in_grh,
+			  struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Request a connection */
+	return c2_llp_connect(cm_id, iw_param);
+}
+
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Accept the new connection */
+	return c2_llp_accept(cm_id, iw_param);
+}
+
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_reject(cm_id, pdata, pdata_len);
+	return err;
+}
+
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	err = c2_llp_service_create(cm_id, backlog);
+	pr_debug("%s:%u err=%d\n",
+		__FUNCTION__, __LINE__,
+		err);
+	return err;
+}
+
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+	int err;
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_llp_service_destroy(cm_id);
+
+	return err;
+}
+
+static int c2_pseudo_up(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("adding...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_down(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("deleting...\n");
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+
+	return 0;
+}
+
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	/* TODO: Tell the RNIC about the new RDMA interface MTU */
+	return ret;
+}
+
+static void setup(struct net_device *netdev)
+{
+	SET_MODULE_OWNER(netdev);
+	netdev->open = c2_pseudo_up;
+	netdev->stop = c2_pseudo_down;
+	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+	netdev->get_stats = NULL;
+	netdev->tx_timeout = NULL;
+	netdev->set_mac_address = NULL;
+	netdev->change_mtu = c2_pseudo_change_mtu;
+	netdev->watchdog_timeo = 0;
+	netdev->type = ARPHRD_ETHER;
+	netdev->mtu = 1500;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->tx_queue_len = 0;
+	netdev->flags |= IFF_NOARP;
+	return;
+}
+
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+	char name[IFNAMSIZ];
+	struct net_device *netdev;
+
+	/* change ethxxx to iwxxx */
+	strcpy(name, "iw");
+	strcat(name, &c2dev->netdev->name[3]);
+	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	if (!netdev) {
+		printk(KERN_ERR PFX "%s - etherdev alloc failed\n",
+			__FUNCTION__);
+		return NULL;
+	}
+
+	netdev->priv = c2dev;
+
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+	/* Print out the MAC address */
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+		netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+#endif
+	return netdev;
+}
+
+int c2_register_device(struct c2_dev *dev)
+{
+	int ret = -ENOMEM;
+	int i;
+
+	/* Register pseudo network device */
+	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+	if (!dev->pseudo_netdev)
+		goto out3;
+
+	ret = register_netdev(dev->pseudo_netdev);
+	if (ret)
+		goto out2;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.dma_device = &dev->pcidev->dev;
+	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+	dev->ibdev.query_device = c2_query_device;
+	dev->ibdev.query_port = c2_query_port;
+	dev->ibdev.modify_port = c2_modify_port;
+	dev->ibdev.query_pkey = c2_query_pkey;
+	dev->ibdev.query_gid = c2_query_gid;
+	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+	dev->ibdev.mmap = c2_mmap_uar;
+	dev->ibdev.alloc_pd = c2_alloc_pd;
+	dev->ibdev.dealloc_pd = c2_dealloc_pd;
+	dev->ibdev.create_ah = c2_ah_create;
+	dev->ibdev.destroy_ah = c2_ah_destroy;
+	dev->ibdev.create_qp = c2_create_qp;
+	dev->ibdev.modify_qp = c2_modify_qp;
+	dev->ibdev.destroy_qp = c2_destroy_qp;
+	dev->ibdev.create_cq = c2_create_cq;
+	dev->ibdev.destroy_cq = c2_destroy_cq;
+	dev->ibdev.poll_cq = c2_poll_cq;
+	dev->ibdev.get_dma_mr = c2_get_dma_mr;
+	dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+	dev->ibdev.reg_user_mr = c2_reg_user_mr;
+	dev->ibdev.dereg_mr = c2_dereg_mr;
+
+	dev->ibdev.alloc_fmr = NULL;
+	dev->ibdev.unmap_fmr = NULL;
+	dev->ibdev.dealloc_fmr = NULL;
+	dev->ibdev.map_phys_fmr = NULL;
+
+	dev->ibdev.attach_mcast = c2_multicast_attach;
+	dev->ibdev.detach_mcast = c2_multicast_detach;
+	dev->ibdev.process_mad = c2_process_mad;
+
+	dev->ibdev.req_notify_cq = c2_arm_cq;
+	dev->ibdev.post_send = c2_post_send;
+	dev->ibdev.post_recv = c2_post_receive;
+
+	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	dev->ibdev.iwcm->add_ref = c2_add_ref;
+	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+	dev->ibdev.iwcm->get_qp = c2_get_qp;
+	dev->ibdev.iwcm->connect = c2_connect;
+	dev->ibdev.iwcm->accept = c2_accept;
+	dev->ibdev.iwcm->reject = c2_reject;
+	dev->ibdev.iwcm->create_listen = c2_service_create;
+	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		goto out1;
+
+	for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       c2_class_attributes[i]);
+		if (ret)
+			goto out0;
+	}
+	goto out3;
+
+out0:
+	ib_unregister_device(&dev->ibdev);
+out1:
+	unregister_netdev(dev->pseudo_netdev);
+out2:
+	free_netdev(dev->pseudo_netdev);
+out3:
+	pr_debug("%s:%u ret=%d\n", __FUNCTION__, __LINE__, ret);
+	return ret;
+}
+
+void c2_unregister_device(struct c2_dev *dev)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	unregister_netdev(dev->pseudo_netdev);
+	free_netdev(dev->pseudo_netdev);
+	ib_unregister_device(&dev->ibdev);
+}
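
c2_register_device() above unwinds with the stacked-goto idiom: each failure jumps to the label that releases everything set up so far, and the success path also leaves through the terminal label, which only logs. Below is a minimal user-space model of that pattern; step_a/step_b/step_c and the undo_* helpers are hypothetical stand-ins for the pseudo-netdev allocation, the netdev registration and the ib_register_device() call.

#include <stdio.h>

static int step_a(void) { printf("a: ok\n");    return 0;  }
static int step_b(void) { printf("b: ok\n");    return 0;  }
static int step_c(void) { printf("c: fails\n"); return -1; }

static void undo_b(void) { printf("undo b\n"); }
static void undo_a(void) { printf("undo a\n"); }

static int toy_register(void)
{
	int ret;

	ret = step_a();
	if (ret)
		goto out;           /* nothing to undo yet */

	ret = step_b();
	if (ret)
		goto out_a;

	ret = step_c();
	if (ret)
		goto out_b;

	goto out;                   /* success: terminal label frees nothing */

out_b:
	undo_b();
out_a:
	undo_a();
out:
	printf("ret=%d\n", ret);
	return ret;
}

int main(void)
{
	return toy_register() ? 1 : 0;
}
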
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_provider.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+struct c2_buf_list {
+	void *buf;
+	 DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client. */
+struct c2_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;
+};
+
+struct c2_mr {
+	struct ib_mr ibmr;
+	struct c2_pd *pd;
+};
+
+struct c2_av;
+
+enum c2_ah_type {
+	C2_AH_ON_HCA,
+	C2_AH_PCI_POOL,
+	C2_AH_KMALLOC
+};
+
+struct c2_ah {
+	struct ib_ah ibah;
+};
+
+struct c2_cq {
+	struct ib_cq ibcq;
+	spinlock_t lock;
+	atomic_t refcount;
+	int cqn;
+	int is_kernel;
+	wait_queue_head_t wait;
+
+	u32 adapter_handle;
+	struct c2_mq mq;
+};
+
+struct c2_wq {
+	spinlock_t lock;
+};
+struct iw_cm_id;
+struct c2_qp {
+	struct ib_qp ibqp;
+	struct iw_cm_id *cm_id;
+	spinlock_t lock;
+	atomic_t refcount;
+	wait_queue_head_t wait;
+	int qpn;
+
+	u32 adapter_handle;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u8 state;
+
+	struct c2_mq sq_mq;
+	struct c2_mq rq_mq;
+};
+
+struct c2_cr_query_attrs {
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+};
+
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct c2_ah, ibah);
+}
+
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct c2_qp, ibqp);
+}
+
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+	struct in_device *ind;
+	int ret = 0;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	for_ifa(ind) {
+		if (ifa->ifa_address == addr) {
+			ret = 1;
+			break;
+		}
+	}
+	endfor_ifa(ind);
+	in_dev_put(ind);
+	return ret;
+}
+#endif				/* C2_PROVIDER_H */
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_qp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+#define NO_SUPPORT -1
+static const u8 c2_opcode[] = {
+	[IB_WR_SEND] = C2_WR_TYPE_SEND,
+	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+	switch (ib_state) {
+	case IB_QPS_RESET:
+		return C2_QP_STATE_IDLE;
+	case IB_QPS_RTS:
+		return C2_QP_STATE_RTS;
+	case IB_QPS_SQD:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_SQE:
+		return C2_QP_STATE_CLOSING;
+	case IB_QPS_ERR:
+		return C2_QP_STATE_ERROR;
+	default:
+		return -1;
+	}
+}
+
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+	switch (c2_state) {
+	case C2_QP_STATE_IDLE:
+		return IB_QPS_RESET;
+	case C2_QP_STATE_CONNECTING:
+		return IB_QPS_RTR;
+	case C2_QP_STATE_RTS:
+		return IB_QPS_RTS;
+	case C2_QP_STATE_CLOSING:
+		return IB_QPS_SQD;
+	case C2_QP_STATE_ERROR:
+		return IB_QPS_ERR;
+	case C2_QP_STATE_TERMINATE:
+		return IB_QPS_SQE;
+	default:
+		return -1;
+	}
+}
+
+static const char *to_ib_state_str(int ib_state)
+{
+	static const char *state_str[] = {
+		"IB_QPS_RESET",
+		"IB_QPS_INIT",
+		"IB_QPS_RTR",
+		"IB_QPS_RTS",
+		"IB_QPS_SQD",
+		"IB_QPS_SQE",
+		"IB_QPS_ERR"
+	};
+	if (ib_state < IB_QPS_RESET ||
+	    ib_state > IB_QPS_ERR)
+		return "<invalid IB QP state>";
+
+	ib_state -= IB_QPS_RESET;
+	return state_str[ib_state];
+}
+
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+	int new_state = to_ib_state(c2_state);
+
+	pr_debug("%s: qp[%p] state modify %s --> %s\n",
+	       __FUNCTION__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(new_state));
+	qp->state = new_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+		 struct ib_qp_attr *attr, int attr_mask)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	unsigned long flags;
+	u8 next_state;
+	int err;
+
+	pr_debug("%s:%d qp=%p, %s --> %s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state),
+		to_ib_state_str(attr->qp_state));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	if (attr_mask & IB_QP_STATE) {
+		/* Ensure the state is valid */
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
+			err = -EINVAL;
+			goto bail0;
+		}
+
+		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+		if (attr->qp_state == IB_QPS_ERR) {
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id && qp->state == IB_QPS_RTS) {
+				pr_debug("Generating CLOSE event for QP-->ERR, "
+					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+				/* Generate a CLOSE event */
+				vq_req->cm_id = qp->cm_id;
+				vq_req->event = IW_CM_EVENT_CLOSE;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+		}
+		next_state =  attr->qp_state;
+
+	} else if (attr_mask & IB_QP_CUR_STATE) {
+
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE) {
+			err = -EINVAL;
+			goto bail0;
+		} else
+			wr.next_qp_state =
+			    cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+		next_state = attr->cur_qp_state;
+
+	} else {
+		err = 0;
+		goto bail0;
+	}
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	if (!err)
+		qp->state = next_state;
+#ifdef DEBUG
+	else
+		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+	/*
+	 * If we're transitioning to error and generating the event here,
+	 * we need to drop the reference because the adapter will not
+	 * generate a close event.
+	 */
+	spin_lock_irqsave(&qp->lock, flags);
+	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+
+	pr_debug("%s:%d qp=%p, cur_state=%s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state));
+	return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+			  int ord, int ird)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(ord);
+	wr.ird = cpu_to_be32(ird);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_qp_destroy_req wr;
+	struct c2wr_qp_destroy_rep *reply;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Allocate a verb request message
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Initialize the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id && qp->state == IB_QPS_RTS) {
+		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+		/* Generate a CLOSE event */
+		vq_req->qp = qp;
+		vq_req->cm_id = qp->cm_id;
+		vq_req->event = IW_CM_EVENT_CLOSE;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	int ret;
+
+	do {
+		spin_lock_irq(&c2dev->qp_table.lock);
+		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+					c2dev->qp_table.last++, &qp->qpn);
+		spin_unlock_irq(&c2dev->qp_table.lock);
+	} while ((ret == -EAGAIN) &&
+		 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+	return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+	spin_lock_irq(&c2dev->qp_table.lock);
+	idr_remove(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irq(&c2dev->qp_table.lock);
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+	unsigned long flags;
+	struct c2_qp *qp;
+
+	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+	qp = idr_find(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+	return qp;
+}
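
c2_alloc_qpn() above relies on the 2.6.18-era IDR interface, where idr_get_new_above() may return -EAGAIN when the IDR's internal layer cache is empty; the caller then preallocates with idr_pre_get() outside the spinlock and retries. The kernel-style sketch below mirrors that idiom under the same assumptions; the toy_* names are illustrative and the fragment only builds inside a kernel tree of that vintage.

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(toy_lock);
static struct idr toy_idr;
static int toy_last;

static void toy_table_init(void)
{
	idr_init(&toy_idr);
}

static int toy_alloc_id(void *ptr, int *id)
{
	int ret;

	do {
		spin_lock_irq(&toy_lock);
		ret = idr_get_new_above(&toy_idr, ptr, toy_last++, id);
		spin_unlock_irq(&toy_lock);
		/* -EAGAIN means the IDR wants more preallocated memory. */
	} while (ret == -EAGAIN && idr_pre_get(&toy_idr, GFP_KERNEL));

	return ret;
}

static void toy_free_id(int id)
{
	spin_lock_irq(&toy_lock);
	idr_remove(&toy_idr, id);
	spin_unlock_irq(&toy_lock);
}
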
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+		struct c2_pd *pd,
+		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+	struct c2wr_qp_create_req wr;
+	struct c2wr_qp_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+	unsigned long peer_pa;
+	u32 q_size, msg_size, mmap_size;
+	void __iomem *mmap;
+	int err;
+
+	err = c2_alloc_qpn(c2dev, qp);
+	if (err)
+		return err;
+	qp->ibqp.qp_num = qp->qpn;
+	qp->ibqp.qp_type = IB_QPT_RC;
+
+	/* Allocate the SQ and RQ shared pointers */
+	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->sq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->sq_mq.shared) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->rq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->rq_mq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Allocate the verbs request */
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Initialize the work request */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_QP_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.sq_cq_handle = send_cq->adapter_handle;
+	wr.rq_cq_handle = recv_cq->adapter_handle;
+	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+	wr.srq_handle = 0;
+	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+	wr.pd_id = pd->pd_id;
+	wr.user_context = (unsigned long) qp;
+
+	vq_req_get(c2dev, vq_req);
+
+	/* Send the WR to the adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail3;
+	}
+
+	/* Wait for the verb reply  */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail3;
+	}
+
+	/* Process the reply */
+	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	if ((err = c2_wr_get_result(reply)) != 0) {
+		goto bail4;
+	}
+
+	/* Fill in the kernel QP struct */
+	atomic_set(&qp->refcount, 1);
+	qp->adapter_handle = reply->qp_handle;
+	qp->state = IB_QPS_RESET;
+	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+	/* Initialize the SQ MQ */
+	q_size = be32_to_cpu(reply->sq_depth);
+	msg_size = be32_to_cpu(reply->sq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail5;
+	}
+
+	c2_mq_req_init(&qp->sq_mq,
+		       be32_to_cpu(reply->sq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the RQ mq */
+	q_size = be32_to_cpu(reply->rq_depth);
+	msg_size = be32_to_cpu(reply->rq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail6;
+	}
+
+	c2_mq_req_init(&qp->rq_mq,
+		       be32_to_cpu(reply->rq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+      bail6:
+	iounmap(qp->sq_mq.peer);
+      bail5:
+	destroy_qp(c2dev, qp);
+      bail4:
+	vq_repbuf_free(c2dev, reply);
+      bail3:
+	vq_req_free(c2dev, vq_req);
+      bail2:
+	c2_free_mqsp(qp->rq_mq.shared);
+      bail1:
+	c2_free_mqsp(qp->sq_mq.shared);
+      bail0:
+	c2_free_qpn(c2dev, qp->qpn);
+	return err;
+}
+
+static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_lock_irq(&send_cq->lock);
+	else if (send_cq > recv_cq) {
+		spin_lock_irq(&send_cq->lock);
+		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+	} else {
+		spin_lock_irq(&recv_cq->lock);
+		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
+{
+	if (send_cq == recv_cq)
+		spin_unlock_irq(&send_cq->lock);
+	else if (send_cq > recv_cq) {
+		spin_unlock(&recv_cq->lock);
+		spin_unlock_irq(&send_cq->lock);
+	} else {
+		spin_unlock(&send_cq->lock);
+		spin_unlock_irq(&recv_cq->lock);
+	}
+}
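
c2_lock_cqs()/c2_unlock_cqs() above take the two CQ locks in a fixed order based on pointer value, so two paths that pass the same pair of CQs in opposite order cannot deadlock; spin_lock_nested() merely tells lockdep that the second acquisition is intentional. A user-space model of the same address-ordered locking follows, with pthread mutexes standing in for the spinlocks and toy_* names used purely for illustration (build with -pthread).

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct toy_cq {
	pthread_mutex_t lock;
};

static void toy_lock_pair(struct toy_cq *a, struct toy_cq *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
	} else if ((uintptr_t) a > (uintptr_t) b) {
		/* Always take the higher-addressed lock first. */
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void toy_unlock_pair(struct toy_cq *a, struct toy_cq *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b)
		pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	struct toy_cq x = { PTHREAD_MUTEX_INITIALIZER };
	struct toy_cq y = { PTHREAD_MUTEX_INITIALIZER };

	/* Either argument order acquires the locks in the same sequence. */
	toy_lock_pair(&x, &y);
	printf("locked (x, y)\n");
	toy_unlock_pair(&x, &y);

	toy_lock_pair(&y, &x);
	printf("locked (y, x)\n");
	toy_unlock_pair(&y, &x);
	return 0;
}
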
+
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_cq *send_cq;
+	struct c2_cq *recv_cq;
+
+	send_cq = to_c2cq(qp->ibqp.send_cq);
+	recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.
+	 */
+	c2_lock_cqs(send_cq, recv_cq);
+	c2_free_qpn(c2dev, qp->qpn);
+	c2_unlock_cqs(send_cq, recv_cq);
+
+	/*
+	 * Destroy the QP in the RNIC.
+	 */
+	destroy_qp(c2dev, qp);
+
+	/*
+	 * Mark any unreaped CQEs as null and void.
+	 */
+	c2_cq_clean(c2dev, qp, send_cq->cqn);
+	if (send_cq != recv_cq)
+		c2_cq_clean(c2dev, qp, recv_cq->cqn);
+	/*
+	 * Unmap the MQs and return the shared pointers
+	 * to the message pool.
+	 */
+	iounmap(qp->sq_mq.peer);
+	iounmap(qp->rq_mq.peer);
+	c2_free_mqsp(qp->sq_mq.shared);
+	c2_free_mqsp(qp->rq_mq.shared);
+
+	atomic_dec(&qp->refcount);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order and ensuring the total length doesn't
+ * overflow.
+ *
+ * IN:
+ * dst		- ptr to CCIL Work Request message SGL memory.
+ * src		- ptr to the consumer's SGL memory.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+	 u8 * actual_count)
+{
+	u32 tot = 0;		/* running total */
+	u8 acount = 0;		/* running total non-0 len sge's */
+
+	while (count > 0) {
+		/*
+		 * If the addition of this SGE causes the
+		 * total SGL length to exceed 2^32-1, then
+		 * fail-n-bail.
+		 *
+		 * If the current total plus the next element length
+		 * wraps, then it will go negative and be less than the
+		 * current total...
+		 */
+		if ((tot + src->length) < tot) {
+			return -EINVAL;
+		}
+		/*
+		 * Bug: 1456 (as well as 1498 & 1643)
+		 * Skip over any sge's supplied with len=0
+		 */
+		if (src->length) {
+			tot += src->length;
+			dst->stag = cpu_to_be32(src->lkey);
+			dst->to = cpu_to_be64(src->addr);
+			dst->length = cpu_to_be32(src->length);
+			dst++;
+			acount++;
+		}
+		src++;
+		count--;
+	}
+
+	if (acount == 0) {
+		/*
+		 * Bug: 1476 (as well as 1498, 1456 and 1643)
+		 * Setup the SGL in the WR to make it easier for the RNIC.
+		 * This way, the FW doesn't have to deal with special cases.
+		 * Setting length=0 should be sufficient.
+		 */
+		dst->stag = 0;
+		dst->to = 0;
+		dst->length = 0;
+	}
+
+	*p_len = tot;
+	*actual_count = acount;
+	return 0;
+}
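
The overflow guard in move_sgl() relies on unsigned wraparound: for 32-bit totals, tot + src->length is reduced mod 2^32, so the sum compares smaller than tot exactly when the true total would exceed 0xffffffff. A tiny user-space check of that test:

#include <stdio.h>
#include <stdint.h>

static int would_overflow(uint32_t tot, uint32_t len)
{
	/* True exactly when tot + len does not fit in 32 bits. */
	return (uint32_t) (tot + len) < tot;
}

int main(void)
{
	printf("%d\n", would_overflow(0xfffffff0u, 0x0fu)); /* 0: sum is 0xffffffff */
	printf("%d\n", would_overflow(0xfffffff0u, 0x10u)); /* 1: sum wraps to 0    */
	printf("%d\n", would_overflow(0x1000u, 0x2000u));   /* 0                    */
	return 0;
}
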
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev	- ptr to c2dev structure
+ * mq_index	- mq index to post
+ * shared	- value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+	/*
+	 * First read the register to see if the FIFO is full, and if so,
+	 * spin until it's not.  This isn't perfect -- there is no
+	 * synchronization among the clients of the register, but in
+	 * practice it prevents multiple CPUs from hammering the bus
+	 * with PCI RETRY. Note that when this does happen, the card
+	 * cannot get on the bus and the card and system hang in a
+	 * deadlock -- thus the need for this code. [TOT]
+	 */
+	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
+		udelay(10);
+
+	__raw_writel(C2_HINT_MAKE(mq_index, shared),
+		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
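
c2_activity() packs the MQ index and the shared count into one 32-bit hint before writing it to the adapter's hint register: the index goes in the upper 16 bits, the count in the lower 16, and bit 31 of the value read back signals a full FIFO. The user-space check below exercises replicas of the C2_HINT_* macros defined near the top of this file (note that C2_HINT_GET_INDEX masks only 15 of the 16 index bits, exactly as defined above).

#include <stdio.h>
#include <stdint.h>

#define HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
#define HINT_GET_INDEX(hint)           (((hint) & 0x7FFF0000) >> 16)
#define HINT_GET_COUNT(hint)           ((hint) & 0x0000FFFF)

int main(void)
{
	uint32_t hint = HINT_MAKE(5u, 42u);

	printf("hint  = 0x%08x\n", hint);              /* 0x0005002a */
	printf("index = %u\n", HINT_GET_INDEX(hint));  /* 5          */
	printf("count = %u\n", HINT_GET_COUNT(hint));  /* 42         */
	return 0;
}
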
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This function allocates an MQ message, then moves the host copy of
+ * the assembled WR into the message and posts it.
+ *
+ * IN:
+ * q		- ptr to user MQ.
+ * wr		- ptr to host-copy of the WR.
+ * qp		- ptr to user qp
+ * size		- Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+	union c2wr *msg;
+
+	msg = c2_mq_alloc(q);
+	if (msg == NULL) {
+		return -EINVAL;
+	}
+#ifdef CCMSGMAGIC
+	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+	/*
+	 * Since all header fields in the WR are the same as the
+	 * CQE, set the following so the adapter need not.
+	 */
+	c2_wr_set_result(wr, CCERR_PENDING);
+
+	/*
+	 * Copy the wr down to the adapter
+	 */
+	memcpy((void *) msg, (void *) wr, size);
+
+	c2_mq_produce(q);
+	return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		 struct ib_send_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	unsigned long lock_flags;
+	int err = 0;
+
+	u32 flags;
+	u32 tot_len;
+	u8 actual_sge_count;
+	u32 msg_size;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	while (ib_wr) {
+
+		flags = 0;
+		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+			flags |= SQ_SIGNALED;
+		}
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				msg_size = sizeof(struct c2wr_send_req);
+			} else {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				msg_size = sizeof(struct c2wr_send_req);
+			}
+
+			wr.sqwr.send.remote_stag = 0;
+			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			if (ib_wr->num_sge > qp->send_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_WRITE:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+			msg_size = sizeof(struct c2wr_rdma_write_req) +
+			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			wr.sqwr.rdma_write.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_write.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			err = move_sgl((struct c2_data_addr *)
+				       & (wr.sqwr.rdma_write.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_READ:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+			msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* iWARP only supports 1 SGE for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				err = -EINVAL;
+				break;
+			}
+
+			/*
+			 * Move the local and remote stag/to/len into the WR.
+			 */
+			wr.sqwr.rdma_read.local_stag =
+			    cpu_to_be32(ib_wr->sg_list->lkey);
+			wr.sqwr.rdma_read.local_to =
+			    cpu_to_be64(ib_wr->sg_list->addr);
+			wr.sqwr.rdma_read.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_read.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			wr.sqwr.rdma_read.length =
+			    cpu_to_be32(ib_wr->sg_list->length);
+			break;
+		default:
+			/* error */
+			msg_size = 0;
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Store flags
+		 */
+		c2_wr_set_flags(&wr, flags);
+
+		/*
+		 * Post the puppy!
+		 */
+		spin_lock_irqsave(&qp->lock, lock_flags);
+		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+		if (err) {
+			spin_unlock_irqrestore(&qp->lock, lock_flags);
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO.
+		 */
+		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+		spin_unlock_irqrestore(&qp->lock, lock_flags);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		    struct ib_recv_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	unsigned long lock_flags;
+	int err = 0;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	/*
+	 * Try and post each work request
+	 */
+	while (ib_wr) {
+		u32 tot_len;
+		u8 actual_sge_count;
+
+		if (ib_wr->num_sge > qp->recv_sgl_depth) {
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Create local host-copy of the WR
+		 */
+		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		c2_wr_set_id(&wr, CCWR_RECV);
+		c2_wr_set_flags(&wr, 0);
+
+		/* sge_count is limited to eight bits. */
+		BUG_ON(ib_wr->num_sge >= 256);
+		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+			       ib_wr->sg_list,
+			       ib_wr->num_sge, &tot_len, &actual_sge_count);
+		c2_wr_set_sge_count(&wr, actual_sge_count);
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		spin_lock_irqsave(&qp->lock, lock_flags);
+		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+		if (err) {
+			spin_unlock_irqrestore(&qp->lock, lock_flags);
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO
+		 */
+		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+		spin_unlock_irqrestore(&qp->lock, lock_flags);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+	spin_lock_init(&c2dev->qp_table.lock);
+	idr_init(&c2dev->qp_table.idr);
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+	idr_destroy(&c2dev->qp_table.idr);
+}
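
The two entry points above are reached through the standard kernel verbs ib_post_send()/ib_post_recv(). As a minimal sketch (not part of the patch; it assumes an already-connected struct ib_qp from this device and an ib_sge describing a locally registered buffer), a consumer would hand c2_post_send() an RDMA write like this:

/* Sketch only -- assumes a connected QP from this device and an ib_sge
 * whose addr/length/lkey were filled in when the buffer was registered.
 */
#include <linux/string.h>
#include <rdma/ib_verbs.h>

static int example_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
			      u64 remote_addr, u32 rkey)
{
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id      = 1;			/* returned in the completion */
	wr.opcode     = IB_WR_RDMA_WRITE;	/* handled in c2_post_send() above */
	wr.send_flags = IB_SEND_SIGNALED;	/* becomes SQ_SIGNALED in the c2 WR */
	wr.sg_list    = sge;
	wr.num_sge    = 1;	/* must not exceed the QP's rdma_write_sgl_depth */
	wr.wr.rdma.remote_addr = remote_addr;
	wr.wr.rdma.rkey        = rkey;

	return ib_post_send(qp, &wr, &bad_wr);
}
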
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+#include <linux/vmalloc.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities */
+#define C2_MIN_PAGESIZE  1024
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES      4
+#define C2_MAX_SGE_RD    1
+#define C2_MAX_CQS       32768
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+	int err;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_INIT);
+	wr.hdr.context = 0;
+	wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+	wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+	wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+	wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+	wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+	wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+	/* Post the init message */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+
+	return err;
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_TERM);
+	wr.hdr.context = 0;
+
+	/* Post the term message */
+	vq_send_wr(c2dev, (union c2wr *) & wr);
+	c2dev->init = 0;
+
+	return;
+}
+
+/*
+ * Query the adapter
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_query_req wr;
+	struct c2wr_rnic_query_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply)
+		err = -ENOMEM;
+	else
+		err = c2_errno(reply);
+	if (err)
+		goto bail2;
+
+	props->fw_ver =
+		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
+		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
+	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+	props->max_mr_size         = 0xFFFFFFFF;
+	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+	props->vendor_id           = be32_to_cpu(reply->vendor_id);
+	props->vendor_part_id      = be32_to_cpu(reply->part_number);
+	props->hw_ver              = be32_to_cpu(reply->hw_version);
+	props->max_qp              = be32_to_cpu(reply->max_qps);
+	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+	props->device_cap_flags    = c2dev->device_cap_flags;
+	props->max_sge             = C2_MAX_SGES;
+	props->max_sge_rd          = C2_MAX_SGE_RD;
+	props->max_cq              = be32_to_cpu(reply->max_cqs);
+	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+	props->max_mr              = be32_to_cpu(reply->max_mrs);
+	props->max_pd              = be32_to_cpu(reply->max_pds);
+	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+	props->max_ee_rd_atom      = 0;
+	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+	props->max_ee_init_rd_atom = 0;
+	props->atomic_cap          = IB_ATOMIC_NONE;
+	props->max_ee              = 0;
+	props->max_rdd             = 0;
+	props->max_mw              = be32_to_cpu(reply->max_mws);
+	props->max_raw_ipv6_qp     = 0;
+	props->max_raw_ethy_qp     = 0;
+	props->max_mcast_grp       = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_ah              = 0;
+	props->max_fmr             = 0;
+	props->max_map_per_fmr     = 0;
+	props->max_srq             = 0;
+	props->max_srq_wr          = 0;
+	props->max_srq_sge         = 0;
+	props->max_pkeys           = 0;
+	props->local_ca_ack_delay  = 0;
+
+ bail2:
+	vq_repbuf_free(c2dev, reply);
+
+ bail1:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_open_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+	wr.rnic_open.req.port_num = cpu_to_be16(0);
+	wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = reply->rnic_handle;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Close the RNIC instance
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_close_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = 0;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initializing the various limits and resource pools that
+ * comprise the RNIC instance.
+ */
+int __devinit c2_rnic_init(struct c2_dev *c2dev)
+{
+	int err;
+	u32 qsize, msgsize;
+	void *q1_pages;
+	void *q2_pages;
+	void __iomem *mmio_regs;
+
+	/* Device capabilities */
+	c2dev->device_cap_flags =
+	    (IB_DEVICE_RESIZE_MAX_WR |
+	     IB_DEVICE_CURR_QP_STATE_MOD |
+	     IB_DEVICE_SYS_IMAGE_GUID |
+	     IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	/* Allocate the qptr_array */
+	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	if (!c2dev->qptr_array) {
+		return -ENOMEM;
+	}
+
+	/* Initialize the qptr_array */
+	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+	/* Initialize data structures */
+	init_waitqueue_head(&c2dev->req_vq_wo);
+	spin_lock_init(&c2dev->vqlock);
+	spin_lock_init(&c2dev->lock);
+
+	/* Allocate MQ shared pointer pool for kernel clients. User
+	 * mode client pools are hung off the user context
+	 */
+	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+	if (err) {
+		goto bail0;
+	}
+
+	/* Allocate shared pointers for Q0, Q1, and Q2 from
+	 * the shared pointer pool.
+	 */
+
+	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->hint_count_dma,
+					     GFP_KERNEL);
+	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->req_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->rep_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					  &c2dev->aeq.shared_dma, GFP_KERNEL);
+	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	mmio_regs = c2dev->kva;
+	/* Initialize the Verbs Request Queue */
+	c2_mq_req_init(&c2dev->req_vq, 0,
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the Verbs Reply Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
+				      &c2dev->rep_vq.host_dma, GFP_KERNEL);
+	if (!q1_pages) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (unsigned long long) c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->rep_vq,
+		   1,
+		   qsize,
+		   msgsize,
+		   q1_pages,
+		   mmio_regs +
+		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronous Event Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
+				      &c2dev->aeq.host_dma, GFP_KERNEL);
+	if (!q2_pages) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages,
+		 (unsigned long long) c2dev->aeq.host_dma);
+	c2_mq_rep_init(&c2dev->aeq,
+		       2,
+		       qsize,
+		       msgsize,
+		       q2_pages,
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       C2_MQ_HOST_TARGET);
+
+	/* Initialize the verbs request allocator */
+	err = vq_init(c2dev);
+	if (err)
+		goto bail3;
+
+	/* Enable interrupts on the adapter */
+	writel(0, c2dev->regs + C2_IDIS);
+
+	/* create the WR init message */
+	err = c2_adapter_init(c2dev);
+	if (err)
+		goto bail4;
+	c2dev->init++;
+
+	/* open an adapter instance */
+	err = c2_rnic_open(c2dev);
+	if (err)
+		goto bail4;
+
+	/* Initialize the cached adapter limits */
+	if (c2_rnic_query(c2dev, &c2dev->props))
+		goto bail5;
+
+	/* Initialize the PD pool */
+	err = c2_init_pd_table(c2dev);
+	if (err)
+		goto bail5;
+
+	/* Initialize the QP pool */
+	c2_init_qp_table(c2dev);
+	return 0;
+
+      bail5:
+	c2_rnic_close(c2dev);
+      bail4:
+	vq_term(c2dev);
+      bail3:
+	dma_free_coherent(&c2dev->pcidev->dev,
+			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
+			  q2_pages, pci_unmap_addr(&c2dev->aeq, mapping));
+      bail2:
+	dma_free_coherent(&c2dev->pcidev->dev,
+			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+			  q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping));
+      bail1:
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+	vfree(c2dev->qptr_array);
+
+	return err;
+}
+
+/*
+ * Called by c2_remove to cleanup the RNIC resources.
+ */
+void __devexit c2_rnic_term(struct c2_dev *c2dev)
+{
+
+	/* Close the open adapter instance */
+	c2_rnic_close(c2dev);
+
+	/* Send the TERM message to the adapter */
+	c2_adapter_term(c2dev);
+
+	/* Disable interrupts on the adapter */
+	writel(1, c2dev->regs + C2_IDIS);
+
+	/* Free the QP pool */
+	c2_cleanup_qp_table(c2dev);
+
+	/* Free the PD pool */
+	c2_cleanup_pd_table(c2dev);
+
+	/* Free the verbs request allocator */
+	vq_term(c2dev);
+
+	/* Free the asynchronous event queue */
+	dma_free_coherent(&c2dev->pcidev->dev,
+			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
+			  c2dev->aeq.msg_pool.host,
+			  pci_unmap_addr(&c2dev->aeq, mapping));
+
+	/* Free the verbs reply queue */
+	dma_free_coherent(&c2dev->pcidev->dev,
+			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+			  c2dev->rep_vq.msg_pool.host,
+			  pci_unmap_addr(&c2dev->rep_vq, mapping));
+
+	/* Free the MQ shared pointer pool */
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+	/* Free the qptr_array */
+	vfree(c2dev->qptr_array);
+
+	return;
+}
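
A small note on c2_rnic_query() above: the adapter reports the firmware version as three 32-bit words, and the function packs them into the single 64-bit props->fw_ver as major << 32 | minor << 16 | patch. A sketch of the reverse decode (the helper name is illustrative, not something defined by this patch):

/* Sketch only: unpack the fw_ver value assembled in c2_rnic_query(). */
static void example_decode_fw_ver(u64 fw_ver, u32 *major, u16 *minor,
				  u16 *patch)
{
	*major = (u32) (fw_ver >> 32);
	*minor = (u16) ((fw_ver >> 16) & 0xFFFF);
	*patch = (u16) (fw_ver & 0xFFFF);
}
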
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_status.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef	_C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+	C2_OK = 0,		/* This must be zero */
+	CCERR_INSUFFICIENT_RESOURCES = 1,
+	CCERR_INVALID_MODIFIER = 2,
+	CCERR_INVALID_MODE = 3,
+	CCERR_IN_USE = 4,
+	CCERR_INVALID_RNIC = 5,
+	CCERR_INTERRUPTED_OPERATION = 6,
+	CCERR_INVALID_EH = 7,
+	CCERR_INVALID_CQ = 8,
+	CCERR_CQ_EMPTY = 9,
+	CCERR_NOT_IMPLEMENTED = 10,
+	CCERR_CQ_DEPTH_TOO_SMALL = 11,
+	CCERR_PD_IN_USE = 12,
+	CCERR_INVALID_PD = 13,
+	CCERR_INVALID_SRQ = 14,
+	CCERR_INVALID_ADDRESS = 15,
+	CCERR_INVALID_NETMASK = 16,
+	CCERR_INVALID_QP = 17,
+	CCERR_INVALID_QP_STATE = 18,
+	CCERR_TOO_MANY_WRS_POSTED = 19,
+	CCERR_INVALID_WR_TYPE = 20,
+	CCERR_INVALID_SGL_LENGTH = 21,
+	CCERR_INVALID_SQ_DEPTH = 22,
+	CCERR_INVALID_RQ_DEPTH = 23,
+	CCERR_INVALID_ORD = 24,
+	CCERR_INVALID_IRD = 25,
+	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+	CCERR_INVALID_STAG = 27,
+	CCERR_QP_IN_USE = 28,
+	CCERR_OUTSTANDING_WRS = 29,
+	CCERR_STAG_IN_USE = 30,
+	CCERR_INVALID_STAG_INDEX = 31,
+	CCERR_INVALID_SGL_FORMAT = 32,
+	CCERR_ADAPTER_TIMEOUT = 33,
+	CCERR_INVALID_CQ_DEPTH = 34,
+	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+	CCERR_INVALID_EP = 36,
+	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
+	CCERR_FLUSHED = 38,
+	CCERR_INVALID_WQE = 39,
+	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+	CCERR_REMOTE_TERMINATION_ERROR = 41,
+	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+	CCERR_ACCESS_VIOLATION = 43,
+	CCERR_INVALID_PD_ID = 44,
+	CCERR_WRAP_ERROR = 45,
+	CCERR_INV_STAG_ACCESS_ERROR = 46,
+	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+	CCERR_QP_NOT_PRIVILEGED = 48,
+	CCERR_STAG_STATE_NOT_INVALID = 49,
+	CCERR_INVALID_PAGE_SIZE = 50,
+	CCERR_INVALID_BUFFER_SIZE = 51,
+	CCERR_INVALID_PBE = 52,
+	CCERR_INVALID_FBO = 53,
+	CCERR_INVALID_LENGTH = 54,
+	CCERR_INVALID_ACCESS_RIGHTS = 55,
+	CCERR_PBL_TOO_BIG = 56,
+	CCERR_INVALID_VA = 57,
+	CCERR_INVALID_REGION = 58,
+	CCERR_INVALID_WINDOW = 59,
+	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+	CCERR_INVALID_QP_ID = 61,
+	CCERR_ADDR_IN_USE = 62,
+	CCERR_ADDR_NOT_AVAIL = 63,
+	CCERR_NET_DOWN = 64,
+	CCERR_NET_UNREACHABLE = 65,
+	CCERR_CONN_ABORTED = 66,
+	CCERR_CONN_RESET = 67,
+	CCERR_NO_BUFS = 68,
+	CCERR_CONN_TIMEDOUT = 69,
+	CCERR_CONN_REFUSED = 70,
+	CCERR_HOST_UNREACHABLE = 71,
+	CCERR_INVALID_SEND_SGL_DEPTH = 72,
+	CCERR_INVALID_RECV_SGL_DEPTH = 73,
+	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+	CCERR_INSUFFICIENT_PRIVILEGES = 75,
+	CCERR_STACK_ERROR = 76,
+	CCERR_INVALID_VERSION = 77,
+	CCERR_INVALID_MTU = 78,
+	CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98,	/* not an error; used internally by adapter */
+	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
+	CCERR_FAILED_WRITE = 100,
+	CCERR_FAILED_ERASE = 101,
+	CCERR_FAILED_VERIFICATION = 102,
+	CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+	C2_CONN_STATUS_SUCCESS = C2_OK,
+	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+	C2_FLASH_STATUS_SUCCESS = 0x0000,
+	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+	C2_FLASH_STATUS_ECLBS = 0x0400,
+	C2_FLASH_STATUS_PSLBS = 0x0800,
+	C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif				/* _C2_STATUS_H_ */
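
The verbs handlers earlier in this patch funnel these codes through c2_errno() to turn an adapter status into a Linux errno. The real mapping lives elsewhere in the driver; a minimal illustrative sketch covering a few of the codes above might read:

/* Sketch only -- the driver's real c2_errno() is defined elsewhere in
 * this patch and covers many more of the codes above.
 * Needs <linux/errno.h>.
 */
static int example_status_to_errno(enum c2_status status)
{
	switch (status) {
	case C2_OK:
		return 0;
	case CCERR_INSUFFICIENT_RESOURCES:
	case CCERR_NO_BUFS:
		return -ENOMEM;
	case CCERR_ADDR_IN_USE:
		return -EADDRINUSE;
	case CCERR_CONN_REFUSED:
		return -ECONNREFUSED;
	case CCERR_CONN_TIMEDOUT:
		return -ETIMEDOUT;
	default:
		return -EINVAL;
	}
}
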
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_user.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;
+};
+
+struct c2_create_cq {
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;
+};
+
+struct c2_create_qp {
+	__u32 lkey;
+	__u32 reserved;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+
+#endif				/* C2_USER_H */
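
To follow the no-pointers rule spelled out in the header above, a userspace provider passes buffer addresses through the __u64 fields with an explicit double cast. A hypothetical helper (only struct c2_create_cq comes from the patch; the function and parameter names are illustrative):

/* Sketch only: pack pointers into the fixed-width ABI struct above so
 * that 32-bit userspace and 64-bit kernels agree on the layout.
 */
static void example_fill_create_cq(struct c2_create_cq *cmd,
				   __u32 lkey, __u32 pdn,
				   void *arm_db, void *set_db,
				   __u32 arm_db_index, __u32 set_db_index)
{
	cmd->lkey = lkey;
	cmd->pdn  = pdn;
	cmd->arm_db_page  = (__u64) (unsigned long) arm_db;
	cmd->set_db_page  = (__u64) (unsigned long) set_db;
	cmd->arm_db_index = arm_db_index;
	cmd->set_db_index = set_db_index;
}
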
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_vq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 	1) append a copy of the verbs reply message
+ * 	2) mark that the reply is ready
+ * 	3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of an
+ * outstanding Verb Request reply
+ * message.  They are always allocated by the kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object,
+ * in that order.
+ */
+
+int vq_init(struct c2_dev *c2dev)
+{
+	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+		(char) ('0' + c2dev->devnum));
+	c2dev->host_msg_cache =
+	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+			      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (c2dev->host_msg_cache == NULL) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+	kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *r;
+
+	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+	if (r) {
+		init_waitqueue_head(&r->wait_object);
+		r->reply_msg = (u64) NULL;
+		r->event = 0;
+		r->cm_id = NULL;
+		r->qp = NULL;
+		atomic_set(&r->refcnt, 1);
+		atomic_set(&r->reply_ready, 0);
+	}
+	return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
+ * has already freed the VQ Reply Buffer if one existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	r->reply_msg = (u64) NULL;
+	if (atomic_dec_and_test(&r->refcnt)) {
+		kfree(r);
+	}
+}
+
+/* vq_req_get - reference a VQ Request Object.  Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is only called by handle_vq() on the
+ * interrupt when it is done processing
+ * a verb reply message.  If the associated
+ * kernel verbs handler has already bailed,
+ * then this put will actually free the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	if (atomic_dec_and_test(&r->refcnt)) {
+		if (r->reply_msg != (u64) NULL)
+			vq_repbuf_free(c2dev,
+				       (void *) (unsigned long) r->reply_msg);
+		kfree(r);
+	}
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+	return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue,
+ * it inserts MQ index 0 into the
+ * adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+	void *msg;
+	wait_queue_t __wait;
+
+	/*
+	 * grab adapter vq lock
+	 */
+	spin_lock(&c2dev->vqlock);
+
+	/*
+	 * allocate msg
+	 */
+	msg = c2_mq_alloc(&c2dev->req_vq);
+
+	/*
+	 * If we cannot get a msg, then we'll wait.
+	 * When messages become available, the int handler will wake_up()
+	 * any waiters.
+	 */
+	while (msg == NULL) {
+		pr_debug("%s:%d no available msg in VQ, waiting...\n",
+		       __FUNCTION__, __LINE__);
+		init_waitqueue_entry(&__wait, current);
+		add_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_unlock(&c2dev->vqlock);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!c2_mq_full(&c2dev->req_vq)) {
+				break;
+			}
+			if (!signal_pending(current)) {
+				schedule_timeout(1 * HZ);	/* 1 second... */
+				continue;
+			}
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+			return -EINTR;
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_lock(&c2dev->vqlock);
+		msg = c2_mq_alloc(&c2dev->req_vq);
+	}
+
+	/*
+	 * copy wr into adapter msg
+	 */
+	memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+	/*
+	 * post msg
+	 */
+	c2_mq_produce(&c2dev->req_vq);
+
+	/*
+	 * release adapter vq lock
+	 */
+	spin_unlock(&c2dev->vqlock);
+	return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+	if (!wait_event_timeout(req->wait_object,
+				atomic_read(&req->reply_ready),
+				60*HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+	kmem_cache_free(c2dev->host_msg_cache, reply);
+}
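
The comment at the top of this file describes the refcounting contract; in practice every kernel verbs handler in this patch (c2_rnic_query(), c2_add_addr(), c2_rnic_open(), and so on) follows the same alloc/get/send/wait/free sequence. Condensed into one sketch, with a hypothetical request/reply pair standing in for any of the c2wr_* types:

/* Sketch only: c2wr_foo_req/_rep and CCWR_FOO are placeholders for any
 * of the request/reply pairs and WR ids defined in c2_wr.h.
 */
static int example_vq_transaction(struct c2_dev *c2dev)
{
	struct c2_vq_req *vq_req;
	struct c2wr_foo_req wr;		/* hypothetical request type */
	struct c2wr_foo_rep *reply;	/* hypothetical reply type */
	int err;

	vq_req = vq_req_alloc(c2dev);		/* refcnt starts at 1 */
	if (!vq_req)
		return -ENOMEM;

	c2_wr_set_id(&wr, CCWR_FOO);		/* hypothetical WR id */
	wr.hdr.context = (unsigned long) vq_req; /* reflected back by adapter */
	wr.rnic_handle = c2dev->adapter_handle;

	vq_req_get(c2dev, vq_req);		/* 2nd ref, dropped by handle_vq() */

	err = vq_send_wr(c2dev, (union c2wr *) &wr);
	if (err) {
		vq_req_put(c2dev, vq_req);	/* adapter never saw it; drop ref */
		goto out;
	}

	err = vq_wait_for_reply(c2dev, vq_req);
	if (err)
		goto out;

	reply = (struct c2wr_foo_rep *) (unsigned long) vq_req->reply_msg;
	if (!reply) {
		err = -ENOMEM;
	} else {
		err = c2_errno(reply);
		vq_repbuf_free(c2dev, reply);	/* free repbuf first, then the req */
	}
out:
	vq_req_free(c2dev, vq_req);
	return err;
}
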
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_vq.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+	u64 reply_msg;		/* ptr to reply msg */
+	wait_queue_head_t wait_object;	/* wait object for vq reqs */
+	atomic_t reply_ready;	/* set when reply is ready */
+	atomic_t refcnt;	/* used to cancel WRs... */
+	int event;
+	struct iw_cm_id *cm_id;
+	struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif				/* _C2_VQ_H_ */
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/c2_wr.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC		0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+	C2_CQ_NOTIFICATION_TYPE_NEXT,
+	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+	C2_CFG_ADD_ADDR = 1,
+	C2_CFG_DEL_ADDR = 2,
+	C2_CFG_ADD_ROUTE = 3,
+	C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+	C2_GETCONFIG_ROUTES = 1,
+	C2_GETCONFIG_ADDRS
+};
+
+/*
+ *  CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+	CCWR_RNIC_OPEN = 1,
+	CCWR_RNIC_QUERY,
+	CCWR_RNIC_SETCONFIG,
+	CCWR_RNIC_GETCONFIG,
+	CCWR_RNIC_CLOSE,
+	CCWR_CQ_CREATE,
+	CCWR_CQ_QUERY,
+	CCWR_CQ_MODIFY,
+	CCWR_CQ_DESTROY,
+	CCWR_QP_CONNECT,
+	CCWR_PD_ALLOC,
+	CCWR_PD_DEALLOC,
+	CCWR_SRQ_CREATE,
+	CCWR_SRQ_QUERY,
+	CCWR_SRQ_MODIFY,
+	CCWR_SRQ_DESTROY,
+	CCWR_QP_CREATE,
+	CCWR_QP_QUERY,
+	CCWR_QP_MODIFY,
+	CCWR_QP_DESTROY,
+	CCWR_NSMR_STAG_ALLOC,
+	CCWR_NSMR_REGISTER,
+	CCWR_NSMR_PBL,
+	CCWR_STAG_DEALLOC,
+	CCWR_NSMR_REREGISTER,
+	CCWR_SMR_REGISTER,
+	CCWR_MR_QUERY,
+	CCWR_MW_ALLOC,
+	CCWR_MW_QUERY,
+	CCWR_EP_CREATE,
+	CCWR_EP_GETOPT,
+	CCWR_EP_SETOPT,
+	CCWR_EP_DESTROY,
+	CCWR_EP_BIND,
+	CCWR_EP_CONNECT,
+	CCWR_EP_LISTEN,
+	CCWR_EP_SHUTDOWN,
+	CCWR_EP_LISTEN_CREATE,
+	CCWR_EP_LISTEN_DESTROY,
+	CCWR_EP_QUERY,
+	CCWR_CR_ACCEPT,
+	CCWR_CR_REJECT,
+	CCWR_CONSOLE,
+	CCWR_TERM,
+	CCWR_FLASH_INIT,
+	CCWR_FLASH,
+	CCWR_BUF_ALLOC,
+	CCWR_BUF_FREE,
+	CCWR_FLASH_WRITE,
+	CCWR_INIT,		/* WARNING: Don't move this ever again! */
+
+
+
+	/* Add new IDs here */
+
+
+
+	/*
+	 * WARNING: CCWR_LAST must always be the last verbs id defined!
+	 *          All the preceding IDs are fixed, and must not change.
+	 *          You can add new IDs, but must not remove or reorder
+	 *          any IDs. If you do, YOU will ruin any hope of
+	 *          compatibility between versions.
+	 */
+	CCWR_LAST,
+
+	/*
+	 * Start over at 1 so that arrays indexed by user wr id's
+	 * begin at 1.  This is OK since the verbs and user wr id's
+	 * are always used on disjoint sets of queues.
+	 */
+	/*
+	 * The order of the CCWR_SEND_XX verbs must
+	 * match the order of the RDMA_OPs
+	 */
+	CCWR_SEND = 1,
+	CCWR_SEND_INV,
+	CCWR_SEND_SE,
+	CCWR_SEND_SE_INV,
+	CCWR_RDMA_WRITE,
+	CCWR_RDMA_READ,
+	CCWR_RDMA_READ_INV,
+	CCWR_MW_BIND,
+	CCWR_NSMR_FASTREG,
+	CCWR_STAG_INVALIDATE,
+	CCWR_RECV,
+	CCWR_NOP,
+	CCWR_UNIMPL,
+/* WARNING: This must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
+
+/*
+ * SQ/RQ Work Request Types
+ */
+enum c2_wr_type {
+	C2_WR_TYPE_SEND = CCWR_SEND,
+	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+	C2_WR_TYPE_RECV = CCWR_RECV,
+	C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+	u32 ip_addr;
+	u32 netmask;
+	u32 mtu;
+};
+
+struct c2_route {
+	u32 ip_addr;		/* 0 indicates the default route */
+	u32 netmask;		/* netmask associated with dst */
+	u32 flags;
+	union {
+		u32 ipaddr;	/* address of the nexthop interface */
+		u8 enaddr[6];
+	} nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+	u32 stag;
+	u32 length;
+	u64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
+	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
+	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
+	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
+	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
+	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
+	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
+	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
+	MEM_SHARED = 0x0100,	/* set if MR is shared */
+	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ *  To fix bug 1815 we define the maximum allowable size of the
+ *  terminate message (per the IETF spec; refer to the IETF
+ *  protocol specification, section 12.1.6, page 64).
+ *  The message is prefixed by 20 bytes of DDP info.
+ *
+ *  Then the message has 6 bytes for the terminate control
+ *  and DDP segment length info plus a DDP header (either
+ *  14 or 18 bytes) plus 28 bytes for the RDMA header.
+ *  Thus the max size is:
+ *  20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING:  All of these structs need to align any 64bit types on
+ * 64 bit boundaries!  The 64-bit types in question are the u64 fields.
+ */
+
+/*
+ * Clustercore Work Request Header.  Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+	/* wqe_count is part of the cqe.  It is put here so the
+	 * adapter can write to it while the wr is pending without
+	 * clobbering part of the wr.  This word need not be dma'd
+	 * from the host to adapter by libccil, but we copy it anyway
+	 * to make the memcpy to the adapter better aligned.
+	 */
+	u32 wqe_count;
+
+	/* Put these fields next so that later 32- and 64-bit
+	 * quantities are naturally aligned.
+	 */
+	u8 id;
+	u8 result;		/* adapter -> host */
+	u8 sge_count;		/* host -> adapter */
+	u8 flags;		/* host -> adapter */
+
+	u64 context;
+#ifdef CCMSGMAGIC
+	u32 magic;
+	u32 pad;
+#endif
+} __attribute__((packed));
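/*
 * Illustrative aside (not part of this patch): per the alignment warning
 * above, with CCMSGMAGIC undefined the packed header works out to 16
 * bytes, with the 64-bit context field at offset 8 (4 + 1 + 1 + 1 + 1
 * bytes ahead of it).  A compile-time check of that assumption could
 * look like this (BUILD_BUG_ON comes from <linux/kernel.h>, offsetof
 * from <linux/stddef.h>; the helper name is illustrative):
 */
#ifndef CCMSGMAGIC
static inline void c2wr_hdr_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct c2wr_hdr) != 16);
	BUILD_BUG_ON(offsetof(struct c2wr_hdr, context) != 8);
}
#endif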
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+	RNIC_IRD_STATIC = 0x0001,
+	RNIC_ORD_STATIC = 0x0002,
+	RNIC_QP_STATIC = 0x0004,
+	RNIC_SRQ_SUPPORTED = 0x0008,
+	RNIC_PBL_BLOCK_MODE = 0x0010,
+	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+	RNIC_CQ_OVF_DETECTED = 0x0040,
+	RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u16 flags;		/* See enum c2_rnic_flags */
+	u16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+	struct c2wr_rnic_open_req req;
+	struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 vendor_id;
+	u32 part_number;
+	u32 hw_version;
+	u32 fw_ver_major;
+	u32 fw_ver_minor;
+	u32 fw_ver_patch;
+	char fw_ver_build_str[WR_BUILD_STR_LEN];
+	u32 max_qps;
+	u32 max_qp_depth;
+	u32 max_srq_depth;
+	u32 max_send_sgl_depth;
+	u32 max_rdma_sgl_depth;
+	u32 max_cqs;
+	u32 max_cq_depth;
+	u32 max_cq_event_handlers;
+	u32 max_mrs;
+	u32 max_pbl_depth;
+	u32 max_pds;
+	u32 max_global_ird;
+	u32 max_global_ord;
+	u32 max_qp_ird;
+	u32 max_qp_ord;
+	u32 flags;
+	u32 max_mws;
+	u32 pbe_range_low;
+	u32 pbe_range_high;
+	u32 max_srqs;
+	u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+	struct c2wr_rnic_query_req req;
+	struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* see c2_getconfig_cmd_t */
+	u64 reply_buf;
+	u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+	struct c2wr_hdr hdr;
+	u32 option;		/* see c2_getconfig_cmd_t */
+	u32 count_len;		/* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+	struct c2wr_rnic_getconfig_req req;
+	struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* See c2_setconfig_cmd_t */
+	/* variable data and pad. See c2_netaddr and c2_route */
+	u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+	struct c2wr_rnic_setconfig_req req;
+	struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+	struct c2wr_rnic_close_req req;
+	struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u64 msg_pool;
+	u32 rnic_handle;
+	u32 msg_size;
+	u32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 mq_index;
+	u32 adapter_shared;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+	struct c2wr_cq_create_req req;
+	struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+	u32 new_depth;
+	u64 new_msg_pool;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+	struct c2wr_cq_modify_req req;
+	struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+	struct c2wr_cq_destroy_req req;
+	struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+	struct c2wr_pd_alloc_req req;
+	struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+	struct c2wr_pd_dealloc_req req;
+	struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 srq_depth;
+	u32 srq_limit;
+	u32 sgl_depth;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 srq_depth;
+	u32 sgl_depth;
+	u32 msg_size;
+	u32 mq_index;
+	u32 mq_start;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+	struct c2wr_srq_create_req req;
+	struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+	struct c2wr_srq_destroy_req req;
+	struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
+	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
+	QP_MW_BIND = 0x00000004,	/* MWs enabled */
+	QP_ZERO_STAG = 0x00000008,	/* enabled? */
+	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
+	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
+	    /* enabled? */
+};
+
+struct c2wr_qp_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_sq_ht;
+	u64 shared_rq_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_cq_handle;
+	u32 rq_cq_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 srq_handle;
+	u32 srq_limit;
+	u32 flags;		/* see enum c2wr_qp_flags */
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+	struct c2wr_hdr hdr;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+	struct c2wr_qp_create_req req;
+	struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u16 qp_state;
+	u16 flags;		/* see c2wr_qp_flags_t */
+	u32 qp_id;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+	u32 terminate_msg_length;	/* 0 if not present */
+	u8 data[0];
+	/* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+	struct c2wr_qp_query_req req;
+	struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+	struct c2wr_hdr hdr;
+	u64 stream_msg;
+	u32 stream_msg_length;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 next_qp_state;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+	struct c2wr_hdr hdr;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+	struct c2wr_qp_modify_req req;
+	struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+	struct c2wr_qp_destroy_req req;
+	struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
+ * only be posted when a QP is in IDLE state.  After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * No synchronous reply from adapter to this WR.  The results of the
+ * connection attempt are passed back in the async event
+ * CCAE_ACTIVE_CONNECT_RESULTS; see c2wr_ae_active_connect_results_t.
+ */
+struct c2wr_qp_connect_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 remote_addr;
+	u16 remote_port;
+	u16 pad;
+	u32 private_data_length;
+	u8 private_data[0];	/* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+	struct c2wr_qp_connect_req req;
+	/* no synchronous reply.         */
+} __attribute__((packed)) ;
+
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pbl_depth;
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+	struct c2wr_nsmr_stag_alloc_req req;
+	struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+	struct c2wr_nsmr_register_req req;
+	struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 flags;
+	u32 stag_index;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+	struct c2wr_nsmr_pbl_req req;
+	struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+	u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+	struct c2wr_mr_query_req req;
+	struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+	struct c2wr_mw_query_req req;
+	struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+	struct c2wr_stag_dealloc_req req;
+	struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	u32 pad1;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+	struct c2wr_nsmr_reregister_req req;
+	struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+	struct c2wr_smr_register_req req;
+	struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+	struct c2wr_mw_alloc_req req;
+	struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+	C2_QP_STATE_IDLE = 0x01,
+	C2_QP_STATE_CONNECTING = 0x02,
+	C2_QP_STATE_RTS = 0x04,
+	C2_QP_STATE_CLOSING = 0x08,
+	C2_QP_STATE_TERMINATE = 0x10,
+	C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+	u64 qp_user_context;	/* c2_user_qp_t * */
+	u32 qp_state;		/* Current QP State */
+	u32 handle;		/* QPID or EP Handle */
+	u32 bytes_rcvd;		/* valid for RECV WCs */
+	u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs.  These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+	SQ_SIGNALED = 0x01,
+	SQ_READ_FENCE = 0x02,
+	SQ_FENCE = 0x04,
+};
+
+/*
+ * Common fields for all post-sq WRs.  Namely the standard header and a
+ * secondary header with fields common to all post-sq WRs.
+ */
+struct c2_sq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+	struct c2_sq_hdr sq_hdr;
+	u32 sge_len;
+	u32 remote_stag;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+	struct c2wr_send_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_write_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 remote_to;
+	u32 remote_stag;
+	u32 sge_len;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+	struct c2wr_rdma_write_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 local_to;
+	u64 remote_to;
+	u32 local_stag;
+	u32 remote_stag;
+	u32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+	struct c2wr_rdma_read_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 mw_stag_index;
+	u32 mr_stag_index;
+	u32 length;
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+	struct c2wr_mw_bind_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+	struct c2wr_nsmr_fastreg_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+	struct c2_sq_hdr sq_hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+	struct c2wr_stag_invalidate_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+	struct c2_sq_hdr sq_hdr;
+	struct c2wr_send_req send;
+	struct c2wr_send_req send_se;
+	struct c2wr_send_req send_inv;
+	struct c2wr_send_req send_se_inv;
+	struct c2wr_rdma_write_req rdma_write;
+	struct c2wr_rdma_read_req rdma_read;
+	struct c2wr_mw_bind_req mw_bind;
+	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+	struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+	struct c2_rq_hdr rq_hdr;
+	u8 data[0];		/* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+	struct c2wr_rqwr req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header.  Most AEs only need to convey the
+ * information in the header.  Some, like LLP connection events, need
+ * more info.  The union typedef c2wr_ae_t has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR.  NULL if this
+ * is not affiliated with an rnic.
+ *
+ * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE)
+ *
+ * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* user context for this res. */
+	u32 resource_type;	/* see enum c2_resource_indicator */
+	u32 resource;		/* handle for resource */
+	u32 qp_state;		/* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into the RTS state.
+ */
+struct c2wr_ae_active_connect_results {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle are passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 cr_handle;		/* connreq handle (sock ptr) */
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+	struct c2wr_ae_hdr ae_generic;
+	struct c2wr_ae_active_connect_results ae_active_connect_results;
+	struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
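As a hedged illustration of how the AE header described above drives dispatch: the CCAE_* identifiers are the ones named in the comments, but the handle_* helpers below are hypothetical placeholders, not functions from this driver.

/* Hedged sketch: dispatch an async event on hdr.id (illustration only). */
static void example_dispatch_ae(union c2wr_ae *ae)
{
	switch (ae->ae_generic.hdr.id) {	/* AE identifier, e.g. CCAE_* */
	case CCAE_ACTIVE_CONNECT_RESULTS:
		/* adapter has finished an active connect; QP is now in RTS */
		handle_connect_results(&ae->ae_active_connect_results);
		break;
	case CCAE_CONNECTION_REQUEST:
		/* passive side: cr_handle identifies the pending connection */
		handle_conn_request(&ae->ae_connection_request);
		break;
	default:
		/* most AEs only need the information in the generic header */
		break;
	}
}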
+
+struct c2wr_init_req {
+	struct c2wr_hdr hdr;
+	u64 hint_count;
+	u64 q0_host_shared;
+	u64 q1_host_shared;
+	u64 q1_host_msg_pool;
+	u64 q2_host_shared;
+	u64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+	struct c2wr_init_req req;
+	struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+	struct c2wr_hdr hdr;
+	u32 adapter_flash_buf_offset;
+	u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+	struct c2wr_flash_init_req req;
+	struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+	struct c2wr_flash_req req;
+	struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 offset;		/* 0 if mem not available */
+	u32 size;		/* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+	struct c2wr_buf_alloc_req req;
+	struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;		/* Must match value from alloc */
+	u32 size;		/* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+	struct c2wr_buf_free_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;
+	u32 size;
+	u32 type;
+	u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+	struct c2wr_flash_write_req req;
+	struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request.  This allocates a listening endpoint to allow passive
+ * connection setup.  Newly established LLP connections are passed up
+ * via an AE.  See c2wr_ae_connection_request_t
+ */
+struct c2wr_ep_listen_create_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* returned in AEs. */
+	u32 rnic_handle;
+	u32 local_addr;		/* local addr, or 0  */
+	u16 local_port;		/* 0 means "pick one" */
+	u16 pad;
+	u32 backlog;		/* traditional TCP listen backlog */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+	struct c2wr_hdr hdr;
+	u32 ep_handle;		/* handle to new listening ep */
+	u16 local_port;		/* resulting port... */
+	u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+	struct c2wr_ep_listen_create_req req;
+	struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+	struct c2wr_ep_listen_destroy_req req;
+	struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+	struct c2wr_ep_query_req req;
+	struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
+ */
+struct c2wr_cr_accept_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;		/* QP to bind to this LLP conn */
+	u32 ep_handle;		/* LLP  handle to accept */
+	u32 private_data_length;
+	u8 private_data[0];	/* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * The adapter sends a reply when the private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+	struct c2wr_cr_accept_req req;
+	struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer.  The cr_handle was obtained from a
+ * previous c2wr_ae_connection_request_t AE sent by the adapter.
+ */
+struct  c2wr_cr_reject_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;		/* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * Dunno if this is needed, but we'll add it for now.  The adapter will
+ * send the reject_reply after the LLP endpoint has been destroyed.
+ */
+struct  c2wr_cr_reject_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+	struct c2wr_cr_reject_req req;
+	struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command.  Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message.  It contains:
+ *	- message hdr with id = CCWR_CONSOLE
+ *	- the physaddr/len of host memory to be used for the reply.
+ *	- the command string.  eg:  "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u64 reply_buf;		/* pinned host buf for reply */
+	u32 reply_buf_len;	/* length of reply buffer */
+	u8 command[0];		/* NUL terminated ascii string */
+	/* containing the command req */
+} __attribute__((packed));
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+	struct c2wr_console_req req;
+	struct c2wr_console_rep rep;
+} __attribute__((packed));
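A hedged sketch of building the console request described above; the DMA mapping of the pinned reply buffer and the sizing of the WR to hold the in-line command string are assumed to be handled by the caller.

/* Hedged sketch: fill a CCWR_CONSOLE request (illustration only). */
static void example_fill_console(struct c2wr_console_req *wr,
				 u64 reply_dma, u32 reply_len,
				 const char *cmd)
{
	wr->hdr.id = CCWR_CONSOLE;		/* id per the comment above */
	wr->reply_buf = reply_dma;		/* physaddr of pinned host buffer */
	wr->reply_buf_len = reply_len;
	strcpy((char *)wr->command, cmd);	/* e.g. "netstat -s" */
}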
+
+
+/*
+ * Giant union with all WRs.  Makes life easier...
+ */
+union c2wr {
+	struct c2wr_hdr hdr;
+	struct c2wr_user_hdr user_hdr;
+	union c2wr_rnic_open rnic_open;
+	union c2wr_rnic_query rnic_query;
+	union c2wr_rnic_getconfig rnic_getconfig;
+	union c2wr_rnic_setconfig rnic_setconfig;
+	union c2wr_rnic_close rnic_close;
+	union c2wr_cq_create cq_create;
+	union c2wr_cq_modify cq_modify;
+	union c2wr_cq_destroy cq_destroy;
+	union c2wr_pd_alloc pd_alloc;
+	union c2wr_pd_dealloc pd_dealloc;
+	union c2wr_srq_create srq_create;
+	union c2wr_srq_destroy srq_destroy;
+	union c2wr_qp_create qp_create;
+	union c2wr_qp_query qp_query;
+	union c2wr_qp_modify qp_modify;
+	union c2wr_qp_destroy qp_destroy;
+	struct c2wr_qp_connect qp_connect;
+	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+	union c2wr_nsmr_register nsmr_register;
+	union c2wr_nsmr_pbl nsmr_pbl;
+	union c2wr_mr_query mr_query;
+	union c2wr_mw_query mw_query;
+	union c2wr_stag_dealloc stag_dealloc;
+	union c2wr_sqwr sqwr;
+	struct c2wr_rqwr rqwr;
+	struct c2wr_ce ce;
+	union c2wr_ae ae;
+	union c2wr_init init;
+	union c2wr_ep_listen_create ep_listen_create;
+	union c2wr_ep_listen_destroy ep_listen_destroy;
+	union c2wr_cr_accept cr_accept;
+	union c2wr_cr_reject cr_reject;
+	union c2wr_console console;
+	union c2wr_flash_init flash_init;
+	union c2wr_flash flash;
+	union c2wr_buf_alloc buf_alloc;
+	union c2wr_buf_free buf_free;
+	union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size.  The wr arguments are void* so that
+ * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
+ * in the struct c2wr union can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->id;
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+	((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->result;
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+	((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->flags;
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+	((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->sge_count;
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+	((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ u32 c2_wr_get_wqe_count(void *wr)
+{
+	return ((struct c2wr_hdr *) wr)->wqe_count;
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
+
+#endif				/* _C2_WR_H_ */
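A brief, hedged usage note on the accessors above: because they take void *, the same helpers operate on a request and on its completion entry. The fragment below is an illustration only, not code from the driver.

/* Hedged sketch: using the c2_wr_* accessors on a send WR and its CQE. */
static inline u8 example_signaled_send_status(struct c2wr_send_req *req,
					      struct c2wr_ce *ce)
{
	c2_wr_set_flags(req, SQ_SIGNALED);	/* request a completion for this send */
	c2_wr_set_sge_count(req, 1);		/* one SGE follows in req->data[] */

	return c2_wr_get_result(ce);		/* completion status from the common header */
}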
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/Kbuild
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
--- linux-2.6.18.noarch/drivers/infiniband/hw/amso1100/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+	tristate "Ammasso 1100 HCA support"
+	depends on PCI && INET && INFINIBAND
+	---help---
+	  This is a low-level driver for the Ammasso 1100 host
+	  channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_AMSO1100
+	default n
+	---help---
+	  This option causes the amso1100 driver to produce a bunch of
+	  debug messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_dbg.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_dbg.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef DEBUG
+#include <linux/types.h>
+#include "common.h"
+#include "cxgb3_ioctl.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
+{
+	struct ch_mem_range *m;
+	u64 *data;
+	int rc;
+	int size = 32;
+
+	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+	if (!m) {
+		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		return;
+	}
+	m->mem_id = MEM_PMRX;
+	m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
+	m->len = size;
+	PDBG("%s TPT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	if (rc) {
+		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		kfree(m);
+		return;
+	}
+
+	data = (u64 *)m->buf;
+	while (size > 0) {
+		PDBG("TPT %08x: %016llx\n", m->addr, (u64)*data);
+		size -= 8;
+		data++;
+		m->addr += 8;
+	}
+	kfree(m);
+}
+
+void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
+{
+	struct ch_mem_range *m;
+	u64 *data;
+	int rc;
+	int size, npages;
+
+	shift += 12;
+	npages = (len + (1ULL << shift) - 1) >> shift;
+	size = npages * sizeof(u64);
+
+	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+	if (!m) {
+		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		return;
+	}
+	m->mem_id = MEM_PMRX;
+	m->addr = pbl_addr;
+	m->len = size;
+	PDBG("%s PBL addr 0x%x len %d depth %d\n",
+		__FUNCTION__, m->addr, m->len, npages);
+	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	if (rc) {
+		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		kfree(m);
+		return;
+	}
+
+	data = (u64 *)m->buf;
+	while (size > 0) {
+		PDBG("PBL %08x: %016llx\n", m->addr, (u64)*data);
+		size -= 8;
+		data++;
+		m->addr += 8;
+	}
+	kfree(m);
+}
+
+void cxio_dump_wqe(union t3_wr *wqe)
+{
+	__be64 *data = (__be64 *)wqe;
+	uint size = (uint)(be64_to_cpu(*data) & 0xff);
+
+	if (size == 0)
+		size = 8;
+	while (size > 0) {
+		PDBG("WQE %p: %016llx\n", data, be64_to_cpu(*data));
+		size--;
+		data++;
+	}
+}
+
+void cxio_dump_wce(struct t3_cqe *wce)
+{
+	__be64 *data = (__be64 *)wce;
+	int size = sizeof(*wce);
+
+	while (size > 0) {
+		PDBG("WCE %p: %016llx\n", data, be64_to_cpu(*data));
+		size -= 8;
+		data++;
+	}
+}
+
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
+{
+	struct ch_mem_range *m;
+	int size = nents * 64;
+	u64 *data;
+	int rc;
+
+	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+	if (!m) {
+		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		return;
+	}
+	m->mem_id = MEM_PMRX;
+	m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+	m->len = size;
+	PDBG("%s RQT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	if (rc) {
+		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		kfree(m);
+		return;
+	}
+
+	data = (u64 *)m->buf;
+	while (size > 0) {
+		PDBG("RQT %08x: %016llx\n", m->addr, (u64)*data);
+		size -= 8;
+		data++;
+		m->addr += 8;
+	}
+	kfree(m);
+}
+
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
+{
+	struct ch_mem_range *m;
+	int size = TCB_SIZE;
+	u32 *data;
+	int rc;
+
+	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+	if (!m) {
+		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		return;
+	}
+	m->mem_id = MEM_CM;
+	m->addr = hwtid * size;
+	m->len = size;
+	PDBG("%s TCB %d len %d\n", __FUNCTION__, m->addr, m->len);
+	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+	if (rc) {
+		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		kfree(m);
+		return;
+	}
+
+	data = (u32 *)m->buf;
+	while (size > 0) {
+		printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+			m->addr,
+			*(data+2), *(data+3), *(data),*(data+1),
+			*(data+6), *(data+7), *(data+4), *(data+5));
+		size -= 32;
+		data += 8;
+		m->addr += 32;
+	}
+	kfree(m);
+}
+#endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_hal.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_hal.c
@@ -0,0 +1,1305 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/semaphore.h>
+#include <asm/delay.h>
+
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+#include "sge_defs.h"
+
+static struct cxio_rdev *rdev_tbl[T3_MAX_NUM_RNIC];
+static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+{
+	int i;
+	for (i = 0; i < T3_MAX_NUM_RNIC; i++)
+		if (rdev_tbl[i])
+			if (!strcmp(rdev_tbl[i]->dev_name, dev_name))
+				return rdev_tbl[i];
+	return NULL;
+}
+
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
+							     *tdev)
+{
+	int i;
+	for (i = 0; i < T3_MAX_NUM_RNIC; i++)
+		if (rdev_tbl[i])
+			if (rdev_tbl[i]->t3cdev_p == tdev)
+				return rdev_tbl[i];
+	return NULL;
+}
+
+static inline int cxio_hal_add_rdev(struct cxio_rdev *rdev_p)
+{
+	int i;
+	for (i = 0; i < T3_MAX_NUM_RNIC; i++)
+		if (!rdev_tbl[i]) {
+			rdev_tbl[i] = rdev_p;
+			break;
+		}
+	return (i == T3_MAX_NUM_RNIC);
+}
+
+static inline void cxio_hal_delete_rdev(struct cxio_rdev *rdev_p)
+{
+	int i;
+	for (i = 0; i < T3_MAX_NUM_RNIC; i++)
+		if (rdev_tbl[i] == rdev_p) {
+			rdev_tbl[i] = NULL;
+			break;
+		}
+}
+
+int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
+		   enum t3_cq_opcode op, u32 credit)
+{
+	int ret;
+	struct t3_cqe *cqe;
+	u32 rptr;
+
+	struct rdma_cq_op setup;
+	setup.id = cq->cqid;
+	setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
+	setup.op = op;
+	ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+
+	if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
+		return ret;
+
+	/*
+	 * If the rearm returned an index other than our current index,
+	 * then there might be CQE's in flight (being DMA'd).  We must wait
+	 * here for them to complete or the consumer can miss a notification.
+	 */
+	if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
+		int i=0;
+
+		rptr = cq->rptr;
+
+		/*
+		 * Keep the generation correct by bumping rptr until it
+		 * matches the index returned by the rearm - 1.
+	 	 */
+		while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
+			rptr++;
+
+		/*
+		 * Now rptr is the index for the (last) cqe that was
+	 	 * in-flight at the time the HW rearmed the CQ.  We
+		 * spin until that CQE is valid.
+	 	 */
+		cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
+		while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
+			udelay(1);
+			if (i++ > 1000000) {
+				printk(KERN_ERR "%s: stalled rnic\n",
+				       rdev_p->dev_name);
+				BUG_ON(1);
+				return -EIO;
+			}
+		}
+	}
+	return 0;
+}
+
+static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+{
+	struct rdma_cq_setup setup;
+	setup.id = cqid;
+	setup.base_addr = 0;	/* NULL address */
+	setup.size = 0;		/* disable the CQ */
+	setup.credits = 0;
+	setup.credit_thres = 0;
+	setup.ovfl_mode = 0;
+	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+{
+	u64 sge_cmd;
+	struct t3_modify_qp_wr *wqe;
+	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+	if (!skb) {
+		PDBG("%s alloc_skb failed\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+	memset(wqe, 0, sizeof(*wqe));
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
+	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+	sge_cmd = qpid << 8 | 3;
+	wqe->sge_cmd = cpu_to_be64(sge_cmd);
+	skb->priority = CPL_PRIORITY_CONTROL;
+	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	struct rdma_cq_setup setup;
+	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+
+	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
+	if (!cq->cqid)
+		return -ENOMEM;
+	cq->sw_queue = kzalloc(size, GFP_KERNEL);
+	if (!cq->sw_queue)
+		return -ENOMEM;
+	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+					     (1UL << (cq->size_log2)) *
+					     sizeof(struct t3_cqe),
+					     &(cq->dma_addr), GFP_KERNEL);
+	if (!cq->queue) {
+		kfree(cq->sw_queue);
+		return -ENOMEM;
+	}
+	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+	memset(cq->queue, 0, size);
+	setup.id = cq->cqid;
+	setup.base_addr = (u64) (cq->dma_addr);
+	setup.size = 1UL << cq->size_log2;
+	setup.credits = 65535;
+	setup.credit_thres = 1;
+	if (rdev_p->t3cdev_p->type == T3B)
+		setup.ovfl_mode = 0;
+	else
+		setup.ovfl_mode = 1;
+	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	struct rdma_cq_setup setup;
+	setup.id = cq->cqid;
+	setup.base_addr = (u64) (cq->dma_addr);
+	setup.size = 1UL << cq->size_log2;
+	setup.credits = setup.size;
+	setup.credit_thres = setup.size;	/* TBD: overflow recovery */
+	setup.ovfl_mode = 1;
+	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+	struct cxio_qpid_list *entry;
+	u32 qpid;
+	int i;
+
+	mutex_lock(&uctx->lock);
+	if (!list_empty(&uctx->qpids)) {
+		entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
+				   entry);
+		list_del(&entry->entry);
+		qpid = entry->qpid;
+		kfree(entry);
+	} else {
+		qpid = cxio_hal_get_qpid(rdev_p->rscp);
+		if (!qpid)
+			goto out;
+		for (i = qpid+1; i & rdev_p->qpmask; i++) {
+			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			if (!entry)
+				break;
+			entry->qpid = i;
+			list_add_tail(&entry->entry, &uctx->qpids);
+		}
+	}
+out:
+	mutex_unlock(&uctx->lock);
+	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	return qpid;
+}
+
+static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
+		     struct cxio_ucontext *uctx)
+{
+	struct cxio_qpid_list *entry;
+	
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry)
+		return;
+	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	entry->qpid = qpid;
+	mutex_lock(&uctx->lock);
+	list_add_tail(&entry->entry, &uctx->qpids);
+	mutex_unlock(&uctx->lock);
+}
+
+void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+	struct list_head *pos, *nxt;
+	struct cxio_qpid_list *entry;
+
+	mutex_lock(&uctx->lock);
+	list_for_each_safe(pos, nxt, &uctx->qpids) {
+		entry = list_entry(pos, struct cxio_qpid_list, entry);
+		list_del_init(&entry->entry);
+		if (!(entry->qpid & rdev_p->qpmask))
+			cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
+		kfree(entry);
+	}
+	mutex_unlock(&uctx->lock);
+}
+
+void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+	INIT_LIST_HEAD(&uctx->qpids);
+	mutex_init(&uctx->lock);
+}
+
+int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
+		   struct t3_wq *wq, struct cxio_ucontext *uctx)
+{
+	int depth = 1UL << wq->size_log2;
+	int rqsize = 1UL << wq->rq_size_log2;
+
+	wq->qpid = get_qpid(rdev_p, uctx);
+	if (!wq->qpid)
+		return -ENOMEM;
+
+	wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
+	if (!wq->rq)
+		goto err1;
+
+	wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
+	if (!wq->rq_addr)
+		goto err2;
+
+	wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL);
+	if (!wq->sq)
+		goto err3;
+	
+	wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+					     depth * sizeof(union t3_wr),
+					     &(wq->dma_addr), GFP_KERNEL);
+	if (!wq->queue)
+		goto err4;
+
+	memset(wq->queue, 0, depth * sizeof(union t3_wr));
+	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+	wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+	if (!kernel_domain)
+		wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
+					(wq->qpid << rdev_p->qpshift);
+	PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __FUNCTION__,
+	     wq->qpid, wq->doorbell, wq->udb);
+	return 0;
+err4:
+	kfree(wq->sq);
+err3:
+	cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
+err2:
+	kfree(wq->rq);
+err1:
+	put_qpid(rdev_p, wq->qpid, uctx);
+	return -ENOMEM;
+}
+
+int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+	int err;
+	err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+	kfree(cq->sw_queue);
+	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+			  (1UL << (cq->size_log2))
+			  * sizeof(struct t3_cqe), cq->queue,
+			  pci_unmap_addr(cq, mapping));
+	cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+	return err;
+}
+
+int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+		    struct cxio_ucontext *uctx)
+{
+	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+			  (1UL << (wq->size_log2))
+			  * sizeof(union t3_wr), wq->queue,
+			  pci_unmap_addr(wq, mapping));
+	kfree(wq->sq);
+	cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
+	kfree(wq->rq);
+	put_qpid(rdev_p, wq->qpid, uctx);
+	return 0;
+}
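The create/destroy helpers above pair up in the obvious way; the following hedged sketch shows one kernel-domain lifecycle (error paths abbreviated, and the size_log2 fields of the wq/cq are assumed to have been set by the caller before this runs). It is an illustration, not a function from this patch.

/* Hedged sketch: kernel-domain CQ/QP setup and teardown using the helpers above. */
static int example_qp_lifecycle(struct cxio_rdev *rdev, struct t3_cq *cq,
				struct t3_wq *wq, struct cxio_ucontext *uctx)
{
	int err;

	cxio_init_ucontext(rdev, uctx);		/* per-context qpid cache */

	err = cxio_create_cq(rdev, cq);		/* cq->size_log2 set by the caller */
	if (err)
		return err;

	err = cxio_create_qp(rdev, 1, wq, uctx);	/* 1 => kernel domain */
	if (err)
		goto free_cq;

	/* ... post work requests, poll completions ... */

	cxio_destroy_qp(rdev, wq, uctx);
free_cq:
	cxio_destroy_cq(rdev, cq);
	cxio_release_ucontext(rdev, uctx);
	return err;
}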
+
+static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+{
+	struct t3_cqe cqe;
+
+	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+	     wq, cq, cq->sw_rptr, cq->sw_wptr);
+	memset(&cqe, 0, sizeof(cqe));
+	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+			         V_CQE_OPCODE(T3_SEND) |
+		         	 V_CQE_TYPE(0) |
+		         	 V_CQE_SWCQE(1) |
+		         	 V_CQE_QPID(wq->qpid) |
+		         	 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+						       cq->size_log2)));
+	*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+	cq->sw_wptr++;
+}
+
+void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+	u32 ptr;
+
+	PDBG("%s wq %p cq %p\n", __FUNCTION__, wq, cq);
+
+	/* flush RQ */
+	PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __FUNCTION__,
+	    wq->rq_rptr, wq->rq_wptr, count);
+	ptr = wq->rq_rptr + count;
+	while (ptr++ != wq->rq_wptr)
+		insert_recv_cqe(wq, cq);
+}
+
+static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+		          struct t3_swsq *sqp)
+{
+	struct t3_cqe cqe;
+
+	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+	     wq, cq, cq->sw_rptr, cq->sw_wptr);
+	memset(&cqe, 0, sizeof(cqe));
+	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+			         V_CQE_OPCODE(sqp->opcode) |
+			         V_CQE_TYPE(1) |
+			         V_CQE_SWCQE(1) |
+			         V_CQE_QPID(wq->qpid) |
+			         V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+						       cq->size_log2)));
+	cqe.u.scqe.wrid_hi = sqp->sq_wptr;
+
+	*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+	cq->sw_wptr++;
+}
+
+void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+	__u32 ptr;
+	struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
+
+	ptr = wq->sq_rptr + count;
+	sqp += count;
+	while (ptr != wq->sq_wptr) {
+		insert_sq_cqe(wq, cq, sqp);
+		sqp++;
+		ptr++;
+	}
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ */
+void cxio_flush_hw_cq(struct t3_cq *cq)
+{
+	struct t3_cqe *cqe, *swcqe;
+
+	PDBG("%s cq %p cqid 0x%x\n", __FUNCTION__, cq, cq->cqid);
+	cqe = cxio_next_hw_cqe(cq);
+	while (cqe) {
+		PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+		     __FUNCTION__, cq->rptr, cq->sw_wptr);
+		swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
+		*swcqe = *cqe;
+		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+		cq->sw_wptr++;
+		cq->rptr++;
+		cqe = cxio_next_hw_cqe(cq);
+	}
+}
+
+static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+{
+	if (CQE_OPCODE(*cqe) == T3_TERMINATE)
+		return 0;
+
+	if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
+		return 0;
+
+	if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
+		return 0;
+
+	if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+	    Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
+		return 0;
+
+	return 1;
+}
+
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+	struct t3_cqe *cqe;
+	u32 ptr;
+
+	*count = 0;
+	ptr = cq->sw_rptr;
+	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+		if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+		    (CQE_QPID(*cqe) == wq->qpid))
+			(*count)++;
+		ptr++;
+	}	
+	PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+}
+
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+	struct t3_cqe *cqe;
+	u32 ptr;
+
+	*count = 0;
+	PDBG("%s count zero %d\n", __FUNCTION__, *count);
+	ptr = cq->sw_rptr;
+	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+		if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
+		    (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
+			(*count)++;
+		ptr++;
+	}	
+	PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+}
+
+static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
+{
+	struct rdma_cq_setup setup;
+	setup.id = 0;
+	setup.base_addr = 0;	/* NULL address */
+	setup.size = 1;		/* enable the CQ */
+	setup.credits = 0;
+
+	/* force SGE to redirect to RspQ and interrupt */
+	setup.credit_thres = 0;	
+	setup.ovfl_mode = 1;
+	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+	int err;
+	u64 sge_cmd, ctx0, ctx1;
+	u64 base_addr;
+	struct t3_modify_qp_wr *wqe;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+	if (!skb) {
+		PDBG("%s alloc_skb failed\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	err = cxio_hal_init_ctrl_cq(rdev_p);
+	if (err) {
+		PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
+		goto err;
+	}
+	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
+					&(rdev_p->rnic_info.pdev->dev),
+					(1 << T3_CTRL_QP_SIZE_LOG2) *
+					sizeof(union t3_wr),
+					&(rdev_p->ctrl_qp.dma_addr),
+					GFP_KERNEL);
+	if (!rdev_p->ctrl_qp.workq) {
+		PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto err;
+	}
+	pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+			   rdev_p->ctrl_qp.dma_addr);
+	rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+	memset(rdev_p->ctrl_qp.workq, 0,
+	       (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
+
+	init_MUTEX(&rdev_p->ctrl_qp.sem);
+	init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
+
+	/* update HW Ctrl QP context */
+	base_addr = rdev_p->ctrl_qp.dma_addr;
+	base_addr >>= 12;
+	ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
+		V_EC_BASE_LO((u32) base_addr & 0xffff));
+	ctx0 <<= 32;
+	ctx0 |= V_EC_CREDITS(FW_WR_NUM);
+	base_addr >>= 16;
+	ctx1 = (u32) base_addr;
+	base_addr >>= 32;
+	ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
+			V_EC_TYPE(0) | V_EC_GEN(1) |
+			V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
+	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+	memset(wqe, 0, sizeof(*wqe));
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
+		       T3_CTL_QP_TID, 7);
+	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+	sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
+	wqe->sge_cmd = cpu_to_be64(sge_cmd);
+	wqe->ctx1 = cpu_to_be64(ctx1);
+	wqe->ctx0 = cpu_to_be64(ctx0);
+	PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+	     (u64) rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq,
+	     1 << T3_CTRL_QP_SIZE_LOG2);
+	skb->priority = CPL_PRIORITY_CONTROL;
+	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+err:
+	kfree_skb(skb);
+	return err;
+}
+
+static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+			  (1UL << T3_CTRL_QP_SIZE_LOG2)
+			  * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
+			  pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+	return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
+}
+
+/* write len bytes of data into addr (32B aligned address)
+ * If data is NULL, clear len bytes of memory to zero.
+ * The caller acquires the sem before the call.
+ */
+static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+				      u32 len, void *data, int completion)
+{
+	u32 i, nr_wqe, copy_len;
+	u8 *copy_data;
+	u8 wr_len, utx_len;	/* length in 8-byte flits */
+	enum t3_wr_flags flag;
+	__be64 *wqe;
+	u64 utx_cmd;
+	addr &= 0x7FFFFFF;
+	nr_wqe = len % 96 ? len / 96 + 1 : len / 96;	/* 96B max per WQE */
+	PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+	     __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+	     nr_wqe, data, addr);
+	utx_len = 3;		/* in 32B unit */
+	for (i = 0; i < nr_wqe; i++) {
+		if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
+		           T3_CTRL_QP_SIZE_LOG2)) {
+			PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
+			     "wait for more space i %d\n", __FUNCTION__,
+			     rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+			if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+					     !Q_FULL(rdev_p->ctrl_qp.rptr,
+						     rdev_p->ctrl_qp.wptr,
+						     T3_CTRL_QP_SIZE_LOG2))) {
+				PDBG("%s ctrl_qp workq interrupted\n",
+				     __FUNCTION__);
+				return -ERESTARTSYS;
+			}
+			PDBG("%s ctrl_qp wakeup, continue posting work request "
+			     "i %d\n", __FUNCTION__, i);
+		}
+		wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+						(1 << T3_CTRL_QP_SIZE_LOG2)));
+		flag = 0;
+		if (i == (nr_wqe - 1)) {
+			/* last WQE */
+			flag = completion ? T3_COMPLETION_FLAG : 0;
+			if (len % 32)
+				utx_len = len / 32 + 1;
+			else
+				utx_len = len / 32;
+		}
+
+		/*
+		 * Force a CQE to return the credit to the workq in case
+		 * we posted more than half the max QP size of WRs
+		 */
+		if ((i != 0) &&
+		    (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
+			flag = T3_COMPLETION_FLAG;
+			PDBG("%s force completion at i %d\n", __FUNCTION__, i);
+		}
+
+		/* build the utx mem command */
+		wqe += (sizeof(struct t3_bypass_wr) >> 3);
+		utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
+		utx_cmd <<= 32;
+		utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
+		*wqe = cpu_to_be64(utx_cmd);
+		wqe++;
+		copy_data = (u8 *) data + i * 96;
+		copy_len = len > 96 ? 96 : len;
+
+		/* clear memory content if data is NULL */
+		if (data)
+			memcpy(wqe, copy_data, copy_len);
+		else
+			memset(wqe, 0, copy_len);
+		if (copy_len % 32)
+			memset(((u8 *) wqe) + copy_len, 0,
+			       32 - (copy_len % 32));
+		wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
+			 (utx_len << 2);
+		wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+			      (1 << T3_CTRL_QP_SIZE_LOG2)));
+
+		/* wptr in the WRID[31:0] */
+		((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
+
+		/*
+		 * This must be the last write with a memory barrier
+		 * for the genbit
+		 */
+		build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
+			       Q_GENBIT(rdev_p->ctrl_qp.wptr,
+					T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
+			       wr_len);
+		if (flag == T3_COMPLETION_FLAG)
+			ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
+		len -= 96;
+		rdev_p->ctrl_qp.wptr++;
+	}
+	return 0;
+}
+
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
+ * OUT: stag index, actual pbl_size, pbl_addr allocated.
+ * TBD: shared memory region support
+ */
+static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+			 u32 *stag, u8 stag_state, u32 pdid,
+			 enum tpt_mem_type type, enum tpt_mem_perm perm,
+			 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
+			 u32 *pbl_size, u32 *pbl_addr)
+{
+	int err;
+	struct tpt_entry tpt;
+	u32 stag_idx;
+	u32 wptr;
+	int rereg = (*stag != T3_STAG_UNSET);
+
+	stag_state = stag_state > 0;
+	stag_idx = (*stag) >> 8;
+
+	if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
+		stag_idx = cxio_hal_get_stag(rdev_p->rscp);
+		if (!stag_idx)
+			return -ENOMEM;
+		*stag = (stag_idx << 8) | ((*stag) & 0xFF);
+	}
+	PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+	     __FUNCTION__, stag_state, type, pdid, stag_idx);
+	
+	if (reset_tpt_entry)
+		cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
+	else if (!rereg) {
+		*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
+		if (!*pbl_addr) {
+			return -ENOMEM;
+		}
+	}
+
+	down_interruptible(&rdev_p->ctrl_qp.sem);
+
+	/* write PBL first if any - update pbl only if a pbl list exists */
+	if (pbl) {
+
+		PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+		     __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
+		     *pbl_size);
+		err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+				(*pbl_addr >> 5),
+				(*pbl_size << 3), pbl, 0);
+		if (err)
+			goto ret;
+	}
+
+	/* write TPT entry */
+	if (reset_tpt_entry)
+		memset(&tpt, 0, sizeof(tpt));
+	else {
+		tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
+				V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
+				V_TPT_STAG_STATE(stag_state) |
+				V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
+		BUG_ON(page_size >= 28);
+		tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
+			    	F_TPT_MW_BIND_ENABLE |
+				V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+				V_TPT_PAGE_SIZE(page_size));
+		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
+				    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+		tpt.len = cpu_to_be32(len);
+		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
+		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
+		tpt.rsvd_bind_cnt_or_pstag = 0;
+		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
+				  cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+	}
+	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+				       stag_idx +
+				       (rdev_p->rnic_info.tpt_base >> 5),
+				       sizeof(tpt), &tpt, 1);
+
+	/* release the stag index to free pool */
+	if (reset_tpt_entry)
+		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+ret:	
+	wptr = rdev_p->ctrl_qp.wptr;
+	up(&rdev_p->ctrl_qp.sem);
+	if (!err)
+		if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+					     SEQ32_GE(rdev_p->ctrl_qp.rptr,
+						      wptr)))
+			return -ERESTARTSYS;
+	return err;
+}
+
+/* IN: stag key, pdid, pbl_size
+ * OUT: stag index, actual pbl_size, and pbl_addr allocated.
+ */
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
+		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
+{
+	*stag = T3_STAG_UNSET;
+	return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+			      perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr));
+}
+
+int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+			   u32 *pbl_addr)
+{
+	*stag = T3_STAG_UNSET;
+	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+			   u32 *pbl_addr)
+{
+	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
+		   u32 pbl_addr)
+{
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+			     &pbl_size, &pbl_addr);
+}
+
+int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+{
+	u32 pbl_size = 0;
+	*stag = T3_STAG_UNSET;
+	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+			     NULL, &pbl_size, NULL);
+}
+
+int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+{
+	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+			     NULL, NULL);
+}
+
+int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+{
+	struct t3_rdma_init_wr *wqe;
+	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+	PDBG("%s rdev_p %p\n", __FUNCTION__, rdev_p);
+	wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+	wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
+	wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
+					   V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
+	wqe->wrid.id1 = 0;
+	wqe->qpid = cpu_to_be32(attr->qpid);
+	wqe->pdid = cpu_to_be32(attr->pdid);
+	wqe->scqid = cpu_to_be32(attr->scqid);
+	wqe->rcqid = cpu_to_be32(attr->rcqid);
+	wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
+	wqe->rq_size = cpu_to_be32(attr->rq_size);
+	wqe->mpaattrs = attr->mpaattrs;
+	wqe->qpcaps = attr->qpcaps;
+	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+	wqe->flags = cpu_to_be32(attr->flags);
+	wqe->ord = cpu_to_be32(attr->ord);
+	wqe->ird = cpu_to_be32(attr->ird);
+	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
+	wqe->irs = cpu_to_be32(attr->irs);
+	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
+	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+	cxio_ev_cb = ev_cb;
+}
+
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+	cxio_ev_cb = NULL;
+}
+
+static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
+{
+	static int cnt;
+	struct cxio_rdev *rdev_p = NULL;
+	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+	PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
+	     " se %0x notify %0x cqbranch %0x creditth %0x\n",
+	     cnt, __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+	     RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+	     RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+	     RSPQ_CREDIT_THRESH(rsp_msg));
+	PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
+	     "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+	     CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+	     CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+	     CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+	     CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+	rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
+	if (!rdev_p) {
+		PDBG("%s called by t3cdev %p with null ulp\n", __FUNCTION__,
+		     t3cdev_p);
+		return 0;
+	}
+	if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+		rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
+		wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
+		dev_kfree_skb_irq(skb);
+	} else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
+		dev_kfree_skb_irq(skb);
+	else if (cxio_ev_cb)
+		(*cxio_ev_cb) (rdev_p, skb);
+	else
+		dev_kfree_skb_irq(skb);
+	cnt++;
+	return 0;
+}
+
+/* Caller takes care of locking if needed */
+int cxio_rdev_open(struct cxio_rdev *rdev_p)
+{
+	struct net_device *netdev_p = NULL;
+	int err = 0;
+	if (strlen(rdev_p->dev_name)) {
+		if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
+			return -EBUSY;
+		}
+		netdev_p = dev_get_by_name(rdev_p->dev_name);
+		if (!netdev_p) {
+			return -EINVAL;
+		}
+		dev_put(netdev_p);
+	} else if (rdev_p->t3cdev_p) {
+		if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
+			return -EBUSY;
+		}
+		netdev_p = rdev_p->t3cdev_p->lldev;
+		strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
+			T3_MAX_DEV_NAME_LEN);
+	} else {
+		PDBG("%s t3cdev_p or dev_name must be set\n", __FUNCTION__);
+		return -EINVAL;
+	}
+
+	if (cxio_hal_add_rdev(rdev_p))
+		return -ENOMEM;
+
+	PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name);
+	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
+	if (!rdev_p->t3cdev_p)
+		rdev_p->t3cdev_p = T3CDEV(netdev_p);
+	rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
+					 &(rdev_p->rnic_info));
+	if (err) {
+		printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+		     __FUNCTION__, rdev_p->t3cdev_p, err);
+		goto err1;
+	}
+	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
+				    &(rdev_p->port_info));
+	if (err) {
+		printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+		     __FUNCTION__, rdev_p->t3cdev_p, err);
+		goto err1;
+	}
+
+	/*
+	 * qpshift is the number of bits to shift the qpid left in order
+	 * to get the correct address of the doorbell for that qp.
+	 */
+	cxio_init_ucontext(rdev_p, &rdev_p->uctx);
+	rdev_p->qpshift = PAGE_SHIFT -
+			  ilog2(65536 >>
+			            ilog2(rdev_p->rnic_info.udbell_len >>
+					      PAGE_SHIFT));
+	rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
+	rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
+	PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
+	     "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+	     __FUNCTION__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+  	     rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+  	     rdev_p->rnic_info.pbl_base,
+  	     rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+  	     rdev_p->rnic_info.rqt_top);
+	PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
+	     "qpnr %d qpmask 0x%x\n",
+	     rdev_p->rnic_info.udbell_len,
+	     rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+	     rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+
+	err = cxio_hal_init_ctrl_qp(rdev_p);
+	if (err) {
+		printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
+		       __FUNCTION__, err);
+		goto err1;
+	}
+ 	err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
+				     0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
+				     T3_MAX_NUM_PD);
+	if (err) {
+		printk(KERN_ERR "%s error %d initializing hal resources.\n",
+		       __FUNCTION__, err);
+		goto err2;
+	}
+ 	err = cxio_hal_pblpool_create(rdev_p);
+ 	if (err) {
+ 		printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ 		       __FUNCTION__, err);
+ 		goto err3;
+ 	}
+ 	err = cxio_hal_rqtpool_create(rdev_p);
+ 	if (err) {
+ 		printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ 		       __FUNCTION__, err);
+ 		goto err4;
+ 	}
+  	return 0;
+err4:
+ 	cxio_hal_pblpool_destroy(rdev_p);
+err3:
+ 	cxio_hal_destroy_resource(rdev_p->rscp);
+err2:
+	cxio_hal_destroy_ctrl_qp(rdev_p);
+err1:
+	cxio_hal_delete_rdev(rdev_p);
+	return err;
+}
+
+void cxio_rdev_close(struct cxio_rdev *rdev_p)
+{
+	if (rdev_p) {
+		cxio_hal_pblpool_destroy(rdev_p);
+		cxio_hal_rqtpool_destroy(rdev_p);
+		cxio_hal_delete_rdev(rdev_p);
+		rdev_p->t3cdev_p->ulp = NULL;
+		cxio_hal_destroy_ctrl_qp(rdev_p);
+		cxio_hal_destroy_resource(rdev_p->rscp);
+	}
+}
+
+int __init cxio_hal_init(void)
+{
+	if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
+		return -ENOMEM;
+	memset(rdev_tbl, 0, T3_MAX_NUM_RNIC * sizeof(void *));
+	t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+	return 0;
+}
+
+void __exit cxio_hal_exit(void)
+{
+	int i;
+	t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
+	for (i = 0; i < T3_MAX_NUM_RNIC; i++)
+		cxio_rdev_close(rdev_tbl[i]);
+	cxio_hal_destroy_rhdl_resource();
+}
+
+static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+{
+	struct t3_swsq *sqp;
+	__u32 ptr = wq->sq_rptr;
+	int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
+	
+	sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+	while (count--)
+		if (!sqp->signaled) {
+			ptr++;
+			sqp = wq->sq + Q_PTR2IDX(ptr,  wq->sq_size_log2);
+		} else if (sqp->complete) {
+
+			/*
+			 * Insert this completed cqe into the swcq.
+			 */
+			PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+			     __FUNCTION__, Q_PTR2IDX(ptr,  wq->sq_size_log2),
+			     Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+			sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
+			*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
+				= sqp->cqe;
+			cq->sw_wptr++;
+			sqp->signaled = 0;
+			break;
+		} else
+			break;
+}
+
+static inline void create_read_req_cqe(struct t3_wq *wq,
+				       struct t3_cqe *hw_cqe,
+				       struct t3_cqe *read_cqe)
+{
+	read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
+	read_cqe->len = wq->oldest_read->read_len;
+	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
+				 V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
+				 V_CQE_OPCODE(T3_READ_REQ) |
+				 V_CQE_TYPE(1));
+}
+
+/*
+ * Advance wq->oldest_read to the next read WR in the SWSQ, or set it to NULL.
+ */
+static inline void advance_oldest_read(struct t3_wq *wq)
+{
+
+	u32 rptr = wq->oldest_read - wq->sq + 1;
+	u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
+
+	while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
+		wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
+
+		if (wq->oldest_read->opcode == T3_READ_REQ)
+			return;
+		rptr++;
+	}
+	wq->oldest_read = NULL;
+}
+
+/*
+ * cxio_poll_cq
+ *
+ * Caller must:
+ *     check the validity of the first CQE,
+ *     supply the wq associated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ *     0       CQE returned,
+ *    -1       CQE skipped, try again.
+ */
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+		     u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+	int ret = 0;
+	struct t3_cqe *hw_cqe, read_cqe;
+
+	*cqe_flushed = 0;
+	*credit = 0;
+	hw_cqe = cxio_next_cqe(cq);
+
+	PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
+	     " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+	     __FUNCTION__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+	     CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+	     CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+	     CQE_WRID_LOW(*hw_cqe));
+
+	/*
+	 * skip cqe's not affiliated with a QP.
+	 */
+	if (wq == NULL) {
+		ret = -1;
+		goto skip_cqe;
+	}
+
+	/*
+	 * Gotta tweak READ completions:
+	 * 	1) the cqe doesn't contain the sq_wptr from the wr.
+	 *	2) opcode not reflected from the wr.
+	 *	3) read_len not reflected from the wr.
+	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
+	 */
+	if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
+		
+		/*
+	 	 * Don't write to the HWCQ, so create a new read req CQE
+		 * in local memory.
+		 */
+		create_read_req_cqe(wq, hw_cqe, &read_cqe);
+		hw_cqe = &read_cqe;
+		advance_oldest_read(wq);
+	}
+
+	/*
+ 	 * T3A: Discard TERMINATE CQEs.
+	 */
+	if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
+		ret = -1;
+		wq->error = 1;
+		goto skip_cqe;
+	}
+
+	if (CQE_STATUS(*hw_cqe) || wq->error) {
+		*cqe_flushed = wq->error;
+		wq->error = 1;
+	
+		/*
+		 * T3A inserts errors into the CQE.  We cannot return
+	 	 * these as work completions.
+	 	 */
+		/* incoming write failures */
+		if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
+		     && RQ_TYPE(*hw_cqe)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+		/* incoming read request failures */
+		if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+
+		/* incoming SEND with no receive posted failures */
+		if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+		    Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+			ret = -1;
+			goto skip_cqe;
+		}
+		goto proc_cqe;
+	}
+
+	/*
+	 * RECV completion.
+	 */
+	if (RQ_TYPE(*hw_cqe)) {
+
+		/*
+		 * HW only validates 4 bits of MSN.  So we must validate that
+		 * the MSN in the SEND is the next expected MSN.  If it's not,
+		 * then we complete this with TPT_ERR_MSN and mark the wq in
+		 * error.
+		 */
+		if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
+			wq->error = 1;
+			hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
+			goto proc_cqe;
+		}
+		goto proc_cqe;
+	}
+
+	/*
+	 * If we get here it's a send completion.
+	 *
+	 * Handle out of order completion. These get stuffed
+	 * in the SW SQ. Then the SW SQ is walked to move any
+	 * now in-order completions into the SW CQ.  This handles
+	 * 2 cases:
+	 * 	1) reaping unsignaled WRs when the first subsequent
+	 *	   signaled WR is completed.
+	 *	2) out of order read completions.
+	 */
+	if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
+		struct t3_swsq *sqp;
+
+		PDBG("%s out of order completion going in swsq at idx %ld\n",
+		     __FUNCTION__,
+		     Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+		sqp = wq->sq +
+		      Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
+		sqp->cqe = *hw_cqe;
+		sqp->complete = 1;
+		ret = -1;
+		goto flush_wq;
+	}
+	
+proc_cqe:
+	*cqe = *hw_cqe;
+
+	/*
+	 * Reap the associated WR(s) that are freed up with this
+	 * completion.
+	 */
+	if (SQ_TYPE(*hw_cqe)) {
+		wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
+		PDBG("%s completing sq idx %ld\n", __FUNCTION__,
+		     Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+		*cookie = (wq->sq +
+			   Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+		wq->sq_rptr++;
+	} else {
+		PDBG("%s completing rq idx %ld\n", __FUNCTION__,
+		     Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+		*cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+		wq->rq_rptr++;
+	}
+
+flush_wq:
+	/*
+	 * Flush any completed cqes that are now in-order.
+	 */
+	flush_completed_wrs(wq, cq);
+
+skip_cqe:
+	if (SW_CQE(*hw_cqe)) {
+		PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+		     __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
+		++cq->sw_rptr;
+	} else {
+		PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+		     __FUNCTION__, cq, cq->cqid, cq->rptr);
+		++cq->rptr;
+
+		/*
+		 * T3A: compute credits.  Return the accumulated credit to the
+		 * SGE once more than half the CQ, or at least 128 entries,
+		 * have been consumed since the last update.
+		 */
+		if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
+		    || ((cq->rptr - cq->wptr) >= 128)) {
+			*credit = cq->rptr - cq->wptr;
+			cq->wptr = cq->rptr;
+		}
+	}
+	return ret;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_hal.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_hal.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef  __CXIO_HAL_H__
+#define  __CXIO_HAL_H__
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "t3_cpl.h"
+#include "t3cdev.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxio_wr.h"
+
+#define T3_CTRL_QP_ID    FW_RI_SGEEC_START
+#define T3_CTL_QP_TID	 FW_RI_TID_START
+#define T3_CTRL_QP_SIZE_LOG2  8
+#define T3_CTRL_CQ_ID    0
+
+/* TBD */
+#define T3_MAX_NUM_RNIC  8
+#define T3_MAX_NUM_RI (1<<15)
+#define T3_MAX_NUM_QP (1<<15)
+#define T3_MAX_NUM_CQ (1<<15)
+#define T3_MAX_NUM_PD (1<<15)
+#define T3_MAX_PBL_SIZE 256
+#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_NUM_STAG (1<<15)
+
+#define T3_STAG_UNSET 0xffffffff
+
+#define T3_MAX_DEV_NAME_LEN 32
+
+struct cxio_hal_ctrl_qp {
+	u32 wptr;
+	u32 rptr;
+	struct semaphore sem;	/* for the wptr, can sleep */
+	wait_queue_head_t waitq;	/* wait for RspQ/CQE msg */
+	union t3_wr *workq;	/* the work request queue */
+	dma_addr_t dma_addr;	/* pci bus address of the workq */
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+	void __iomem *doorbell;
+};
+
+struct cxio_hal_resource {
+	struct kfifo *tpt_fifo;
+	spinlock_t tpt_fifo_lock;
+	struct kfifo *qpid_fifo;
+	spinlock_t qpid_fifo_lock;
+	struct kfifo *cqid_fifo;
+	spinlock_t cqid_fifo_lock;
+	struct kfifo *pdid_fifo;
+	spinlock_t pdid_fifo_lock;
+};
+
+struct cxio_qpid_list {
+	struct list_head entry;
+	u32 qpid;
+};
+
+struct cxio_ucontext {
+	struct list_head qpids;
+	struct mutex lock;
+};
+
+struct cxio_rdev {
+	char dev_name[T3_MAX_DEV_NAME_LEN];
+	struct t3cdev *t3cdev_p;
+	struct rdma_info rnic_info;
+	struct adap_ports port_info;
+	struct cxio_hal_resource *rscp;
+	struct cxio_hal_ctrl_qp ctrl_qp;
+	void *ulp;
+	unsigned long qpshift;
+	u32 qpnr;
+	u32 qpmask;
+	struct cxio_ucontext uctx;
+	struct gen_pool *pbl_pool;
+	struct gen_pool *rqt_pool;
+};
+
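+/*
+ * Each TPT entry is 32 bytes (struct tpt_entry), so the usable stag count is
+ * the size of the adapter's TPT region divided by 32, capped at
+ * T3_MAX_NUM_STAG.
+ */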
+static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
+{
+	return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
+}
+
+typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
+					     struct sk_buff * skb);
+
+#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
+#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
+#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
+#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
+#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
+#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
+#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
+#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
+#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
+
+struct respQ_msg_t {
+	__be32 flags;		/* flit 0 */
+	__be32 cq_ptrid;
+	__be64 rsvd;		/* flit 1 */
+	struct t3_cqe cqe;	/* flits 2-3 */
+};
+
+enum t3_cq_opcode {
+	CQ_ARM_AN = 0x2,
+	CQ_ARM_SE = 0x6,
+	CQ_FORCE_AN = 0x3,
+	CQ_CREDIT_UPDATE = 0x7
+};
+
+int cxio_rdev_open(struct cxio_rdev *rdev);
+void cxio_rdev_close(struct cxio_rdev *rdev);
+int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
+	 	   enum t3_cq_opcode op, u32 credit);
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
+		   struct cxio_ucontext *uctx);
+int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
+		    struct cxio_ucontext *uctx);
+int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+		       enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
+int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+			   u32 *pbl_addr);
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+			   u32 *pbl_addr);
+int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
+		   u32 pbl_addr);
+int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+u32 cxio_hal_get_rhdl(void);
+void cxio_hal_put_rhdl(u32 rhdl);
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+int __init cxio_hal_init(void);
+void __exit cxio_hal_exit(void);
+void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_flush_hw_cq(struct t3_cq *cq);
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+		     u8 *cqe_flushed, u64 *cookie, u32 *credit);
+
+#define MOD "iw_cxgb3: "
+#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+
+#ifdef DEBUG
+void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
+void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
+void cxio_dump_wqe(union t3_wr *wqe);
+void cxio_dump_wce(struct t3_cqe *wce);
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
+#endif
+
+#endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_resource.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_resource.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Crude resource management */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+
+static struct kfifo *rhdl_fifo;
+static spinlock_t rhdl_fifo_lock;
+
+#define RANDOM_SIZE 16
+
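+/*
+ * Fill a kfifo with the IDs [skip_low, nr - skip_high).  When 'random' is
+ * set, the IDs are emitted in pseudo-random order via a 16-entry reservoir
+ * refilled from net_random(), presumably to make allocated stags harder to
+ * guess.
+ */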
+static int __cxio_init_resource_fifo(struct kfifo **fifo,
+				   spinlock_t *fifo_lock,
+				   u32 nr, u32 skip_low,
+				   u32 skip_high,
+				   int random)
+{
+	u32 i, j, entry = 0, idx;
+	u32 random_bytes;
+	u32 rarray[16];
+	spin_lock_init(fifo_lock);
+
+	*fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
+	if (IS_ERR(*fifo))
+		return -ENOMEM;
+
+	for (i = 0; i < skip_low + skip_high; i++)
+		__kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+	if (random) {
+		j = 0;
+		random_bytes = net_random();
+		for (i = 0; i < RANDOM_SIZE; i++)
+			rarray[i] = i + skip_low;
+		for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
+			if (j >= RANDOM_SIZE) {
+				j = 0;
+				random_bytes = net_random();
+			}
+			idx = (random_bytes >> (j * 2)) & 0xF;
+			__kfifo_put(*fifo,
+				(unsigned char *) &rarray[idx],
+				sizeof(u32));
+			rarray[idx] = i;
+			j++;	
+		}
+		for (i = 0; i < RANDOM_SIZE; i++)
+			__kfifo_put(*fifo,
+				(unsigned char *) &rarray[i],
+				sizeof(u32));
+	} else
+		for (i = skip_low; i < nr - skip_high; i++)
+			__kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+
+	for (i = 0; i < skip_low + skip_high; i++)
+		kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+	return 0;
+}
+
+static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+				   u32 nr, u32 skip_low, u32 skip_high)
+{
+	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+					  skip_high, 0));
+}
+
+static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+				   spinlock_t * fifo_lock,
+				   u32 nr, u32 skip_low, u32 skip_high)
+{
+
+	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+					  skip_high, 1));
+}
+
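+/*
+ * Kernel qpids start at 16 and only qpids aligned to (qpmask + 1) go into
+ * the fifo, so each kernel QP gets its own doorbell page; the unaligned
+ * qpids in each block are presumably left for user-mode contexts.
+ */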
+static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
+{
+	u32 i;
+
+	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
+
+	rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
+					      GFP_KERNEL,
+					      &rdev_p->rscp->qpid_fifo_lock);
+	if (IS_ERR(rdev_p->rscp->qpid_fifo))
+		return -ENOMEM;
+
+	for (i = 16; i < T3_MAX_NUM_QP; i++)
+		if (!(i & rdev_p->qpmask))
+			__kfifo_put(rdev_p->rscp->qpid_fifo,
+				    (unsigned char *) &i, sizeof(u32));
+	return 0;
+}
+
+int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
+{
+	return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
+				       0);
+}
+
+void cxio_hal_destroy_rhdl_resource(void)
+{
+	kfifo_free(rhdl_fifo);
+}
+
+/* nr_* must be power of 2 */
+int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+			   u32 nr_tpt, u32 nr_pbl,
+			   u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
+{
+	int err = 0;
+	struct cxio_hal_resource *rscp;
+
+	rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
+	if (!rscp)
+		return -ENOMEM;
+	rdev_p->rscp = rscp;
+	err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
+				      &rscp->tpt_fifo_lock,
+				      nr_tpt, 1, 0);
+	if (err)
+		goto tpt_err;
+	err = cxio_init_qpid_fifo(rdev_p);
+	if (err)
+		goto qpid_err;
+	err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
+				      nr_cqid, 1, 0);
+	if (err)
+		goto cqid_err;
+	err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
+				      nr_pdid, 1, 0);
+	if (err)
+		goto pdid_err;
+	return 0;
+pdid_err:
+	kfifo_free(rscp->cqid_fifo);
+cqid_err:
+	kfifo_free(rscp->qpid_fifo);
+qpid_err:
+	kfifo_free(rscp->tpt_fifo);
+tpt_err:
+	return -ENOMEM;
+}
+
+/*
+ * returns 0 if no resource available
+ */
+static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
+{
+	u32 entry;
+	if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+		return entry;
+	else
+		return 0;	/* fifo empty */
+}
+
+static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+{
+	BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+}
+
+u32 cxio_hal_get_rhdl(void)
+{
+	return cxio_hal_get_resource(rhdl_fifo);
+}
+
+void cxio_hal_put_rhdl(u32 rhdl)
+{
+	cxio_hal_put_resource(rhdl_fifo, rhdl);
+}
+
+u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
+{
+	return cxio_hal_get_resource(rscp->tpt_fifo);
+}
+
+void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
+{
+	cxio_hal_put_resource(rscp->tpt_fifo, stag);
+}
+
+u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
+{
+	u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	return qpid;
+}
+
+void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
+{
+	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+}
+
+u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
+{
+	return cxio_hal_get_resource(rscp->cqid_fifo);
+}
+
+void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
+{
+	cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+}
+
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
+{
+	return cxio_hal_get_resource(rscp->pdid_fifo);
+}
+
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
+{
+	cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+}
+
+void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
+{
+	kfifo_free(rscp->tpt_fifo);
+	kfifo_free(rscp->cqid_fifo);
+	kfifo_free(rscp->qpid_fifo);
+	kfifo_free(rscp->pdid_fifo);
+	kfree(rscp);
+}
+
+/*
+ * PBL Memory Manager.  Uses Linux generic allocator.
+ */
+
+#define MIN_PBL_SHIFT 8			/* 256B == min PBL size (32 entries) */
+#define PBL_CHUNK 2*1024*1024 		
+
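+/*
+ * The adapter's PBL region is handed to a gen_pool in 2MB chunks with a
+ * 256-byte allocation granularity; the values returned are addresses in
+ * adapter memory, not host addresses.
+ */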
+u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+	unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
+	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size);
+	return (u32)addr;
+}
+
+void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size);
+	gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
+}
+
+int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
+{
+	unsigned long i;
+	rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
+	if (rdev_p->pbl_pool)
+		for (i = rdev_p->rnic_info.pbl_base;
+		     i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
+		     i += PBL_CHUNK)
+			gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
+	return rdev_p->pbl_pool ? 0 : -ENOMEM;
+}
+
+void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
+{
+	gen_pool_destroy(rdev_p->pbl_pool);
+}
+
+/*
+ * RQT Memory Manager.  Uses Linux generic allocator.
+ */
+
+#define MIN_RQT_SHIFT 10	/* 1KB == min RQT size (16 entries) */
+#define RQT_CHUNK 2*1024*1024 		
+
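+/*
+ * 'size' is a count of RQT entries; each entry occupies 64 bytes, hence the
+ * << 6 when talking to the byte-based gen_pool.
+ */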
+u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+	unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
+	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6);
+	return (u32)addr;
+}
+
+void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6);
+	gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
+{
+	unsigned long i;
+	rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
+	if (rdev_p->rqt_pool)
+		for (i = rdev_p->rnic_info.rqt_base;
+		     i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
+		     i += RQT_CHUNK)
+			gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
+	return rdev_p->rqt_pool ? 0 : -ENOMEM;
+}
+
+void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
+{
+	gen_pool_destroy(rdev_p->rqt_pool);
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_resource.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_resource.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_RESOURCE_H__
+#define __CXIO_RESOURCE_H__
+
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/genalloc.h>
+#include "cxio_hal.h"
+
+extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
+extern void cxio_hal_destroy_rhdl_resource(void);
+extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+				  u32 nr_tpt, u32 nr_pbl,
+				  u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
+				  u32 nr_pdid);
+extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
+extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
+extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
+extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
+
+#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
+extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+
+#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
+extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+#endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/core/cxio_wr.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/core/cxio_wr.h
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_WR_H__
+#define __CXIO_WR_H__
+
+#include <asm/io.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+#include "firmware_exports.h"
+
+#define T3_MAX_SGE      4
+#define T3_MAX_INLINE	64
+
+#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
+#define Q_FULL(rptr,wptr,size_log2)  ( (((wptr)-(rptr))>>(size_log2)) && \
+				       ((rptr)!=(wptr)) )
+#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
+#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
+#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
+#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
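+
+/*
+ * The Q_* helpers above treat rptr/wptr as free-running counters: Q_PTR2IDX
+ * masks a counter down to a ring index, and Q_GENBIT derives the expected
+ * generation bit from the bit just above the index, which flips every time
+ * the counter wraps the ring.
+ */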
+
+static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
+{
+	writel(((1<<31) | qpid), doorbell);
+}
+
+#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
+
+enum t3_wr_flags {
+	T3_COMPLETION_FLAG = 0x01,
+	T3_NOTIFY_FLAG = 0x02,
+	T3_SOLICITED_EVENT_FLAG = 0x04,
+	T3_READ_FENCE_FLAG = 0x08,
+	T3_LOCAL_FENCE_FLAG = 0x10
+} __attribute__ ((packed));
+
+enum t3_wr_opcode {
+	T3_WR_BP = FW_WROPCODE_RI_BYPASS,
+	T3_WR_SEND = FW_WROPCODE_RI_SEND,
+	T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
+	T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
+	T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
+	T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
+	T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
+	T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
+	T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
+} __attribute__ ((packed));
+
+enum t3_rdma_opcode {
+	T3_RDMA_WRITE,		/* IETF RDMAP v1.0 ... */
+	T3_READ_REQ,
+	T3_READ_RESP,
+	T3_SEND,
+	T3_SEND_WITH_INV,
+	T3_SEND_WITH_SE,
+	T3_SEND_WITH_SE_INV,
+	T3_TERMINATE,
+	T3_RDMA_INIT,		/* CHELSIO RI specific ... */
+	T3_BIND_MW,
+	T3_FAST_REGISTER,
+	T3_LOCAL_INV,
+	T3_QP_MOD,
+	T3_BYPASS
+} __attribute__ ((packed));
+
+static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
+{
+	switch (wrop) {
+		case T3_WR_BP: return T3_BYPASS;
+		case T3_WR_SEND: return T3_SEND;
+		case T3_WR_WRITE: return T3_RDMA_WRITE;
+		case T3_WR_READ: return T3_READ_REQ;
+		case T3_WR_INV_STAG: return T3_LOCAL_INV;
+		case T3_WR_BIND: return T3_BIND_MW;
+		case T3_WR_INIT: return T3_RDMA_INIT;
+		case T3_WR_QP_MOD: return T3_QP_MOD;
+		default: break;
+	}
+	return -1;
+}
+
+
+/* Work request id */
+union t3_wrid {
+	struct {
+		u32 hi;
+		u32 low;
+	} id0;
+	u64 id1;
+};
+
+#define WRID(wrid)      	(wrid.id1)
+#define WRID_GEN(wrid)		(wrid.id0.wr_gen)
+#define WRID_IDX(wrid)		(wrid.id0.wr_idx)
+#define WRID_LO(wrid)		(wrid.id0.wr_lo)
+
+struct fw_riwrh {
+	__be32 op_seop_flags;
+	__be32 gen_tid_len;
+};
+
+#define S_FW_RIWR_OP		24
+#define M_FW_RIWR_OP		0xff
+#define V_FW_RIWR_OP(x)		((x) << S_FW_RIWR_OP)
+#define G_FW_RIWR_OP(x)   	((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
+
+#define S_FW_RIWR_SOPEOP	22
+#define M_FW_RIWR_SOPEOP	0x3
+#define V_FW_RIWR_SOPEOP(x)	((x) << S_FW_RIWR_SOPEOP)
+
+#define S_FW_RIWR_FLAGS		8
+#define M_FW_RIWR_FLAGS		0x3fffff
+#define V_FW_RIWR_FLAGS(x)	((x) << S_FW_RIWR_FLAGS)
+#define G_FW_RIWR_FLAGS(x)   	((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
+
+#define S_FW_RIWR_TID		8
+#define V_FW_RIWR_TID(x)	((x) << S_FW_RIWR_TID)
+
+#define S_FW_RIWR_LEN		0
+#define V_FW_RIWR_LEN(x)	((x) << S_FW_RIWR_LEN)
+
+#define S_FW_RIWR_GEN           31
+#define V_FW_RIWR_GEN(x)        ((x)  << S_FW_RIWR_GEN)
+
+struct t3_sge {
+	__be32 stag;
+	__be32 len;
+	__be64 to;
+};
+
+/* If num_sgle is zero, flit 5+ contains immediate data.*/
+struct t3_send_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+
+	u8 rdmaop;		/* 2 */
+	u8 reserved[3];
+	__be32 rem_stag;	
+	__be32 plen;		/* 3 */
+	__be32 num_sgle;
+	struct t3_sge sgl[T3_MAX_SGE];	/* 4+ */
+};
+
+struct t3_local_inv_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	__be32 stag;		/* 2 */
+	__be32 reserved3;
+};
+
+struct t3_rdma_write_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	u8 rdmaop;		/* 2 */
+	u8 reserved[3];
+	__be32 stag_sink;
+	__be64 to_sink;		/* 3 */
+	__be32 plen;		/* 4 */
+	__be32 num_sgle;
+	struct t3_sge sgl[T3_MAX_SGE];	/* 5+ */
+};
+
+struct t3_rdma_read_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	u8 rdmaop;		/* 2 */
+	u8 reserved[3];
+	__be32 rem_stag;
+	__be64 rem_to;		/* 3 */
+	__be32 local_stag;	/* 4 */
+	__be32 local_len;
+	__be64 local_to;	/* 5 */
+};
+
+enum t3_addr_type {
+	T3_VA_BASED_TO = 0x0,
+	T3_ZERO_BASED_TO = 0x1
+} __attribute__ ((packed));
+
+enum t3_mem_perms {
+	T3_MEM_ACCESS_LOCAL_READ = 0x1,
+	T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
+	T3_MEM_ACCESS_REM_READ = 0x4,
+	T3_MEM_ACCESS_REM_WRITE = 0x8
+} __attribute__ ((packed));
+
+struct t3_bind_mw_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	u16 reserved;		/* 2 */
+	u8 type;
+	u8 perms;
+	__be32 mr_stag;
+	__be32 mw_stag;		/* 3 */
+	__be32 mw_len;
+	__be64 mw_va;		/* 4 */
+	__be32 mr_pbl_addr;	/* 5 */
+	u8 reserved2[3];
+	u8 mr_pagesz;
+};
+
+struct t3_receive_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	u8 pagesz[T3_MAX_SGE];
+	__be32 num_sgle;		/* 2 */
+	struct t3_sge sgl[T3_MAX_SGE];	/* 3+ */
+	__be32 pbl_addr[T3_MAX_SGE];
+};
+
+struct t3_bypass_wr {
+	struct fw_riwrh wrh;
+	union t3_wrid wrid;	/* 1 */
+};
+
+struct t3_modify_qp_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	__be32 flags;		/* 2 */
+	__be32 quiesce;		/* 2 */
+	__be32 max_ird;		/* 3 */
+	__be32 max_ord;		/* 3 */
+	__be64 sge_cmd;		/* 4 */
+	__be64 ctx1;		/* 5 */
+	__be64 ctx0;		/* 6 */
+};
+
+enum t3_modify_qp_flags {
+	MODQP_QUIESCE  = 0x01,
+	MODQP_MAX_IRD  = 0x02,
+	MODQP_MAX_ORD  = 0x04,
+	MODQP_WRITE_EC = 0x08,
+	MODQP_READ_EC  = 0x10,
+};
+	
+
+enum t3_mpa_attrs {
+	uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
+	uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
+	uP_RI_MPA_CRC_ENABLE = 0x4,
+	uP_RI_MPA_IETF_ENABLE = 0x8
+} __attribute__ ((packed));
+
+enum t3_qp_caps {
+	uP_RI_QP_RDMA_READ_ENABLE = 0x01,
+	uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
+	uP_RI_QP_BIND_ENABLE = 0x04,
+	uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
+	uP_RI_QP_STAG0_ENABLE = 0x10
+} __attribute__ ((packed));
+
+struct t3_rdma_init_attr {
+	u32 tid;
+	u32 qpid;
+	u32 pdid;
+	u32 scqid;
+	u32 rcqid;
+	u32 rq_addr;
+	u32 rq_size;
+	enum t3_mpa_attrs mpaattrs;
+	enum t3_qp_caps qpcaps;
+	u16 tcp_emss;
+	u32 ord;
+	u32 ird;
+	u64 qp_dma_addr;
+	u32 qp_dma_size;
+	u32 flags;
+	u32 irs;
+};
+
+struct t3_rdma_init_wr {
+	struct fw_riwrh wrh;	/* 0 */
+	union t3_wrid wrid;	/* 1 */
+	__be32 qpid;		/* 2 */
+	__be32 pdid;
+	__be32 scqid;		/* 3 */
+	__be32 rcqid;
+	__be32 rq_addr;		/* 4 */
+	__be32 rq_size;
+	u8 mpaattrs;		/* 5 */
+	u8 qpcaps;
+	__be16 ulpdu_size;
+	__be32 flags;		/* bits 31-1 - reserved */
+				/* bit     0 - set if RECV posted */
+	__be32 ord;		/* 6 */
+	__be32 ird;
+	__be64 qp_dma_addr;	/* 7 */
+	__be32 qp_dma_size;	/* 8 */
+	u32 irs;
+};
+
+struct t3_genbit {
+	u64 flit[15];
+	__be64 genbit;
+};
+
+enum rdma_init_wr_flags {
+	RECVS_POSTED = 1,
+};
+
+union t3_wr {
+	struct t3_send_wr send;
+	struct t3_rdma_write_wr write;
+	struct t3_rdma_read_wr read;
+	struct t3_receive_wr recv;
+	struct t3_local_inv_wr local_inv;
+	struct t3_bind_mw_wr bind;
+	struct t3_bypass_wr bypass;
+	struct t3_rdma_init_wr init;
+	struct t3_modify_qp_wr qp_mod;
+	struct t3_genbit genbit;
+	u64 flit[16];
+};
+
+#define T3_SQ_CQE_FLIT 	  13
+#define T3_SQ_COOKIE_FLIT 14
+
+#define T3_RQ_COOKIE_FLIT 13
+#define T3_RQ_CQE_FLIT 	  14
+
+static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
+{
+	return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
+}
+
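+/*
+ * build_fw_riwrh() writes the opcode/flags word first; after the wmb() the
+ * gen/tid/len word follows, and the generation bit is also copied into the
+ * WQE's final flit, presumably so the consumer can detect a fully written
+ * WQE.
+ */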
+static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
+				  enum t3_wr_flags flags, u8 genbit, u32 tid,
+				  u8 len)
+{
+	wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
+					 V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
+					 V_FW_RIWR_FLAGS(flags));
+	wmb();
+	wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
+				       V_FW_RIWR_TID(tid) |
+				       V_FW_RIWR_LEN(len));
+	/* 2nd gen bit... */
+        ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
+}
+
+/*
+ * T3 ULP2_TX commands
+ */
+enum t3_utx_mem_op {
+	T3_UTX_MEM_READ = 2,
+	T3_UTX_MEM_WRITE = 3
+};
+
+/* T3 MC7 RDMA TPT entry format */
+
+enum tpt_mem_type {
+	TPT_NON_SHARED_MR = 0x0,
+	TPT_SHARED_MR = 0x1,
+	TPT_MW = 0x2,
+	TPT_MW_RELAXED_PROTECTION = 0x3
+};
+
+enum tpt_addr_type {
+	TPT_ZBTO = 0,
+	TPT_VATO = 1
+};
+
+enum tpt_mem_perm {
+	TPT_LOCAL_READ = 0x8,
+	TPT_LOCAL_WRITE = 0x4,
+	TPT_REMOTE_READ = 0x2,
+	TPT_REMOTE_WRITE = 0x1
+};
+
+struct tpt_entry {
+	__be32 valid_stag_pdid;
+	__be32 flags_pagesize_qpid;
+
+	__be32 rsvd_pbl_addr;
+	__be32 len;
+	__be32 va_hi;
+	__be32 va_low_or_fbo;
+
+	__be32 rsvd_bind_cnt_or_pstag;
+	__be32 rsvd_pbl_size;
+};
+
+#define S_TPT_VALID		31
+#define V_TPT_VALID(x)		((x) << S_TPT_VALID)
+#define F_TPT_VALID		V_TPT_VALID(1U)
+
+#define S_TPT_STAG_KEY		23
+#define M_TPT_STAG_KEY		0xFF
+#define V_TPT_STAG_KEY(x)	((x) << S_TPT_STAG_KEY)
+#define G_TPT_STAG_KEY(x)	(((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
+
+#define S_TPT_STAG_STATE	22
+#define V_TPT_STAG_STATE(x)	((x) << S_TPT_STAG_STATE)
+#define F_TPT_STAG_STATE	V_TPT_STAG_STATE(1U)
+
+#define S_TPT_STAG_TYPE		20
+#define M_TPT_STAG_TYPE		0x3
+#define V_TPT_STAG_TYPE(x)	((x) << S_TPT_STAG_TYPE)
+#define G_TPT_STAG_TYPE(x)	(((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
+
+#define S_TPT_PDID		0
+#define M_TPT_PDID		0xFFFFF
+#define V_TPT_PDID(x)		((x) << S_TPT_PDID)
+#define G_TPT_PDID(x)		(((x) >> S_TPT_PDID) & M_TPT_PDID)
+
+#define S_TPT_PERM		28
+#define M_TPT_PERM		0xF
+#define V_TPT_PERM(x)		((x) << S_TPT_PERM)
+#define G_TPT_PERM(x)		(((x) >> S_TPT_PERM) & M_TPT_PERM)
+
+#define S_TPT_REM_INV_DIS	27
+#define V_TPT_REM_INV_DIS(x)	((x) << S_TPT_REM_INV_DIS)
+#define F_TPT_REM_INV_DIS	V_TPT_REM_INV_DIS(1U)
+
+#define S_TPT_ADDR_TYPE		26
+#define V_TPT_ADDR_TYPE(x)	((x) << S_TPT_ADDR_TYPE)
+#define F_TPT_ADDR_TYPE		V_TPT_ADDR_TYPE(1U)
+
+#define S_TPT_MW_BIND_ENABLE	25
+#define V_TPT_MW_BIND_ENABLE(x)	((x) << S_TPT_MW_BIND_ENABLE)
+#define F_TPT_MW_BIND_ENABLE    V_TPT_MW_BIND_ENABLE(1U)
+
+#define S_TPT_PAGE_SIZE		20
+#define M_TPT_PAGE_SIZE		0x1F
+#define V_TPT_PAGE_SIZE(x)	((x) << S_TPT_PAGE_SIZE)
+#define G_TPT_PAGE_SIZE(x)	(((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
+
+#define S_TPT_PBL_ADDR		0
+#define M_TPT_PBL_ADDR		0x1FFFFFFF
+#define V_TPT_PBL_ADDR(x)	((x) << S_TPT_PBL_ADDR)
+#define G_TPT_PBL_ADDR(x)       (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
+
+#define S_TPT_QPID		0
+#define M_TPT_QPID		0xFFFFF
+#define V_TPT_QPID(x)		((x) << S_TPT_QPID)
+#define G_TPT_QPID(x)		(((x) >> S_TPT_QPID) & M_TPT_QPID)
+
+#define S_TPT_PSTAG		0
+#define M_TPT_PSTAG		0xFFFFFF
+#define V_TPT_PSTAG(x)		((x) << S_TPT_PSTAG)
+#define G_TPT_PSTAG(x)		(((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
+
+#define S_TPT_PBL_SIZE		0
+#define M_TPT_PBL_SIZE		0xFFFFF
+#define V_TPT_PBL_SIZE(x)	((x) << S_TPT_PBL_SIZE)
+#define G_TPT_PBL_SIZE(x)	(((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
+
+/*
+ * CQE defs
+ */
+struct t3_cqe {
+	__be32 header;
+	__be32 len;
+	union {
+		struct {
+			__be32 stag;
+			__be32 msn;
+		} rcqe;
+		struct {
+			u32 wrid_hi;	
+			u32 wrid_low;
+		} scqe;
+	} u;
+};
+
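+/*
+ * CQE header layout, per the field definitions below:
+ *   [31] OOO, [30:12] QPID, [11] SWCQE, [10] GENBIT,
+ *   [9:5] STATUS, [4] TYPE (1 = SQ, 0 = RQ), [3:0] OPCODE.
+ */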
+#define S_CQE_OOO	  31
+#define M_CQE_OOO	  0x1
+#define G_CQE_OOO(x)	  ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
+#define V_CEQ_OOO(x)	  ((x)<<S_CQE_OOO)
+
+#define S_CQE_QPID        12
+#define M_CQE_QPID        0x7FFFF
+#define G_CQE_QPID(x)     ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) 	  ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE       11
+#define M_CQE_SWCQE       0x1
+#define G_CQE_SWCQE(x)    ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) 	  ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_GENBIT      10
+#define M_CQE_GENBIT      0x1
+#define G_CQE_GENBIT(x)   (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x)	  ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_STATUS      5
+#define M_CQE_STATUS      0x1F
+#define G_CQE_STATUS(x)   ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x)   ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE        4
+#define M_CQE_TYPE        0x1
+#define G_CQE_TYPE(x)     ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x)     ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE      0
+#define M_CQE_OPCODE      0xF
+#define G_CQE_OPCODE(x)   ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x)   ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x)         (G_CQE_SWCQE(be32_to_cpu((x).header)))
+#define CQE_OOO(x)        (G_CQE_OOO(be32_to_cpu((x).header)))
+#define CQE_QPID(x)       (G_CQE_QPID(be32_to_cpu((x).header)))
+#define CQE_GENBIT(x)     (G_CQE_GENBIT(be32_to_cpu((x).header)))
+#define CQE_TYPE(x)       (G_CQE_TYPE(be32_to_cpu((x).header)))
+#define SQ_TYPE(x)	  (CQE_TYPE((x)))
+#define RQ_TYPE(x)	  (!CQE_TYPE((x)))
+#define CQE_STATUS(x)     (G_CQE_STATUS(be32_to_cpu((x).header)))
+#define CQE_OPCODE(x)     (G_CQE_OPCODE(be32_to_cpu((x).header)))
+
+#define CQE_LEN(x)        (be32_to_cpu((x).len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x)  (be32_to_cpu((x).u.rcqe.stag))
+#define CQE_WRID_MSN(x)   (be32_to_cpu((x).u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_WPTR(x)	((x).u.scqe.wrid_hi)
+#define CQE_WRID_WPTR(x)   	((x).u.scqe.wrid_low)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x)		((x).u.scqe.wrid_hi)
+#define CQE_WRID_LOW(x) 	((x).u.scqe.wrid_low)
+
+#define TPT_ERR_SUCCESS                     0x0
+#define TPT_ERR_STAG                        0x1	 /* STAG invalid: either the */
						 /* STAG is off limit, being 0, */
+						 /* or STAG_key mismatch */
+#define TPT_ERR_PDID                        0x2	 /* PDID mismatch */
+#define TPT_ERR_QPID                        0x3	 /* QPID mismatch */
+#define TPT_ERR_ACCESS                      0x4	 /* Invalid access right */
+#define TPT_ERR_WRAP                        0x5	 /* Wrap error */
+#define TPT_ERR_BOUND                       0x6	 /* base and bounds violation */
+#define TPT_ERR_INVALIDATE_SHARED_MR        0x7	 /* attempt to invalidate a  */
+						 /* shared memory region */
+#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8	 /* attempt to invalidate a  */
						 /* MR that has an MW bound to it */
+#define TPT_ERR_ECC                         0x9	 /* ECC error detected */
+#define TPT_ERR_ECC_PSTAG                   0xA	 /* ECC error detected when  */
+						 /* reading PSTAG for a MW  */
+						 /* Invalidate */
+#define TPT_ERR_PBL_ADDR_BOUND              0xB	 /* pbl addr out of bounds:  */
+						 /* software error */
+#define TPT_ERR_SWFLUSH			    0xC	 /* SW FLUSHED */
+#define TPT_ERR_CRC                         0x10 /* CRC error */
+#define TPT_ERR_MARKER                      0x11 /* Marker error */
+#define TPT_ERR_PDU_LEN_ERR                 0x12 /* invalid PDU length */
+#define TPT_ERR_OUT_OF_RQE                  0x13 /* out of RQE */
+#define TPT_ERR_DDP_VERSION                 0x14 /* wrong DDP version */
+#define TPT_ERR_RDMA_VERSION                0x15 /* wrong RDMA version */
+#define TPT_ERR_OPCODE                      0x16 /* invalid rdma opcode */
+#define TPT_ERR_DDP_QUEUE_NUM               0x17 /* invalid ddp queue number */
+#define TPT_ERR_MSN                         0x18 /* MSN error */
+#define TPT_ERR_TBIT                        0x19 /* tag bit not set correctly */
+#define TPT_ERR_MO                          0x1A /* MO not 0 for TERMINATE  */
+						 /* or READ_REQ */
+#define TPT_ERR_MSN_GAP                     0x1B
+#define TPT_ERR_MSN_RANGE                   0x1C
+#define TPT_ERR_IRD_OVERFLOW                0x1D
+#define TPT_ERR_RQE_ADDR_BOUND              0x1E /* RQE addr out of bounds:  */
+						 /* software error */
+#define TPT_ERR_INTERNAL_ERR                0x1F /* internal error (opcode  */
+						 /* mismatch) */
+
+struct t3_swsq {
+	__u64 			wr_id;
+	struct t3_cqe 		cqe;
+	__u32			sq_wptr;
+	__be32			read_len;
+	int 			opcode;
+	int			complete;
+	int			signaled;	
+};
+
+/*
+ * A T3 WQ implements both the SQ and RQ.
+ */
+struct t3_wq {
+	union t3_wr *queue;		/* DMA accessible memory */
+	dma_addr_t dma_addr;		/* DMA address for HW */
+	DECLARE_PCI_UNMAP_ADDR(mapping)	/* unmap kruft */
+	u32 error;			/* 1 once we go to ERROR */
+	u32 qpid;
+	u32 wptr;			/* idx to next available WR slot */
+	u32 size_log2;			/* total wq size */
+	struct t3_swsq *sq;		/* SW SQ */
+	struct t3_swsq *oldest_read;	/* tracks oldest pending read */
+	u32 sq_wptr;			/* sq_wptr - sq_rptr == count of */
+	u32 sq_rptr;			/* pending wrs */
+	u32 sq_size_log2;		/* sq size */
+	u64 *rq;			/* SW RQ (holds consumer wr_ids) */
+	u32 rq_wptr;			/* rq_wptr - rq_rptr == count of */
+	u32 rq_rptr;			/* pending wrs */
+	u64 *rq_oldest_wr;		/* oldest wr on the SW RQ */
+	u32 rq_size_log2;		/* rq size */
+	u32 rq_addr;			/* rq adapter address */
+	void __iomem *doorbell;		/* kernel db */
+	u64 udb;			/* user db if any */
+};
+
+struct t3_cq {
+	u32 cqid;
+	u32 rptr;
+	u32 wptr;
+	u32 size_log2;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+	struct t3_cqe *queue;
+	struct t3_cqe *sw_queue;
+	u32 sw_rptr;
+	u32 sw_wptr;
+};
+
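+/*
+ * A hardware CQE is valid when its generation bit matches the generation
+ * expected at the current read pointer (Q_GENBIT flips on every wrap).
+ */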
+#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
+					 CQE_GENBIT(*cqe))
+
+static inline void cxio_set_wq_in_error(struct t3_wq *wq)
+{
+	wq->queue->flit[13] = 1;
+}
+
+static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
+{
+	struct t3_cqe *cqe;
+
+	cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+	if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+		return cqe;
+	return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
+{
+	struct t3_cqe *cqe;
+
+	if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+		cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+		return cqe;
+	}
+	return NULL;
+}
+
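+/*
+ * Software CQEs (completions synthesized by the driver, e.g. for flushed
+ * WRs) are drained ahead of anything still pending in the hardware CQ.
+ */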
+static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
+{
+	struct t3_cqe *cqe;
+
+	if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+		cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+		return cqe;
+	}
+	cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+	if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+		return cqe;
+	return NULL;
+}
+
+#endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+#include "iwch_user.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+
+#define DRV_VERSION "1.0-ofed"
+
+MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
+MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+
+static void open_rnic_dev(struct t3cdev *);
+static void close_rnic_dev(struct t3cdev *);
+
+struct cxgb3_client t3c_client = {
+	.name = "iw_cxgb3",
+	.add = open_rnic_dev,
+	.remove = close_rnic_dev,
+	.handlers = t3c_handlers,
+	.redirect = iwch_ep_redirect
+};
+
+static LIST_HEAD(dev_list);
+static DEFINE_MUTEX(dev_mutex);
+
+static void rnic_init(struct iwch_dev *rnicp)
+{
+	PDBG("%s iwch_dev %p\n", __FUNCTION__,  rnicp);
+	idr_init(&rnicp->cqidr);
+	idr_init(&rnicp->qpidr);
+	idr_init(&rnicp->mmidr);
+	spin_lock_init(&rnicp->lock);
+
+	rnicp->attr.vendor_id = 0x168;
+	rnicp->attr.vendor_part_id = 7;
+	rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
+	rnicp->attr.max_wrs = (1UL << 24) - 1;
+	rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
+	rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
+	rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
+	rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+	rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
+	rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
+	rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
+	rnicp->attr.mem_pgsizes_bitmask = 0x7FFF;	/* 4KB-128MB */
+	rnicp->attr.can_resize_wq = 0;
+	rnicp->attr.max_rdma_reads_per_qp = 8;
+	rnicp->attr.max_rdma_read_resources =
+	    rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
+	rnicp->attr.max_rdma_read_qp_depth = 8;	/* IRD */
+	rnicp->attr.max_rdma_read_depth =
+	    rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
+	rnicp->attr.rq_overflow_handled = 0;
+	rnicp->attr.can_modify_ird = 0;
+	rnicp->attr.can_modify_ord = 0;
+	rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
+	rnicp->attr.stag0_value = 1;
+	rnicp->attr.zbva_support = 1;
+	rnicp->attr.local_invalidate_fence = 1;
+	rnicp->attr.cq_overflow_detection = 1;
+	return;
+}
+
+static void open_rnic_dev(struct t3cdev *tdev)
+{
+	struct iwch_dev *rnicp;
+	static int vers_printed;
+
+	PDBG("%s t3cdev %p\n", __FUNCTION__,  tdev);
+	if (!vers_printed++)
+		printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+		       DRV_VERSION);
+	rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
+	if (!rnicp) {
+		printk(KERN_ERR MOD "Cannot allocate ib device\n");
+		return;
+	}
+	rnicp->rdev.ulp = rnicp;
+	rnicp->rdev.t3cdev_p = tdev;
+
+	if (cxio_rdev_open(&rnicp->rdev)) {
+		printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+		ib_dealloc_device(&rnicp->ibdev);
+		return;
+	}
+
+	rnic_init(rnicp);
+
+	mutex_lock(&dev_mutex);
+	list_add_tail(&rnicp->entry, &dev_list);
+	mutex_unlock(&dev_mutex);
+
+	if (iwch_register_device(rnicp)) {
+		printk(KERN_ERR MOD "Unable to register device\n");
+		close_rnic_dev(tdev);
+	}
+	printk(KERN_INFO MOD "Initialized device %s\n",
+	       pci_name(rnicp->rdev.rnic_info.pdev));
+	return;
+}
+
+static void close_rnic_dev(struct t3cdev *tdev)
+{
+	struct iwch_dev *dev, *tmp;
+	PDBG("%s t3cdev %p\n", __FUNCTION__,  tdev);
+	mutex_lock(&dev_mutex);
+	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
+		if (dev->rdev.t3cdev_p == tdev) {
+			list_del(&dev->entry);
+			iwch_unregister_device(dev);
+			cxio_rdev_close(&dev->rdev);
+			idr_destroy(&dev->cqidr);
+			idr_destroy(&dev->qpidr);
+			idr_destroy(&dev->mmidr);
+			ib_dealloc_device(&dev->ibdev);
+			break;
+		}
+	}
+	mutex_unlock(&dev_mutex);
+}
+
+extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
+
+static int __init iwch_init_module(void)
+{
+	int err;
+
+	err = cxio_hal_init();
+	if (err)
+		return err;
+	err = iwch_cm_init();
+	if (err)
+		return err;
+	cxio_register_ev_cb(iwch_ev_dispatch);
+	cxgb3_register_client(&t3c_client);
+	return 0;
+}
+
+static void __exit iwch_exit_module(void)
+{
+	cxgb3_unregister_client(&t3c_client);
+	cxio_unregister_ev_cb(iwch_ev_dispatch);
+	iwch_cm_term();
+	cxio_hal_exit();
+}
+
+module_init(iwch_init_module);
+module_exit(iwch_exit_module);
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -0,0 +1,2097 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+#include <net/route.h>
+
+#include "tcb.h"
+#include "cxgb3_offload.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+
+char *states[] = {
+	"idle",
+	"listen",
+	"connecting",
+	"mpa_wait_req",
+	"mpa_req_sent",
+	"mpa_req_rcvd",
+	"mpa_rep_sent",
+	"fpdu_mode",
+	"aborting",
+	"closing",
+	"moribund",
+	"dead",
+	NULL,
+};
+
+static int ep_timeout_secs = 10;
+module_param(ep_timeout_secs, int, 0444);
+MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
+				   "in seconds (default=10)");
+
+static int mpa_rev = 1;
+module_param(mpa_rev, int, 0444);
+MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
+		 "1 is spec compliant. (default=1)");
+
+static int markers_enabled = 0;
+module_param(markers_enabled, int, 0444);
+MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
+
+static int crc_enabled = 1;
+module_param(crc_enabled, int, 0444);
+MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
+
+static int rcv_win = 256 * 1024;
+module_param(rcv_win, int, 0444);
+MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
+
+static int snd_win = 32 * 1024;
+module_param(snd_win, int, 0444);
+MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
+
+static unsigned int nocong = 0;
+module_param(nocong, uint, 0444);
+MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
+
+static unsigned int cong_flavor = 1;
+module_param(cong_flavor, uint, 0444);
+MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
+
+static void process_work(void *_work);
+static struct workqueue_struct *workq;
+DECLARE_WORK(skb_work, process_work, &skb_work);
+
+static struct sk_buff_head rxq;
+static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
+
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
+static void ep_timeout(unsigned long arg);
+static void connect_reply_upcall(struct iwch_ep *ep, int status);
+
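+/*
+ * Arming the endpoint timer takes a reference on the endpoint the first
+ * time (re-arming only resets the expiry); stop_ep_timer drops that
+ * reference.
+ */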
+static void start_ep_timer(struct iwch_ep *ep)
+{
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	if (timer_pending(&ep->timer)) {
+		PDBG("%s stopped / restarted timer ep %p\n", __FUNCTION__, ep);
+		del_timer_sync(&ep->timer);
+	} else
+		get_ep(&ep->com);
+	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+	ep->timer.data = (unsigned long)ep;
+	ep->timer.function = ep_timeout;
+	add_timer(&ep->timer);
+}
+
+static void stop_ep_timer(struct iwch_ep *ep)
+{
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	del_timer_sync(&ep->timer);
+	put_ep(&ep->com);
+}
+
+static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
+{
+	struct cpl_tid_release *req;
+
+	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+	if (!skb)
+		return;
+	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
+	skb->priority = CPL_PRIORITY_SETUP;
+	tdev->send(tdev, skb);
+	return;
+}
+
+int iwch_quiesce_tid(struct iwch_ep *ep)
+{
+	struct cpl_set_tcb_field *req;
+	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+	if (!skb)
+		return -ENOMEM;
+	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+	req->reply = 0;
+	req->cpu_idx = 0;
+	req->word = htons(W_TCB_RX_QUIESCE);
+	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
+
+	skb->priority = CPL_PRIORITY_DATA;
+	ep->com.tdev->send(ep->com.tdev, skb);
+	return 0;
+}
+
+int iwch_resume_tid(struct iwch_ep *ep)
+{
+	struct cpl_set_tcb_field *req;
+	struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+	if (!skb)
+		return -ENOMEM;
+	req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+	req->reply = 0;
+	req->cpu_idx = 0;
+	req->word = htons(W_TCB_RX_QUIESCE);
+	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+	req->val = 0;
+
+	skb->priority = CPL_PRIORITY_DATA;
+	ep->com.tdev->send(ep->com.tdev, skb);
+	return 0;
+}
+
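+/*
+ * Derive the effective MSS from the MTU-table entry negotiated by the
+ * hardware: subtract 40 bytes for the IP/TCP headers and another 12 if
+ * TCP timestamps were negotiated, then clamp to a 128-byte floor.
+ */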
+static void set_emss(struct iwch_ep *ep, u16 opt)
+{
+	PDBG("%s ep %p opt %u\n", __FUNCTION__, ep, opt);
+	ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
+	if (G_TCPOPT_TSTAMP(opt))
+		ep->emss -= 12;
+	if (ep->emss < 128)
+		ep->emss = 128;
+	PDBG("emss=%d\n", ep->emss);
+}
+
+static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
+{
+	unsigned long flags;
+	enum iwch_ep_state state;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	state = epc->state;
+	spin_unlock_irqrestore(&epc->lock, flags);
+	return state;
+}
+
+static inline void __state_set(struct iwch_ep_common *epc,
+			       enum iwch_ep_state new)
+{
+	epc->state = new;
+}
+
+static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	PDBG("%s - %s -> %s\n", __FUNCTION__, states[epc->state], states[new]);
+	__state_set(epc, new);
+	spin_unlock_irqrestore(&epc->lock, flags);
+	return;
+}
+
+static void *alloc_ep(int size, gfp_t gfp)
+{
+	struct iwch_ep_common *epc;
+
+	epc = kmalloc(size, gfp);
+	if (epc) {
+		memset(epc, 0, size);
+		kref_init(&epc->kref);
+		spin_lock_init(&epc->lock);
+		init_waitqueue_head(&epc->waitq);
+	}
+	PDBG("%s alloc ep %p\n", __FUNCTION__, epc);
+	return (void *) epc;
+}
+
+void __free_ep(struct kref *kref)
+{
+	struct iwch_ep_common *epc;
+	epc = container_of(kref, struct iwch_ep_common, kref);
+	PDBG("%s ep %p state %s\n", __FUNCTION__, epc, states[state_read(epc)]);
+	kfree(epc);
+}
+
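+/*
+ * Release the hardware TID, route and L2T entry held by a dead
+ * endpoint and drop its reference.
+ */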
+static void release_ep_resources(struct iwch_ep *ep)
+{
+	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+	cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+	dst_release(ep->dst);
+	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	if (ep->com.tdev->type == T3B)
+		release_tid(ep->com.tdev, ep->hwtid, NULL);
+	put_ep(&ep->com);
+}
+
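+/*
+ * Work-queue handler: drain the receive queue and dispatch each CPL
+ * message to its handler, dropping the endpoint reference taken in
+ * sched().
+ */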
+static void process_work(void *_work)
+{
+	struct sk_buff *skb = NULL;
+	void *ep;
+	struct t3cdev *tdev;
+	int ret;
+
+	while ((skb = skb_dequeue(&rxq))) {
+		ep = *((void **) (skb->cb));
+		tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
+		ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
+		if (ret & CPL_RET_BUF_DONE)
+			kfree_skb(skb);
+
+		/*
+		 * ep was referenced in sched(), and is freed here.
+		 */
+		put_ep((struct iwch_ep_common *)ep);
+	}
+}
+
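+/*
+ * Map a CPL hardware status code onto the errno reported to the upper
+ * layers.
+ */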
+static int status2errno(int status)
+{
+	switch (status) {
+	case CPL_ERR_NONE:
+		return 0;
+	case CPL_ERR_CONN_RESET:
+		return -ECONNRESET;
+	case CPL_ERR_ARP_MISS:
+		return -EHOSTUNREACH;
+	case CPL_ERR_CONN_TIMEDOUT:
+		return -ETIMEDOUT;
+	case CPL_ERR_TCAM_FULL:
+		return -ENOMEM;
+	case CPL_ERR_CONN_EXIST:
+		return -EADDRINUSE;
+	default:
+		return -EIO;
+	}
+}
+
+/*
+ * Try and reuse skbs already allocated...
+ */
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
+{
+	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
+		skb_trim(skb, 0);
+		skb_get(skb);
+	} else {
+		skb = alloc_skb(len, gfp);
+	}
+	return skb;
+}
+
+static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
+				 __be32 peer_ip, __be16 local_port,
+				 __be16 peer_port, u8 tos)
+{
+	struct rtable *rt;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			 .ip4_u = {
+				   .daddr = peer_ip,
+				   .saddr = local_ip,
+				   .tos = tos}
+			 },
+		.proto = IPPROTO_TCP,
+		.uli_u = {
+			  .ports = {
+				    .sport = local_port,
+				    .dport = peer_port}
+			  }
+	};
+
+	if (ip_route_output_flow(&rt, &fl, NULL, 0))
+		return NULL;
+	return rt;
+}
+
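+/*
+ * Return the index of the largest entry in the adapter MTU table that
+ * does not exceed the given path MTU.
+ */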
+static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
+{
+	int i = 0;
+
+	while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
+		++i;
+	return i;
+}
+
+static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
+{
+	PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+	kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for an active open.
+ */
+static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+	printk(KERN_ERR MOD "ARP failure during connect\n");
+	kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
+ * and send it along.
+ */
+static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+	struct cpl_abort_req *req = cplhdr(skb);
+
+	PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+	req->cmd = CPL_ABORT_NO_RST;
+	cxgb3_ofld_send(dev, skb);
+}
+
+static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
+{
+	struct cpl_close_con_req *req;
+	struct sk_buff *skb;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	skb = get_skb(NULL, sizeof(*req), gfp);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	skb->priority = CPL_PRIORITY_DATA;
+	set_arp_failure_handler(skb, arp_failure_discard);
+	req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	return 0;
+}
+
+static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+	struct cpl_abort_req *req;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	skb = get_skb(skb, sizeof(*req), gfp);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+		       __FUNCTION__);
+		return -ENOMEM;
+	}
+	skb->priority = CPL_PRIORITY_DATA;
+	set_arp_failure_handler(skb, abort_arp_failure);
+	req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
+	req->cmd = CPL_ABORT_SEND_RST;
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	return 0;
+}
+
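+/*
+ * Build and send a CPL_ACT_OPEN_REQ to start an active TCP open,
+ * encoding the MTU index, window scale and congestion-control flavor
+ * in the opt0/opt2 words.
+ */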
+static int send_connect(struct iwch_ep *ep)
+{
+	struct cpl_act_open_req *req;
+	struct sk_buff *skb;
+	u32 opt0h, opt0l, opt2;
+	unsigned int mtu_idx;
+	int wscale;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+		       __FUNCTION__);
+		return -ENOMEM;
+	}
+	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+	wscale = compute_wscale(rcv_win);
+	opt0h = V_NAGLE(0) |
+	    V_NO_CONG(nocong) |
+	    V_KEEP_ALIVE(1) |
+	    F_TCAM_BYPASS |
+	    V_WND_SCALE(wscale) |
+	    V_MSS_IDX(mtu_idx) |
+	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+	opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+	skb->priority = CPL_PRIORITY_SETUP;
+	set_arp_failure_handler(skb, act_open_req_arp_failure);
+
+	req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
+	req->local_port = ep->com.local_addr.sin_port;
+	req->peer_port = ep->com.remote_addr.sin_port;
+	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
+	req->opt0h = htonl(opt0h);
+	req->opt0l = htonl(opt0l);
+	req->params = 0;
+	req->opt2 = htonl(opt2);
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	return 0;
+}
+
+static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
+{
+	int mpalen;
+	struct tx_data_wr *req;
+	struct mpa_message *mpa;
+	int len;
+
+	PDBG("%s ep %p pd_len %d\n", __FUNCTION__, ep, ep->plen);
+
+	BUG_ON(skb_cloned(skb));
+
+	mpalen = sizeof(*mpa) + ep->plen;
+	if (skb->data + mpalen + sizeof(*req) > skb->end) {
+		kfree_skb(skb);
+		skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
+		if (!skb) {
+			connect_reply_upcall(ep, -ENOMEM);
+			return;
+		}
+	}
+	skb_trim(skb, 0);
+	skb_reserve(skb, sizeof(*req));
+	skb_put(skb, mpalen);
+	skb->priority = CPL_PRIORITY_DATA;
+	mpa = (struct mpa_message *) skb->data;
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+		     (markers_enabled ? MPA_MARKERS : 0);
+	mpa->private_data_size = htons(ep->plen);
+	mpa->revision = mpa_rev;
+
+	if (ep->plen)
+		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+
+	/*
+	 * Reference the mpa skb.  This ensures the data area
+	 * will remain in memory until the hw acks the tx.
+	 * Function tx_ack() will deref it.
+	 */
+	skb_get(skb);
+	set_arp_failure_handler(skb, arp_failure_discard);
+	skb->h.raw = skb->data;
+	len = skb->len;
+	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+	req->len = htonl(len);
+	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+			   V_TX_SNDBUF(snd_win>>15));
+	req->flags = htonl(F_TX_INIT);
+	req->sndseq = htonl(ep->snd_seq);
+	BUG_ON(ep->mpa_skb);
+	ep->mpa_skb = skb;
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	start_ep_timer(ep);
+	state_set(&ep->com, MPA_REQ_SENT);
+	return;
+}
+
+static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+	int mpalen;
+	struct tx_data_wr *req;
+	struct mpa_message *mpa;
+	struct sk_buff *skb;
+
+	PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+
+	mpalen = sizeof(*mpa) + plen;
+
+	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	skb_reserve(skb, sizeof(*req));
+	mpa = (struct mpa_message *) skb_put(skb, mpalen);
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+	mpa->flags = MPA_REJECT;
+	mpa->revision = mpa_rev;
+	mpa->private_data_size = htons(plen);
+	if (plen)
+		memcpy(mpa->private_data, pdata, plen);
+
+	/*
+	 * Reference the mpa skb again.  This ensures the data area
+	 * will remain in memory until the hw acks the tx.
+	 * Function tx_ack() will deref it.
+	 */
+	skb_get(skb);
+	skb->priority = CPL_PRIORITY_DATA;
+	set_arp_failure_handler(skb, arp_failure_discard);
+	skb->h.raw = skb->data;
+	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+	req->len = htonl(mpalen);
+	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+			   V_TX_SNDBUF(snd_win>>15));
+	req->flags = htonl(F_TX_INIT);
+	req->sndseq = htonl(ep->snd_seq);
+	BUG_ON(ep->mpa_skb);
+	ep->mpa_skb = skb;
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	return 0;
+}
+
+static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+	int mpalen;
+	struct tx_data_wr *req;
+	struct mpa_message *mpa;
+	int len;
+	struct sk_buff *skb;
+
+	PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+
+	mpalen = sizeof(*mpa) + plen;
+
+	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	skb->priority = CPL_PRIORITY_DATA;
+	skb_reserve(skb, sizeof(*req));
+	mpa = (struct mpa_message *) skb_put(skb, mpalen);
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+		     (markers_enabled ? MPA_MARKERS : 0);
+	mpa->revision = mpa_rev;
+	mpa->private_data_size = htons(plen);
+	if (plen)
+		memcpy(mpa->private_data, pdata, plen);
+
+	/*
+	 * Reference the mpa skb.  This ensures the data area
+	 * will remain in memory until the hw acks the tx.
+	 * Function tx_ack() will deref it.
+	 */
+	skb_get(skb);
+	set_arp_failure_handler(skb, arp_failure_discard);
+	skb->h.raw = skb->data;
+	len = skb->len;
+	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+	req->len = htonl(len);
+	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+			   V_TX_SNDBUF(snd_win>>15));
+	req->flags = htonl(F_TX_INIT);
+	req->sndseq = htonl(ep->snd_seq);
+	ep->mpa_skb = skb;
+	state_set(&ep->com, MPA_REP_SENT);
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+	return 0;
+}
+
+static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct cpl_act_establish *req = cplhdr(skb);
+	unsigned int tid = GET_TID(req);
+
+	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, tid);
+
+	dst_confirm(ep->dst);
+
+	/* setup the hwtid for this connection */
+	ep->hwtid = tid;
+	cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
+
+	ep->snd_seq = ntohl(req->snd_isn);
+	ep->rcv_seq = ntohl(req->rcv_isn);
+
+	set_emss(ep, ntohs(req->tcp_opt));
+
+	/* dealloc the atid */
+	cxgb3_free_atid(ep->com.tdev, ep->atid);
+
+	/* start MPA negotiation */
+	send_mpa_req(ep, skb);
+
+	return 0;
+}
+
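+/*
+ * Move the endpoint to ABORTING and send an abort (RST) to the peer.
+ */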
+static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	state_set(&ep->com, ABORTING);
+	send_abort(ep, skb, gfp);
+}
+
+static void close_complete_upcall(struct iwch_ep *ep)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CLOSE;
+	if (ep->com.cm_id) {
+		PDBG("close complete delivered ep %p cm_id %p tid %d\n",
+		     ep, ep->com.cm_id, ep->hwtid);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+		ep->com.cm_id->rem_ref(ep->com.cm_id);
+		ep->com.cm_id = NULL;
+		ep->com.qp = NULL;
+	}
+}
+
+static void peer_close_upcall(struct iwch_ep *ep)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_DISCONNECT;
+	if (ep->com.cm_id) {
+		PDBG("peer close delivered ep %p cm_id %p tid %d\n",
+		     ep, ep->com.cm_id, ep->hwtid);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+	}
+}
+
+static void peer_abort_upcall(struct iwch_ep *ep)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CLOSE;
+	event.status = -ECONNRESET;
+	if (ep->com.cm_id) {
+		PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
+		     ep->com.cm_id, ep->hwtid);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+		ep->com.cm_id->rem_ref(ep->com.cm_id);
+		ep->com.cm_id = NULL;
+		ep->com.qp = NULL;
+	}
+}
+
+static void connect_reply_upcall(struct iwch_ep *ep, int status)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p status %d\n", __FUNCTION__, ep, status);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CONNECT_REPLY;
+	event.status = status;
+	event.local_addr = ep->com.local_addr;
+	event.remote_addr = ep->com.remote_addr;
+
+	if ((status == 0) || (status == -ECONNREFUSED)) {
+		event.private_data_len = ep->plen;
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+	}
+	if (ep->com.cm_id) {
+		PDBG("%s ep %p tid %d status %d\n", __FUNCTION__, ep,
+		     ep->hwtid, status);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+	}
+	if (status < 0) {
+		ep->com.cm_id->rem_ref(ep->com.cm_id);
+		ep->com.cm_id = NULL;
+		ep->com.qp = NULL;
+	}
+}
+
+static void connect_request_upcall(struct iwch_ep *ep)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CONNECT_REQUEST;
+	event.local_addr = ep->com.local_addr;
+	event.remote_addr = ep->com.remote_addr;
+	event.private_data_len = ep->plen;
+	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+	event.provider_data = ep;
+	if (state_read(&ep->parent_ep->com) != DEAD)
+		ep->parent_ep->com.cm_id->event_handler(
+						ep->parent_ep->com.cm_id,
+						&event);
+	put_ep(&ep->parent_ep->com);
+	ep->parent_ep = NULL;
+}
+
+static void established_upcall(struct iwch_ep *ep)
+{
+	struct iw_cm_event event;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_ESTABLISHED;
+	if (ep->com.cm_id) {
+		PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+	}
+}
+
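+/*
+ * Return RX credits to the hardware for data the host has consumed,
+ * forcing an ACK so the peer's window reopens promptly.
+ */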
+static int update_rx_credits(struct iwch_ep *ep, u32 credits)
+{
+	struct cpl_rx_data_ack *req;
+	struct sk_buff *skb;
+
+	PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+		return 0;
+	}
+
+	req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
+	req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
+	skb->priority = CPL_PRIORITY_ACK;
+	ep->com.tdev->send(ep->com.tdev, skb);
+	return credits;
+}
+
+static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
+{
+	struct mpa_message *mpa;
+	u16 plen;
+	struct iwch_qp_attributes attrs;
+	enum iwch_qp_attr_mask mask;
+	int err;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+	/*
+ 	 * Stop mpa timer.  If it expired, then the state has
+	 * changed and we bail since ep_timeout already aborted
+	 * the connection.
+	 */
+	stop_ep_timer(ep);
+	if (state_read(&ep->com) != MPA_REQ_SENT)
+		return;
+
+	/*
+	 * If we get more than the supported amount of private data
+	 * then we must fail this connection.
+	 */
+	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	/*
+	 * copy the new data into our accumulation buffer.
+	 */
+	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	ep->mpa_pkt_len += skb->len;
+
+	/*
+	 * if we don't even have the mpa message, then bail.
+	 */
+	if (ep->mpa_pkt_len < sizeof(*mpa))
+		return;
+	mpa = (struct mpa_message *) ep->mpa_pkt;
+
+	/* Validate MPA header. */
+	if (mpa->revision != mpa_rev) {
+		err = -EPROTO;
+		goto err;
+	}
+	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+		err = -EPROTO;
+		goto err;
+	}
+
+	plen = ntohs(mpa->private_data_size);
+
+	/*
+	 * Fail if there's too much private data.
+	 */
+	if (plen > MPA_MAX_PRIVATE_DATA) {
+		err = -EPROTO;
+		goto err;
+	}
+
+	/*
+	 * Fail if plen does not account for the packet size.
+	 */
+	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+		err = -EPROTO;
+		goto err;
+	}
+
+	ep->plen = (u8) plen;
+
+	/*
+	 * If we don't have all the pdata yet, then bail.
+	 * We'll continue processing when more data arrives.
+	 */
+	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+		return;
+
+	if (mpa->flags & MPA_REJECT) {
+		err = -ECONNREFUSED;
+		goto err;
+	}
+
+	/*
+	 * If we get here we have accumulated the entire mpa
+	 * start reply message including private data. And
+	 * the MPA header is valid.
+	 */
+	state_set(&ep->com, FPDU_MODE);
+	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+	ep->mpa_attr.recv_marker_enabled = markers_enabled;
+	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+	ep->mpa_attr.version = mpa_rev;
+	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+	     "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+	attrs.mpa_attr = ep->mpa_attr;
+	attrs.max_ird = ep->ird;
+	attrs.max_ord = ep->ord;
+	attrs.llp_stream_handle = ep;
+	attrs.next_state = IWCH_QP_STATE_RTS;
+
+	mask = IWCH_QP_ATTR_NEXT_STATE |
+	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
+	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
+
+	/* bind QP and TID with INIT_WR */
+	err = iwch_modify_qp(ep->com.qp->rhp,
+			     ep->com.qp, mask, &attrs, 1);
+	if (!err)
+		goto out;
+err:
+	abort_connection(ep, skb, GFP_KERNEL);
+out:
+	connect_reply_upcall(ep, err);
+	return;
+}
+
+static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
+{
+	struct mpa_message *mpa;
+	u16 plen;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+	/*
+ 	 * Stop mpa timer.  If it expired, then the state has
+	 * changed and we bail since ep_timeout already aborted
+	 * the connection.
+	 */
+	stop_ep_timer(ep);
+	if (state_read(&ep->com) != MPA_REQ_WAIT)
+		return;
+
+	/*
+	 * If we get more than the supported amount of private data
+	 * then we must fail this connection.
+	 */
+	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+		abort_connection(ep, skb, GFP_KERNEL);
+		return;
+	}
+
+	PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+
+	/*
+	 * Copy the new data into our accumulation buffer.
+	 */
+	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	ep->mpa_pkt_len += skb->len;
+
+	/*
+	 * If we don't even have the mpa message, then bail.
+	 * We'll continue processing when more data arrives.
+	 */
+	if (ep->mpa_pkt_len < sizeof(*mpa))
+		return;
+	PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+	mpa = (struct mpa_message *) ep->mpa_pkt;
+
+	/*
+	 * Validate MPA Header.
+	 */
+	if (mpa->revision != mpa_rev) {
+		abort_connection(ep, skb, GFP_KERNEL);
+		return;
+	}
+
+	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+		abort_connection(ep, skb, GFP_KERNEL);
+		return;
+	}
+
+	plen = ntohs(mpa->private_data_size);
+
+	/*
+	 * Fail if there's too much private data.
+	 */
+	if (plen > MPA_MAX_PRIVATE_DATA) {
+		abort_connection(ep, skb, GFP_KERNEL);
+		return;
+	}
+
+	/*
+	 * Fail if plen does not account for the packet size.
+	 */
+	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+		abort_connection(ep, skb, GFP_KERNEL);
+		return;
+	}
+	ep->plen = (u8) plen;
+
+	/*
+	 * If we don't have all the pdata yet, then bail.
+	 */
+	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+		return;
+
+	/*
+	 * If we get here we have accumulated the entire mpa
+	 * start request message including private data.
+	 */
+	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+	ep->mpa_attr.recv_marker_enabled = markers_enabled;
+	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+	ep->mpa_attr.version = mpa_rev;
+	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+	     "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+	state_set(&ep->com, MPA_REQ_RCVD);
+
+	/* drive upcall */
+	connect_request_upcall(ep);
+	return;
+}
+
+static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct cpl_rx_data *hdr = cplhdr(skb);
+	unsigned int dlen = ntohs(hdr->len);
+
+	PDBG("%s ep %p dlen %u\n", __FUNCTION__, ep, dlen);
+
+	skb_pull(skb, sizeof(*hdr));
+	skb_trim(skb, dlen);
+	
+	ep->rcv_seq += dlen;
+	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
+
+	switch (state_read(&ep->com)) {
+	case MPA_REQ_SENT:
+		process_mpa_reply(ep, skb);
+		break;
+	case MPA_REQ_WAIT:
+		process_mpa_request(ep, skb);
+		break;
+	case MPA_REP_SENT:
+		break;
+	default:
+		printk(KERN_ERR MOD "%s Unexpected streaming data."
+		       " ep %p state %d tid %d\n",
+		       __FUNCTION__, ep, state_read(&ep->com), ep->hwtid);
+
+		/*
+		 * The ep will time out and inform the ULP of the failure.
+		 * See ep_timeout().
+		 */
+		break;
+	}
+
+	/* update RX credits */
+	update_rx_credits(ep, dlen);
+
+	return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Upcall from the adapter indicating data has been transmitted.
+ * For us it's just the single MPA request or reply.  We can now free
+ * the skb holding the mpa message.
+ */
+static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct cpl_wr_ack *hdr = cplhdr(skb);
+	unsigned int credits = ntohs(hdr->credits);
+
+	PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+
+	if (credits == 0)
+		return CPL_RET_BUF_DONE;
+	BUG_ON(credits != 1);
+	BUG_ON(ep->mpa_skb == NULL);
+	kfree_skb(ep->mpa_skb);
+	ep->mpa_skb = NULL;
+	dst_confirm(ep->dst);
+	if (state_read(&ep->com) == MPA_REP_SENT) {
+		ep->com.rpl_done = 1;
+		PDBG("waking up ep %p\n", ep);
+		wake_up(&ep->com.waitq);
+	}
+	return CPL_RET_BUF_DONE;
+}
+
+static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+	/*
+ 	 * We get 2 abort replies from the HW.  The first one must
+	 * be ignored except for scribbling that we need one more.
+	 */
+	if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) {
+		ep->flags |= ABORT_REQ_IN_PROGRESS;
+		return CPL_RET_BUF_DONE;
+	}
+
+	close_complete_upcall(ep);
+	state_set(&ep->com, DEAD);
+	release_ep_resources(ep);
+	return CPL_RET_BUF_DONE;
+}
+
+static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct cpl_act_open_rpl *rpl = cplhdr(skb);
+
+	PDBG("%s ep %p status %u errno %d\n", __FUNCTION__, ep, rpl->status,
+	     status2errno(rpl->status));
+	connect_reply_upcall(ep, status2errno(rpl->status));
+	state_set(&ep->com, DEAD);
+	if (ep->com.tdev->type == T3B)
+		release_tid(ep->com.tdev, GET_TID(rpl), NULL);
+	cxgb3_free_atid(ep->com.tdev, ep->atid);
+	dst_release(ep->dst);
+	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	put_ep(&ep->com);
+	return CPL_RET_BUF_DONE;
+}
+
+static int listen_start(struct iwch_listen_ep *ep)
+{
+	struct sk_buff *skb;
+	struct cpl_pass_open_req *req;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+		return -ENOMEM;
+	}
+
+	req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
+	req->local_port = ep->com.local_addr.sin_port;
+	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+	req->peer_port = 0;
+	req->peer_ip = 0;
+	req->peer_netmask = 0;
+	req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
+	req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
+	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+
+	skb->priority = 1;
+	ep->com.tdev->send(ep->com.tdev, skb);
+	return 0;
+}
+
+static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_listen_ep *ep = ctx;
+	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
+
+	PDBG("%s ep %p status %d error %d\n", __FUNCTION__, ep,
+	     rpl->status, status2errno(rpl->status));
+	ep->com.rpl_err = status2errno(rpl->status);
+	ep->com.rpl_done = 1;
+	wake_up(&ep->com.waitq);
+
+	return CPL_RET_BUF_DONE;
+}
+
+static int listen_stop(struct iwch_listen_ep *ep)
+{
+	struct sk_buff *skb;
+	struct cpl_close_listserv_req *req;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
+	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	req->cpu_idx = 0;
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
+	skb->priority = 1;
+	ep->com.tdev->send(ep->com.tdev, skb);
+	return 0;
+}
+
+static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
+			     void *ctx)
+{
+	struct iwch_listen_ep *ep = ctx;
+	struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	ep->com.rpl_err = status2errno(rpl->status);
+	ep->com.rpl_done = 1;
+	wake_up(&ep->com.waitq);
+	return CPL_RET_BUF_DONE;
+}
+
+static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
+{
+	struct cpl_pass_accept_rpl *rpl;
+	unsigned int mtu_idx;
+	u32 opt0h, opt0l, opt2;
+	int wscale;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	BUG_ON(skb_cloned(skb));
+	skb_trim(skb, sizeof(*rpl));
+	skb_get(skb);
+	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+	wscale = compute_wscale(rcv_win);
+	opt0h = V_NAGLE(0) |
+	    V_NO_CONG(nocong) |
+	    V_KEEP_ALIVE(1) |
+	    F_TCAM_BYPASS |
+	    V_WND_SCALE(wscale) |
+	    V_MSS_IDX(mtu_idx) |
+	    V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+	opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+	opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+
+	rpl = cplhdr(skb);
+	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
+	rpl->peer_ip = peer_ip;
+	rpl->opt0h = htonl(opt0h);
+	rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
+	rpl->opt2 = htonl(opt2);
+	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
+	skb->priority = CPL_PRIORITY_SETUP;
+	l2t_send(ep->com.tdev, skb, ep->l2t);
+
+	return;
+}
+
+static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
+		      struct sk_buff *skb)
+{
+	PDBG("%s t3cdev %p tid %u peer_ip %x\n", __FUNCTION__, tdev, hwtid,
+	     peer_ip);
+	BUG_ON(skb_cloned(skb));
+	skb_trim(skb, sizeof(struct cpl_tid_release));
+	skb_get(skb);
+
+	if (tdev->type == T3B)
+		release_tid(tdev, hwtid, skb);
+	else {
+		struct cpl_pass_accept_rpl *rpl;
+
+		rpl = cplhdr(skb);
+		skb->priority = CPL_PRIORITY_SETUP;
+		rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+		OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+						      hwtid));
+		rpl->peer_ip = peer_ip;
+		rpl->opt0h = htonl(F_TCAM_BYPASS);
+		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
+		rpl->opt2 = 0;
+		rpl->rsvd = rpl->opt2;
+		tdev->send(tdev, skb);
+	}
+}
+
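+/*
+ * Handle an incoming connection request on a listening endpoint:
+ * resolve the ingress netdev, route and L2T entry, allocate a child
+ * endpoint, and accept or reject the request at the CPL level.
+ */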
+static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *child_ep, *parent_ep = ctx;
+	struct cpl_pass_accept_req *req = cplhdr(skb);
+	unsigned int hwtid = GET_TID(req);
+	struct dst_entry *dst;
+	struct l2t_entry *l2t;
+	struct rtable *rt;
+	struct iff_mac tim;
+
+	PDBG("%s parent ep %p tid %u\n", __FUNCTION__, parent_ep, hwtid);
+
+	if (state_read(&parent_ep->com) != LISTEN) {
+		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
+		       __FUNCTION__);
+		goto reject;
+	}
+
+	/*
+	 * Find the netdev for this connection request.
+	 */
+	tim.mac_addr = req->dst_mac;
+	tim.vlan_tag = ntohs(req->vlan_tag);
+	if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+		printk(KERN_ERR
+			"%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
+			__FUNCTION__,
+			req->dst_mac[0],
+			req->dst_mac[1],
+			req->dst_mac[2],
+			req->dst_mac[3],
+			req->dst_mac[4],
+			req->dst_mac[5]);
+		goto reject;
+	}
+
+	/* Find output route */
+	rt = find_route(tdev,
+			req->local_ip,
+			req->peer_ip,
+			req->local_port,
+			req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
+	if (!rt) {
+		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
+		       __FUNCTION__);
+		goto reject;
+	}
+	dst = &rt->u.dst;
+	l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
+	if (!l2t) {
+		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
+		       __FUNCTION__);
+		dst_release(dst);
+		goto reject;
+	}
+	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+	if (!child_ep) {
+		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
+		       __FUNCTION__);
+		l2t_release(L2DATA(tdev), l2t);
+		dst_release(dst);
+		goto reject;
+	}
+	state_set(&child_ep->com, CONNECTING);
+	child_ep->com.tdev = tdev;
+	child_ep->com.cm_id = NULL;
+	child_ep->com.local_addr.sin_family = PF_INET;
+	child_ep->com.local_addr.sin_port = req->local_port;
+	child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
+	child_ep->com.remote_addr.sin_family = PF_INET;
+	child_ep->com.remote_addr.sin_port = req->peer_port;
+	child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
+	get_ep(&parent_ep->com);
+	child_ep->parent_ep = parent_ep;
+	child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
+	child_ep->l2t = l2t;
+	child_ep->dst = dst;
+	child_ep->hwtid = hwtid;
+	init_timer(&child_ep->timer);
+	cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
+	accept_cr(child_ep, req->peer_ip, skb);
+	goto out;
+reject:
+	reject_cr(tdev, hwtid, req->peer_ip, skb);
+out:
+	return CPL_RET_BUF_DONE;
+}
+
+static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct cpl_pass_establish *req = cplhdr(skb);
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	ep->snd_seq = ntohl(req->snd_isn);
+	ep->rcv_seq = ntohl(req->rcv_isn);
+
+	set_emss(ep, ntohs(req->tcp_opt));
+
+	dst_confirm(ep->dst);
+	state_set(&ep->com, MPA_REQ_WAIT);
+	start_ep_timer(ep);
+
+	return CPL_RET_BUF_DONE;
+}
+
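+/*
+ * The peer has closed its half of the connection.  Advance the state
+ * machine, deliver the appropriate upcalls, and disconnect or release
+ * the endpoint depending on the current state.
+ */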
+static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct iwch_qp_attributes attrs;
+	unsigned long flags;
+	int disconnect = 1;
+	int release = 0;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	dst_confirm(ep->dst);
+
+	spin_lock_irqsave(&ep->com.lock, flags);
+	switch (ep->com.state) {
+	case MPA_REQ_WAIT:
+		__state_set(&ep->com, CLOSING);
+		break;
+	case MPA_REQ_SENT:
+		__state_set(&ep->com, CLOSING);
+		connect_reply_upcall(ep, -ECONNRESET);
+		break;
+	case MPA_REQ_RCVD:
+
+		/*
+		 * We're gonna mark this puppy DEAD, but keep
+		 * the reference on it until the ULP accepts or
+		 * rejects the CR.
+		 */
+		__state_set(&ep->com, CLOSING);
+		get_ep(&ep->com);
+		break;
+	case MPA_REP_SENT:
+		__state_set(&ep->com, CLOSING);
+		ep->com.rpl_done = 1;
+		ep->com.rpl_err = -ECONNRESET;
+		PDBG("waking up ep %p\n", ep);
+		wake_up(&ep->com.waitq);
+		break;
+	case FPDU_MODE:
+		start_ep_timer(ep);
+		__state_set(&ep->com, CLOSING);
+		attrs.next_state = IWCH_QP_STATE_CLOSING;
+		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+		peer_close_upcall(ep);
+		break;
+	case ABORTING:
+		disconnect = 0;
+		break;
+	case CLOSING:
+		__state_set(&ep->com, MORIBUND);
+		disconnect = 0;
+		break;
+	case MORIBUND:
+		stop_ep_timer(ep);
+		if (ep->com.cm_id && ep->com.qp) {
+			attrs.next_state = IWCH_QP_STATE_IDLE;
+			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+		}
+		close_complete_upcall(ep);
+		__state_set(&ep->com, DEAD);
+		release = 1;
+		disconnect = 0;
+		break;
+	case DEAD:
+		disconnect = 0;
+		break;
+	default:
+		BUG_ON(1);
+	}
+	spin_unlock_irqrestore(&ep->com.lock, flags);
+	if (disconnect)
+		iwch_ep_disconnect(ep, 0, GFP_KERNEL);	
+	if (release)
+		release_ep_resources(ep);
+	return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Returns whether an ABORT_REQ_RSS message is a negative advice.
+ */
+static inline int is_neg_adv_abort(unsigned int status)
+{
+        return status == CPL_ERR_RTX_NEG_ADVICE ||
+               status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct iwch_ep *ep = ctx;
+	struct cpl_abort_rpl *rpl;
+	struct sk_buff *rpl_skb;
+	struct iwch_qp_attributes attrs;
+	int ret;
+	int state;
+
+	if (is_neg_adv_abort(req->status)) {
+		PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+		     ep->hwtid);
+		t3_l2t_send_event(ep->com.tdev, ep->l2t);
+		return CPL_RET_BUF_DONE;
+	}
+
+	/*
+ 	 * We get 2 peer aborts from the HW.  The first one must
+	 * be ignored except for scribbling that we need one more.
+	 */
+	if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) {
+		ep->flags |= PEER_ABORT_IN_PROGRESS;
+		return CPL_RET_BUF_DONE;
+	}
+
+	state = state_read(&ep->com);
+	PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
+	switch (state) {
+	case CONNECTING:
+		break;
+	case MPA_REQ_WAIT:
+		stop_ep_timer(ep);
+		break;
+	case MPA_REQ_SENT:
+		stop_ep_timer(ep);
+		connect_reply_upcall(ep, -ECONNRESET);
+		break;
+	case MPA_REP_SENT:
+		ep->com.rpl_done = 1;
+		ep->com.rpl_err = -ECONNRESET;
+		PDBG("waking up ep %p\n", ep);
+		wake_up(&ep->com.waitq);
+		break;
+	case MPA_REQ_RCVD:
+	
+		/*
+		 * We're gonna mark this puppy DEAD, but keep
+		 * the reference on it until the ULP accepts or
+		 * rejects the CR.
+		 */
+		get_ep(&ep->com);
+		break;
+	case MORIBUND:
+	case CLOSING:
+		stop_ep_timer(ep);
+		/*FALLTHROUGH*/
+	case FPDU_MODE:
+		if (ep->com.cm_id && ep->com.qp) {
+			attrs.next_state = IWCH_QP_STATE_ERROR;
+			ret = iwch_modify_qp(ep->com.qp->rhp,
+				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+				     &attrs, 1);
+			if (ret)
+				printk(KERN_ERR MOD
+				       "%s - qp <- error failed!\n",
+				       __FUNCTION__);
+		}
+		peer_abort_upcall(ep);
+		break;
+	case ABORTING:
+		break;
+	case DEAD:
+		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __FUNCTION__);
+		return CPL_RET_BUF_DONE;
+	default:
+		BUG_ON(1);
+		break;
+	}
+	dst_confirm(ep->dst);
+	
+	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+	if (!rpl_skb) {
+		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
+		       __FUNCTION__);
+		dst_release(ep->dst);
+		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+		put_ep(&ep->com);
+		return CPL_RET_BUF_DONE;
+	}
+	rpl_skb->priority = CPL_PRIORITY_DATA;
+	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+	rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
+	rpl->cmd = CPL_ABORT_NO_RST;
+	ep->com.tdev->send(ep->com.tdev, rpl_skb);
+	if (state != ABORTING) {
+		state_set(&ep->com, DEAD);
+		release_ep_resources(ep);
+	}
+	return CPL_RET_BUF_DONE;
+}
+
+static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+	struct iwch_qp_attributes attrs;
+	unsigned long flags;
+	int release = 0;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	BUG_ON(!ep);
+
+	/* The cm_id may be null if we failed to connect */
+	spin_lock_irqsave(&ep->com.lock, flags);
+	switch (ep->com.state) {
+	case CLOSING:
+		__state_set(&ep->com, MORIBUND);
+		break;
+	case MORIBUND:
+		stop_ep_timer(ep);
+		if ((ep->com.cm_id) && (ep->com.qp)) {
+			attrs.next_state = IWCH_QP_STATE_IDLE;
+			iwch_modify_qp(ep->com.qp->rhp,
+					     ep->com.qp,
+					     IWCH_QP_ATTR_NEXT_STATE,
+					     &attrs, 1);
+		}
+		close_complete_upcall(ep);
+		__state_set(&ep->com, DEAD);
+		release = 1;
+		break;
+	case ABORTING:
+		break;
+	case DEAD:
+	default:
+		BUG_ON(1);
+		break;
+	}
+	spin_unlock_irqrestore(&ep->com.lock, flags);
+	if (release)
+		release_ep_resources(ep);
+	return CPL_RET_BUF_DONE;
+}
+
+/*
+ * T3A does 3 things when a TERM is received:
+ * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
+ * 2) generate an async event on the QP with the TERMINATE opcode
+ * 3) post a TERMINATE opcode cqe into the associated CQ.
+ *
+ * For (1), we save the message in the qp for the consumer to read later.
+ * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
+ * For (3), we toss the CQE in cxio_poll_cq().
+ *
+ * terminate() handles case (1)...
+ */
+static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep *ep = ctx;
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
+	PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
+	memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+	ep->com.qp->attr.terminate_msg_len = skb->len;
+	ep->com.qp->attr.is_terminate_local = 0;
+	return CPL_RET_BUF_DONE;
+}
+
+static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct cpl_rdma_ec_status *rep = cplhdr(skb);
+	struct iwch_ep *ep = ctx;
+
+	PDBG("%s ep %p tid %u status %d\n", __FUNCTION__, ep, ep->hwtid,
+	     rep->status);
+	if (rep->status) {
+		struct iwch_qp_attributes attrs;
+
+		printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+		       __FUNCTION__, ep->hwtid);
+		stop_ep_timer(ep);
+		attrs.next_state = IWCH_QP_STATE_ERROR;
+		iwch_modify_qp(ep->com.qp->rhp,
+			       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+			       &attrs, 1);
+		abort_connection(ep, NULL, GFP_KERNEL);
+	}
+	return CPL_RET_BUF_DONE;
+}
+
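+/*
+ * Endpoint timer expired: report the failure appropriate to the
+ * current state, then move to CLOSING and abort the connection.
+ */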
+static void ep_timeout(unsigned long arg)
+{
+	struct iwch_ep *ep = (struct iwch_ep *)arg;
+	struct iwch_qp_attributes attrs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ep->com.lock, flags);
+	PDBG("%s ep %p tid %u state %d\n", __FUNCTION__, ep, ep->hwtid,
+	     ep->com.state);
+	switch (ep->com.state) {
+	case MPA_REQ_SENT:
+		connect_reply_upcall(ep, -ETIMEDOUT);
+		break;
+	case MPA_REQ_WAIT:
+		break;
+	case CLOSING:
+	case MORIBUND:
+		if (ep->com.cm_id && ep->com.qp) {
+			attrs.next_state = IWCH_QP_STATE_ERROR;
+			iwch_modify_qp(ep->com.qp->rhp,
+				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+				     &attrs, 1);
+		}
+		break;
+	default:
+		BUG();
+	}
+	__state_set(&ep->com, CLOSING);
+	spin_unlock_irqrestore(&ep->com.lock, flags);
+	abort_connection(ep, NULL, GFP_ATOMIC);
+	put_ep(&ep->com);
+}
+
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	int err;
+	struct iwch_ep *ep = to_ep(cm_id);
+	PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+
+	if (state_read(&ep->com) == DEAD) {
+		put_ep(&ep->com);
+		return -ECONNRESET;
+	}
+	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+	if (mpa_rev == 0)
+		abort_connection(ep, NULL, GFP_KERNEL);
+	else {
+		err = send_mpa_reject(ep, pdata, pdata_len);
+		err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+	}
+	return 0;
+}
+
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	int err;
+	struct iwch_qp_attributes attrs;
+	enum iwch_qp_attr_mask mask;
+	struct iwch_ep *ep = to_ep(cm_id);
+	struct iwch_dev *h = to_iwch_dev(cm_id->device);
+	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+
+	PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+	if (state_read(&ep->com) == DEAD)
+		return -ECONNRESET;
+
+	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+	BUG_ON(!qp);
+
+	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+		abort_connection(ep, NULL, GFP_KERNEL);
+		return -EINVAL;
+	}
+
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->com.qp = qp;
+
+	ep->com.rpl_done = 0;
+	ep->com.rpl_err = 0;
+	ep->ird = conn_param->ird;
+	ep->ord = conn_param->ord;
+	PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+
+	get_ep(&ep->com);
+
+	/* bind QP to EP and move to RTS */
+	attrs.mpa_attr = ep->mpa_attr;
+	attrs.max_ird = ep->ird;
+	attrs.max_ord = ep->ord;
+	attrs.llp_stream_handle = ep;
+	attrs.next_state = IWCH_QP_STATE_RTS;
+
+	/* bind QP and TID with INIT_WR */
+	mask = IWCH_QP_ATTR_NEXT_STATE |
+			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+			     IWCH_QP_ATTR_MPA_ATTR |
+			     IWCH_QP_ATTR_MAX_IRD |
+			     IWCH_QP_ATTR_MAX_ORD;
+
+	err = iwch_modify_qp(ep->com.qp->rhp,
+			     ep->com.qp, mask, &attrs, 1);
+	if (err)
+		goto err;
+
+	err = send_mpa_reply(ep, conn_param->private_data,
+			     conn_param->private_data_len);
+	if (err)
+		goto err;
+	
+	/* wait for wr_ack */
+	wait_event(ep->com.waitq, ep->com.rpl_done);
+	err = ep->com.rpl_err;
+	if (err) 
+		goto err;
+
+	state_set(&ep->com, FPDU_MODE);
+	established_upcall(ep);
+	put_ep(&ep->com);
+	return 0;
+err:
+	ep->com.cm_id = NULL;
+	ep->com.qp = NULL;
+	cm_id->rem_ref(cm_id);
+	abort_connection(ep, NULL, GFP_KERNEL);
+	put_ep(&ep->com);
+	return err;
+}
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	int err = 0;
+	struct iwch_dev *h = to_iwch_dev(cm_id->device);
+	struct iwch_ep *ep;
+	struct rtable *rt;
+
+	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+	if (!ep) {
+		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto out;
+	}
+	init_timer(&ep->timer);
+	ep->plen = conn_param->private_data_len;
+	if (ep->plen)
+		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+		       conn_param->private_data, ep->plen);
+	ep->ird = conn_param->ird;
+	ep->ord = conn_param->ord;
+	ep->com.tdev = h->rdev.t3cdev_p;
+
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->com.qp = get_qhp(h, conn_param->qpn);
+	BUG_ON(!ep->com.qp);
+	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __FUNCTION__, conn_param->qpn,
+	     ep->com.qp, cm_id);
+
+	/*
+	 * Allocate an active TID to initiate a TCP connection.
+	 */
+	ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
+	if (ep->atid == -1) {
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto fail2;
+	}
+
+	/* find a route */
+	rt = find_route(h->rdev.t3cdev_p,
+			cm_id->local_addr.sin_addr.s_addr,
+			cm_id->remote_addr.sin_addr.s_addr,
+			cm_id->local_addr.sin_port,
+			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+	if (!rt) {
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __FUNCTION__);
+		err = -EHOSTUNREACH;
+		goto fail3;
+	}
+	ep->dst = &rt->u.dst;
+
+	/* get a l2t entry */
+	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
+			     ep->dst->neighbour->dev);
+	if (!ep->l2t) {
+		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto fail4;
+	}
+
+	state_set(&ep->com, CONNECTING);
+	ep->tos = IPTOS_LOWDELAY;
+	ep->com.local_addr = cm_id->local_addr;
+	ep->com.remote_addr = cm_id->remote_addr;
+
+	/* send connect request to rnic */
+	err = send_connect(ep);
+	if (!err)
+		goto out;
+
+	l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
+fail4:
+	dst_release(ep->dst);
+fail3:
+	cxgb3_free_atid(ep->com.tdev, ep->atid);
+fail2:
+	put_ep(&ep->com);
+out:
+	return err;
+}
+
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	int err = 0;
+	struct iwch_dev *h = to_iwch_dev(cm_id->device);
+	struct iwch_listen_ep *ep;
+
+
+	might_sleep();
+
+	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+	if (!ep) {
+		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto fail1;
+	}
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	ep->com.tdev = h->rdev.t3cdev_p;
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->backlog = backlog;
+	ep->com.local_addr = cm_id->local_addr;
+
+	/*
+	 * Allocate a server TID.
+	 */
+	ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
+	if (ep->stid == -1) {
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto fail2;
+	}
+
+	state_set(&ep->com, LISTEN);
+	err = listen_start(ep);
+	if (err)
+		goto fail3;
+
+	/* wait for pass_open_rpl */
+	wait_event(ep->com.waitq, ep->com.rpl_done);
+	err = ep->com.rpl_err;
+	if (!err) {
+		cm_id->provider_data = ep;
+		goto out;
+	}
+fail3:
+	cxgb3_free_stid(ep->com.tdev, ep->stid);
+fail2:
+	put_ep(&ep->com);
+fail1:
+out:
+	return err;
+}
+
+int iwch_destroy_listen(struct iw_cm_id *cm_id)
+{
+	int err;
+	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
+
+	PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+	might_sleep();
+	state_set(&ep->com, DEAD);
+	ep->com.rpl_done = 0;
+	ep->com.rpl_err = 0;
+	err = listen_stop(ep);
+	wait_event(ep->com.waitq, ep->com.rpl_done);
+	cxgb3_free_stid(ep->com.tdev, ep->stid);
+	err = ep->com.rpl_err;
+	cm_id->rem_ref(cm_id);
+	put_ep(&ep->com);
+	return err;
+}
+
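+/*
+ * Start a graceful half-close, or an abortive close if 'abrupt' is
+ * set, driving the state machine to CLOSING/ABORTING under the
+ * endpoint lock before issuing the CPL request.
+ */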
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
+{
+	int ret=0;
+	unsigned long flags;
+	int close = 0;
+	
+	spin_lock_irqsave(&ep->com.lock, flags);
+
+	PDBG("%s ep %p state %s, abrupt %d\n", __FUNCTION__, ep,
+	     states[ep->com.state], abrupt);
+
+	if (ep->com.state == DEAD) {
+		PDBG("%s already dead ep %p\n", __FUNCTION__, ep);
+		goto out;
+	}
+
+	if (abrupt) {
+		if (ep->com.state != ABORTING) {
+			ep->com.state = ABORTING;
+			close = 1;
+		}
+		goto out;
+	}
+	
+	switch (ep->com.state) {
+	case MPA_REQ_WAIT:
+	case MPA_REQ_SENT:
+	case MPA_REQ_RCVD:
+	case MPA_REP_SENT:
+	case FPDU_MODE:
+		start_ep_timer(ep);
+		ep->com.state = CLOSING;
+		close = 1;
+		break;
+	case CLOSING:
+		ep->com.state = MORIBUND;
+		close = 1;
+		break;
+	case MORIBUND:
+		break;
+	default:
+		BUG();
+		break;
+	}
+out:
+	spin_unlock_irqrestore(&ep->com.lock, flags);
+	if (close) {
+		if (abrupt)
+			ret = send_abort(ep, NULL, gfp);
+		else
+			ret = send_halfclose(ep, gfp);
+	}
+	return ret;
+}
+
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+		     struct l2t_entry *l2t)
+{
+	struct iwch_ep *ep = ctx;
+	
+	if (ep->dst != old)
+		return 0;
+
+	PDBG("%s ep %p redirect to dst %p l2t %p\n", __FUNCTION__, ep, new,
+	     l2t);
+	dst_hold(new);
+	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	ep->l2t = l2t;
+	dst_release(old);
+	ep->dst = new;
+	return 1;
+}
+
+/*
+ * All the CM events are handled on a work queue to have a safe context.
+ */
+static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct iwch_ep_common *epc = ctx;
+
+	get_ep(epc);
+
+	/*
+	 * Save ctx and tdev in the skb->cb area.
+	 */
+	*((void **) skb->cb) = ctx;
+	*((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
+
+	/*
+	 * Queue the skb and schedule the worker thread.
+	 */
+	skb_queue_tail(&rxq, skb);
+	queue_work(workq, &skb_work);
+	return 0;
+}
+
+static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
+	
+	if (rpl->status != CPL_ERR_NONE) {
+		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
+		       "for tid %u\n", rpl->status, GET_TID(rpl));
+	}
+	return CPL_RET_BUF_DONE;
+}
+
+int __init iwch_cm_init(void)
+{
+	skb_queue_head_init(&rxq);
+
+	workq = create_singlethread_workqueue("iw_cxgb3");
+	if (!workq)
+		return -ENOMEM;
+
+	/*
+	 * All upcalls from the T3 Core go to sched() to
+	 * schedule the processing on a work queue.
+	 */
+	t3c_handlers[CPL_ACT_ESTABLISH] = sched;
+	t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
+	t3c_handlers[CPL_RX_DATA] = sched;
+	t3c_handlers[CPL_TX_DMA_ACK] = sched;
+	t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
+	t3c_handlers[CPL_ABORT_RPL] = sched;
+	t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
+	t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
+	t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
+	t3c_handlers[CPL_PASS_ESTABLISH] = sched;
+	t3c_handlers[CPL_PEER_CLOSE] = sched;
+	t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
+	t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
+	t3c_handlers[CPL_RDMA_TERMINATE] = sched;
+	t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
+	t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
+
+	/*
+	 * These are the real handlers that are called from a
+	 * work queue.
+	 */
+	work_handlers[CPL_ACT_ESTABLISH] = act_establish;
+	work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
+	work_handlers[CPL_RX_DATA] = rx_data;
+	work_handlers[CPL_TX_DMA_ACK] = tx_ack;
+	work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
+	work_handlers[CPL_ABORT_RPL] = abort_rpl;
+	work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
+	work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
+	work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
+	work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
+	work_handlers[CPL_PEER_CLOSE] = peer_close;
+	work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
+	work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
+	work_handlers[CPL_RDMA_TERMINATE] = terminate;
+	work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
+	return 0;
+}
+
+void __exit iwch_cm_term(void)
+{
+	flush_workqueue(workq);
+	destroy_workqueue(workq);
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWCH_CM_H_
+#define _IWCH_CM_H_
+
+#include <linux/inet.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 	256
+#define MPA_REV 		0	/* XXX - amso1100 uses rev 0 ! */
+#define MPA_REJECT 		0x20
+#define MPA_CRC			0x40
+#define MPA_MARKERS		0x80
+#define MPA_FLAGS_MASK		0xE0
+
+#define put_ep(ep) { \
+	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__,  \
+	     ep, atomic_read(&((ep)->kref.refcount))); \
+	kref_put(&((ep)->kref), __free_ep); \
+}
+
+#define get_ep(ep) { \
+	PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+	     ep, atomic_read(&((ep)->kref.refcount))); \
+	kref_get(&((ep)->kref));  \
+}
+
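+/*
+ * Wire format of the MPA start frame: a 16-byte key identifying
+ * request vs. reply, flags (CRC/markers/reject), the MPA revision,
+ * and length-prefixed private data supplied by the ULP.
+ */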
+struct mpa_message {
+	u8 key[16];
+	u8 flags;
+	u8 revision;
+	__be16 private_data_size;
+	u8 private_data[0];
+};
+
+struct terminate_message {
+	u8 layer_etype;
+	u8 ecode;
+	__be16 hdrct_rsvd;
+	u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum iwch_layers_types {
+	LAYER_RDMAP 		= 0x00,
+	LAYER_DDP		= 0x10,
+	LAYER_MPA		= 0x20,
+	RDMAP_LOCAL_CATA	= 0x00,
+	RDMAP_REMOTE_PROT	= 0x01,
+	RDMAP_REMOTE_OP		= 0x02,
+	DDP_LOCAL_CATA		= 0x00,
+	DDP_TAGGED_ERR		= 0x01,
+	DDP_UNTAGGED_ERR	= 0x02,
+	DDP_LLP			= 0x03
+};
+
+enum iwch_rdma_ecodes {
+	RDMAP_INV_STAG		= 0x00,
+	RDMAP_BASE_BOUNDS	= 0x01,
+	RDMAP_ACC_VIOL		= 0x02,
+	RDMAP_STAG_NOT_ASSOC	= 0x03,
+	RDMAP_TO_WRAP		= 0x04,
+	RDMAP_INV_VERS		= 0x05,
+	RDMAP_INV_OPCODE	= 0x06,
+	RDMAP_STREAM_CATA	= 0x07,
+	RDMAP_GLOBAL_CATA	= 0x08,
+	RDMAP_CANT_INV_STAG	= 0x09,
+	RDMAP_UNSPECIFIED	= 0xff	
+};
+
+enum iwch_ddp_ecodes {
+	DDPT_INV_STAG		= 0x00,
+	DDPT_BASE_BOUNDS	= 0x01,
+	DDPT_STAG_NOT_ASSOC	= 0x02,
+	DDPT_TO_WRAP		= 0x03,
+	DDPT_INV_VERS		= 0x04,
+	DDPU_INV_QN		= 0x01,
+	DDPU_INV_MSN_NOBUF	= 0x02,
+	DDPU_INV_MSN_RANGE	= 0x03,
+	DDPU_INV_MO		= 0x04,
+	DDPU_MSG_TOOBIG		= 0x05,
+	DDPU_INV_VERS		= 0x06
+};
+
+enum iwch_mpa_ecodes {
+	MPA_CRC_ERR		= 0x02,
+	MPA_MARKER_ERR		= 0x03
+};
+
+enum iwch_ep_state {
+	IDLE = 0,
+	LISTEN,	
+	CONNECTING,
+	MPA_REQ_WAIT,
+	MPA_REQ_SENT,
+	MPA_REQ_RCVD,
+	MPA_REP_SENT,
+	FPDU_MODE,
+	ABORTING,
+	CLOSING,
+	MORIBUND,
+	DEAD,
+};
+
+enum iwch_ep_flags {
+	PEER_ABORT_IN_PROGRESS	= (1 << 0),
+	ABORT_REQ_IN_PROGRESS	= (1 << 1),
+};
+
+struct iwch_ep_common {
+	struct iw_cm_id *cm_id;
+	struct iwch_qp *qp;
+	struct t3cdev *tdev;
+	enum iwch_ep_state state;
+	struct kref kref;
+	spinlock_t lock;
+	struct sockaddr_in local_addr;
+	struct sockaddr_in remote_addr;
+	wait_queue_head_t waitq;
+	int rpl_done;
+	int rpl_err;
+};
+
+struct iwch_listen_ep {
+	struct iwch_ep_common com;
+	unsigned int stid;
+	int backlog;
+};
+
+struct iwch_ep {
+	struct iwch_ep_common com;
+	struct iwch_ep *parent_ep;
+	struct timer_list timer;
+	unsigned int atid;
+	u32 hwtid;
+	u32 snd_seq;
+	u32 rcv_seq;
+	struct l2t_entry *l2t;
+	struct dst_entry *dst;
+	struct sk_buff *mpa_skb;
+	struct iwch_mpa_attributes mpa_attr;
+	unsigned int mpa_pkt_len;
+	u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+	u8 tos;
+	u16 emss;
+	u16 plen;
+	u32 ird;
+	u32 ord;
+	u32 flags;
+};
+
+static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
+{
+	return (struct iwch_ep *)cm_id->provider_data;
+}
+
+static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+	return (struct iwch_listen_ep *)cm_id->provider_data;
+}
+
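+/*
+ * Smallest TCP window-scale shift that lets a 16-bit window field cover
+ * 'win' bytes, capped at the protocol maximum of 14.
+ */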
+static inline int compute_wscale(int win)
+{
+	int wscale = 0;
+
+	while (wscale < 14 && (65535<<wscale) < win)
+		wscale++;
+	return wscale;
+}
+
+/* CM prototypes */
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
+int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
+int iwch_quiesce_tid(struct iwch_ep *ep);
+int iwch_resume_tid(struct iwch_ep *ep);
+void __free_ep(struct kref *kref);
+void iwch_rearp(struct iwch_ep *ep);
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
+
+int __init iwch_cm_init(void);
+void __exit iwch_cm_term(void);
+
+#endif				/* _IWCH_CM_H_ */
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ *	0			CQ empty, nothing returned
+ *	1			cqe returned
+ *	-EAGAIN			caller must try again
+ *	any other -errno	fatal error
+ */
+int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+		     struct ib_wc *wc)
+{
+	struct iwch_qp *qhp = NULL;
+	struct t3_cqe cqe, *rd_cqe;
+	struct t3_wq *wq;
+	u32 credit = 0;
+	u8 cqe_flushed;
+	u64 cookie;
+	int ret = 1;
+
+	rd_cqe = cxio_next_cqe(&chp->cq);
+
+	if (!rd_cqe)
+		return 0;
+
+	qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+	if (!qhp)
+		wq = NULL;
+	else {
+		spin_lock(&qhp->lock);
+		wq = &(qhp->wq);
+	}
+	ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
+				   &credit);
+	if (t3a_device(chp->rhp) && credit) {
+		PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__,
+		     credit, chp->cq.cqid);
+		cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
+	}
+
+	if (ret) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	ret = 1;
+
+	wc->wr_id = cookie;
+	wc->qp = &qhp->ibqp;
+	wc->vendor_err = CQE_STATUS(cqe);
+
+	PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
+	     "lo 0x%x cookie 0x%llx\n", __FUNCTION__,
+	     CQE_QPID(cqe), CQE_TYPE(cqe),
+	     CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+	     CQE_WRID_LOW(cqe), cookie);
+
+	if (CQE_TYPE(cqe) == 0) {
+		if (!CQE_STATUS(cqe))
+			wc->byte_len = CQE_LEN(cqe);
+		else
+			wc->byte_len = 0;
+		wc->opcode = IB_WC_RECV;
+	} else {
+		switch (CQE_OPCODE(cqe)) {
+		case T3_RDMA_WRITE:
+			wc->opcode = IB_WC_RDMA_WRITE;
+			break;
+		case T3_READ_REQ:
+			wc->opcode = IB_WC_RDMA_READ;
+			wc->byte_len = CQE_LEN(cqe);
+			break;
+		case T3_SEND:
+		case T3_SEND_WITH_SE:
+			wc->opcode = IB_WC_SEND;
+			break;
+		case T3_BIND_MW:
+			wc->opcode = IB_WC_BIND_MW;
+			break;
+
+		/* these aren't supported yet */
+		case T3_SEND_WITH_INV:
+		case T3_SEND_WITH_SE_INV:
+		case T3_LOCAL_INV:
+		case T3_FAST_REGISTER:
+		default:
+			printk(KERN_ERR MOD "Unexpected opcode %d "
+			       "in the CQE received for QPID=0x%0x\n",
+			       CQE_OPCODE(cqe), CQE_QPID(cqe));
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (cqe_flushed)
+		wc->status = IB_WC_WR_FLUSH_ERR;
+	else {
+		switch (CQE_STATUS(cqe)) {
+		case TPT_ERR_SUCCESS:
+			wc->status = IB_WC_SUCCESS;
+			break;
+		case TPT_ERR_STAG:
+			wc->status = IB_WC_LOC_ACCESS_ERR;
+			break;
+		case TPT_ERR_PDID:
+			wc->status = IB_WC_LOC_PROT_ERR;
+			break;
+		case TPT_ERR_QPID:
+		case TPT_ERR_ACCESS:
+			wc->status = IB_WC_LOC_ACCESS_ERR;
+			break;
+		case TPT_ERR_WRAP:
+			wc->status = IB_WC_GENERAL_ERR;
+			break;
+		case TPT_ERR_BOUND:
+			wc->status = IB_WC_LOC_LEN_ERR;
+			break;
+		case TPT_ERR_INVALIDATE_SHARED_MR:
+		case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+			wc->status = IB_WC_MW_BIND_ERR;
+			break;
+		case TPT_ERR_CRC:
+		case TPT_ERR_MARKER:
+		case TPT_ERR_PDU_LEN_ERR:
+		case TPT_ERR_OUT_OF_RQE:
+		case TPT_ERR_DDP_VERSION:
+		case TPT_ERR_RDMA_VERSION:
+		case TPT_ERR_DDP_QUEUE_NUM:
+		case TPT_ERR_MSN:
+		case TPT_ERR_TBIT:
+		case TPT_ERR_MO:
+		case TPT_ERR_MSN_RANGE:
+		case TPT_ERR_IRD_OVERFLOW:
+		case TPT_ERR_OPCODE:
+			wc->status = IB_WC_FATAL_ERR;
+			break;
+		case TPT_ERR_SWFLUSH:
+			wc->status = IB_WC_WR_FLUSH_ERR;
+			break;
+		default:
+			printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
+			       "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+			ret = -EINVAL;
+		}
+	}
+out:
+	if (wq)
+		spin_unlock(&qhp->lock);
+	return ret;
+}
+
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct iwch_dev *rhp;
+	struct iwch_cq *chp;
+	unsigned long flags;
+	int npolled;
+	int err = 0;
+
+	chp = to_iwch_cq(ibcq);
+	rhp = chp->rhp;
+
+	spin_lock_irqsave(&chp->lock, flags);
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+#ifdef DEBUG
+		int i=0;
+#endif
+
+		/*
+		 * Because T3 can post CQEs that are _not_ associated
+		 * with a WR, we might have to poll again after removing
+		 * one of these.
+		 */
+		do {
+			err = iwch_poll_cq_one(rhp, chp, wc + npolled);
+#ifdef DEBUG
+			BUG_ON(++i > 1000);
+#endif
+		} while (err == -EAGAIN);
+		if (err <= 0)
+			break;
+	}
+	spin_unlock_irqrestore(&chp->lock, flags);
+
+	if (err < 0)
+		return err;
+	return npolled;
+}
+
+int iwch_modify_cq(struct ib_cq *cq, int cqe)
+{
+	PDBG("iwch_modify_cq: TBD\n");
+	return 0;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <net/sock.h>
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
+			  struct respQ_msg_t *rsp_msg,
+			  enum ib_event_type ib_event,
+			  int send_term)
+{
+	struct ib_event event;
+	struct iwch_qp_attributes attrs;
+	struct iwch_qp *qhp;
+
+	spin_lock(&rnicp->lock);
+	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+
+	if (!qhp) {
+		printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+		       __FUNCTION__, CQE_STATUS(rsp_msg->cqe),
+		       CQE_QPID(rsp_msg->cqe));
+		spin_unlock(&rnicp->lock);
+		return;
+	}
+
+	if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
+	    (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
+		PDBG("%s AE received after RTS - "
+		     "qp state %d qpid 0x%x status 0x%x\n", __FUNCTION__,
+		     qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+		spin_unlock(&rnicp->lock);
+		return;
+	}
+
+	printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
+	       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+	       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
+	atomic_inc(&qhp->refcnt);
+	spin_unlock(&rnicp->lock);
+
+	event.event = ib_event;
+	event.device = chp->ibcq.device;
+	if (ib_event == IB_EVENT_CQ_ERR)
+		event.element.cq = &chp->ibcq;
+	else
+		event.element.qp = &qhp->ibqp;
+
+	if (qhp->ibqp.event_handler)
+		(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+	if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+		attrs.next_state = IWCH_QP_STATE_TERMINATE;
+		iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+			       &attrs, 1);
+		if (send_term)
+			iwch_post_terminate(qhp, rsp_msg);
+	}
+
+	if (atomic_dec_and_test(&qhp->refcnt))
+		wake_up(&qhp->wait);
+}
+
+void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
+{
+	struct iwch_dev *rnicp;
+	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+	struct iwch_cq *chp;
+	struct iwch_qp *qhp;
+	u32 cqid = RSPQ_CQID(rsp_msg);
+
+	rnicp = (struct iwch_dev *) rdev_p->ulp;
+	spin_lock(&rnicp->lock);
+	chp = get_chp(rnicp, cqid);
+	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+	if (!chp || !qhp) {
+		printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
+		       "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+		       cqid, CQE_QPID(rsp_msg->cqe),
+		       CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+		       CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
+		       CQE_WRID_LOW(rsp_msg->cqe));
+		spin_unlock(&rnicp->lock);
+		goto out;
+	}
+	iwch_qp_add_ref(&qhp->ibqp);
+	atomic_inc(&chp->refcnt);
+	spin_unlock(&rnicp->lock);
+
+	/*
+	 * 1) completion of our sending a TERMINATE.
+	 * 2) incoming TERMINATE message.
+	 */
+	if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
+	    (CQE_STATUS(rsp_msg->cqe) == 0)) {
+		if (SQ_TYPE(rsp_msg->cqe)) {
+			PDBG("%s QPID 0x%x ep %p disconnecting\n",
+			     __FUNCTION__, qhp->wq.qpid, qhp->ep);
+			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+		} else {
+			PDBG("%s post REQ_ERR AE QPID 0x%x\n", __FUNCTION__,
+			     qhp->wq.qpid);
+			post_qp_event(rnicp, chp, rsp_msg,
+				      IB_EVENT_QP_REQ_ERR, 0);
+			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+		}
+		goto done;
+	}
+
+	/* Bad incoming Read request */
+	if (SQ_TYPE(rsp_msg->cqe) &&
+	    (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+		goto done;
+	}
+
+	/* Bad incoming write */
+	if (RQ_TYPE(rsp_msg->cqe) &&
+	    (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+		goto done;
+	}
+
+	switch (CQE_STATUS(rsp_msg->cqe)) {
+
+	/* Completion Events */
+	case TPT_ERR_SUCCESS:
+
+		/*
+		 * Confirm the destination entry if this is a RECV completion.
+		 */
+		if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
+			dst_confirm(qhp->ep->dst);
+		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+		break;
+
+	case TPT_ERR_STAG:
+	case TPT_ERR_PDID:
+	case TPT_ERR_QPID:
+	case TPT_ERR_ACCESS:
+	case TPT_ERR_WRAP:
+	case TPT_ERR_BOUND:
+	case TPT_ERR_INVALIDATE_SHARED_MR:
+	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+		printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
+		       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+		       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+		       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+		       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
+		break;
+
+	/* Device Fatal Errors */
+	case TPT_ERR_ECC:
+	case TPT_ERR_ECC_PSTAG:
+	case TPT_ERR_INTERNAL_ERR:
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
+		break;
+
+	/* QP Fatal Errors */
+	case TPT_ERR_OUT_OF_RQE:
+	case TPT_ERR_PBL_ADDR_BOUND:
+	case TPT_ERR_CRC:
+	case TPT_ERR_MARKER:
+	case TPT_ERR_PDU_LEN_ERR:
+	case TPT_ERR_DDP_VERSION:
+	case TPT_ERR_RDMA_VERSION:
+	case TPT_ERR_OPCODE:
+	case TPT_ERR_DDP_QUEUE_NUM:
+	case TPT_ERR_MSN:
+	case TPT_ERR_TBIT:
+	case TPT_ERR_MO:
+	case TPT_ERR_MSN_GAP:
+	case TPT_ERR_MSN_RANGE:
+	case TPT_ERR_RQE_ADDR_BOUND:
+	case TPT_ERR_IRD_OVERFLOW:
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+		break;
+
+	default:
+		printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+		       CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
+		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+		break;
+	}
+done:
+	if (atomic_dec_and_test(&chp->refcnt))
+		wake_up(&chp->wait);
+	iwch_qp_rem_ref(&qhp->ibqp);
+out:
+	dev_kfree_skb_irq(skb);
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_H__
+#define __IWCH_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+
+struct iwch_pd;
+struct iwch_cq;
+struct iwch_qp;
+struct iwch_mr;
+
+struct iwch_rnic_attributes {
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 max_qps;
+	u32 max_wrs;				/* Max for any SQ/RQ */
+	u32 max_sge_per_wr;
+	u32 max_sge_per_rdma_write_wr;	/* for RDMA Write WR */
+	u32 max_cqs;
+	u32 max_cqes_per_cq;
+	u32 max_mem_regs;
+	u32 max_phys_buf_entries;		/* for phys buf list */
+	u32 max_pds;
+
+	/*
+	 * The memory page sizes supported by this RNIC.
+	 * Bit position i in bitmap indicates page of
+	 * size (4k)^i.  Phys block list mode unsupported.
+	 */
+	u32 mem_pgsizes_bitmask;
+	u8 can_resize_wq;
+
+	/*
+	 * The maximum number of RDMA Reads that can be outstanding
+	 * per QP with this RNIC as the target.
+	 */
+	u32 max_rdma_reads_per_qp;
+
+	/*
+	 * The maximum number of resources this RNIC uses for inbound
+	 * RDMA Reads, i.e. with this RNIC as the target.
+	 */
+	u32 max_rdma_read_resources;
+
+	/*
+	 * The max depth per QP for initiation of RDMA Read
+	 * by this RNIC.
+	 */
+	u32 max_rdma_read_qp_depth;
+
+	/*
+	 * The maximum depth for initiation of RDMA Read
+	 * operations by this RNIC on all QPs
+	 */
+	u32 max_rdma_read_depth;
+	u8 rq_overflow_handled;
+	u32 can_modify_ird;
+	u32 can_modify_ord;
+	u32 max_mem_windows;
+	u32 stag0_value;
+	u8 zbva_support;
+	u8 local_invalidate_fence;
+	u32 cq_overflow_detection;
+};
+
+struct iwch_dev {
+	struct ib_device ibdev;
+	struct cxio_rdev rdev;
+	u32 device_cap_flags;
+	struct iwch_rnic_attributes attr;
+	struct idr cqidr;
+	struct idr qpidr;
+	struct idr mmidr;
+	spinlock_t lock;
+	struct list_head entry;
+};
+
+static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct iwch_dev, ibdev);
+}
+
+static inline int t3b_device(const struct iwch_dev *rhp)
+{
+	return (rhp->rdev.t3cdev_p->type == T3B);
+}
+
+static inline int t3a_device(const struct iwch_dev *rhp)
+{
+	return (rhp->rdev.t3cdev_p->type == T3A);
+}
+
+static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
+{
+	return idr_find(&rhp->cqidr, cqid);
+}
+
+static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
+{
+	return idr_find(&rhp->qpidr, qpid);
+}
+
+static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
+{
+	return idr_find(&rhp->mmidr, mmid);
+}
+
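+/*
+ * Store 'handle' in the idr at exactly 'id'.  The ids (qpid, cqid, mmid)
+ * are assigned by the HAL, so the idr is only a lookup table; the
+ * pre-get/retry loop is the usual idiom for idr_pre_get()/idr_get_new_above().
+ */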
+static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
+				void *handle, u32 id)
+{
+	int ret;
+	u32 newid;
+
+	do {
+		if (!idr_pre_get(idr, GFP_KERNEL)) {
+			return -ENOMEM;
+		}
+		spin_lock_irq(&rhp->lock);
+		ret = idr_get_new_above(idr, handle, id, &newid);
+		BUG_ON(newid != id);
+		spin_unlock_irq(&rhp->lock);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
+static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
+{
+	spin_lock_irq(&rhp->lock);
+	idr_remove(idr, id);
+	spin_unlock_irq(&rhp->lock);
+}
+
+extern struct cxgb3_client t3c_client;
+extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+#endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+					struct iwch_mr *mhp,
+					int shift,
+					__be64 *page_list)
+{
+	u32 stag;
+	u32 mmid;
+
+
+	if (cxio_register_phys_mem(&rhp->rdev,
+				   &stag, mhp->attr.pdid,
+				   mhp->attr.perms,
+				   mhp->attr.zbva,
+				   mhp->attr.va_fbo,
+				   mhp->attr.len,
+				   shift-12,
+				   page_list,
+				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+		return -ENOMEM;
+	mhp->attr.state = 1;
+	mhp->attr.stag = stag;
+	mmid = stag >> 8;
+	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+	return 0;
+}
+
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+					struct iwch_mr *mhp,
+					int shift,
+					__be64 *page_list,
+					int npages)
+{
+	u32 stag;
+	u32 mmid;
+
+
+	/* We could support this... */
+	if (npages > mhp->attr.pbl_size)
+		return -ENOMEM;
+
+	stag = mhp->attr.stag;
+	if (cxio_reregister_phys_mem(&rhp->rdev,
+				   &stag, mhp->attr.pdid,
+				   mhp->attr.perms,
+				   mhp->attr.zbva,
+				   mhp->attr.va_fbo,
+				   mhp->attr.len,
+				   shift-12,
+				   page_list,
+				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+		return -ENOMEM;
+	mhp->attr.state = 1;
+	mhp->attr.stag = stag;
+	mmid = stag >> 8;
+	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+	return 0;
+}
+
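+/*
+ * Collapse a caller-supplied list of physical buffers into a single page
+ * list: pick the largest page shift that covers the buffer layout, then
+ * emit one big-endian page address per covered page.
+ */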
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+					int num_phys_buf,
+					u64 *iova_start,
+					u64 *total_size,
+					int *npages,
+					int *shift,
+					__be64 **page_list)
+{
+	u64 mask;
+	int i, j, n;
+
+	mask = 0;
+	*total_size = 0;
+	for (i = 0; i < num_phys_buf; ++i) {
+		if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+			return -EINVAL;
+		if (i != 0 && i != num_phys_buf - 1 &&
+		    (buffer_list[i].size & ~PAGE_MASK))
+			return -EINVAL;
+		*total_size += buffer_list[i].size;
+		if (i > 0)
+			mask |= buffer_list[i].addr;
+	}
+
+	if (*total_size > 0xFFFFFFFFULL)
+		return -ENOMEM;
+
+	/* Find largest page shift we can use to cover buffers */
+	for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+		if (num_phys_buf > 1) {
+			if ((1ULL << *shift) & mask)
+				break;
+		} else
+			if (1ULL << *shift >=
+			    buffer_list[0].size +
+			    (buffer_list[0].addr & ((1ULL << *shift) - 1)))
+				break;
+
+	buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+	buffer_list[0].addr &= ~0ull << *shift;
+
+	*npages = 0;
+	for (i = 0; i < num_phys_buf; ++i)
+		*npages += (buffer_list[i].size +
+			(1ULL << *shift) - 1) >> *shift;
+
+	if (!*npages)
+		return -EINVAL;
+
+	*page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
+	if (!*page_list)
+		return -ENOMEM;
+
+	n = 0;
+	for (i = 0; i < num_phys_buf; ++i)
+		for (j = 0;
+		     j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+		     ++j)
+			(*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
+			    ((u64) j << *shift));
+
+	PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
+	     __FUNCTION__, *iova_start, mask, *shift, *total_size, *npages);
+
+	return 0;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -0,0 +1,1186 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ethtool.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "cxio_hal.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+#include "iwch_user.h"
+
+static int iwch_modify_port(struct ib_device *ibdev,
+			    u8 port, int port_modify_mask,
+			    struct ib_port_modify *props)
+{
+	return -ENOSYS;
+}
+
+static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
+				    struct ib_ah_attr *ah_attr)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static int iwch_ah_destroy(struct ib_ah *ah)
+{
+	return -ENOSYS;
+}
+
+static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	return -ENOSYS;
+}
+
+static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	return -ENOSYS;
+}
+
+static int iwch_process_mad(struct ib_device *ibdev,
+			    int mad_flags,
+			    u8 port_num,
+			    struct ib_wc *in_wc,
+			    struct ib_grh *in_grh,
+			    struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	return -ENOSYS;
+}
+
+static int iwch_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct iwch_dev *rhp = to_iwch_dev(context->device);
+	struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
+	struct iwch_mm_entry *mm, *tmp;
+
+	PDBG("%s context %p\n", __FUNCTION__, context);
+	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
+		kfree(mm);
+	cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
+	kfree(ucontext);
+	return 0;
+}
+
+static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
+					struct ib_udata *udata)
+{
+	struct iwch_ucontext *context;
+	struct iwch_dev *rhp = to_iwch_dev(ibdev);
+
+	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+	cxio_init_ucontext(&rhp->rdev, &context->uctx);
+	INIT_LIST_HEAD(&context->mmaps);
+	spin_lock_init(&context->mmap_lock);
+	return &context->ibucontext;
+}
+
+static int iwch_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct iwch_cq *chp;
+
+	PDBG("%s ib_cq %p\n", __FUNCTION__, ib_cq);
+	chp = to_iwch_cq(ib_cq);
+
+	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+	atomic_dec(&chp->refcnt);
+	wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+	cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+	kfree(chp);
+	return 0;
+}
+
+static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries,
+			     struct ib_ucontext *ib_context,
+			     struct ib_udata *udata)
+{
+	struct iwch_dev *rhp;
+	struct iwch_cq *chp;
+	struct iwch_create_cq_resp uresp;
+	struct iwch_create_cq_req ureq;
+	struct iwch_ucontext *ucontext = NULL;
+
+	PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries);
+	rhp = to_iwch_dev(ibdev);
+	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+	if (!chp)
+		return ERR_PTR(-ENOMEM);
+
+	if (ib_context) {
+		ucontext = to_iwch_ucontext(ib_context);
+		if (!t3a_device(rhp)) {
+			if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
+				kfree(chp);
+				return ERR_PTR(-EFAULT);
+			}
+			chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
+		}
+	}
+
+	if (t3a_device(rhp)) {
+
+		/*
+		 * T3A: Add some fluff to handle extra CQEs inserted
+		 * for various errors.
+		 * Additional CQE possibilities:
+		 *      TERMINATE,
+		 *      incoming RDMA WRITE Failures
+		 *      incoming RDMA READ REQUEST FAILUREs
+		 * NOTE: We cannot ensure the CQ won't overflow.
+		 */
+		entries += 16;
+	}
+	entries = roundup_pow_of_two(entries);
+	chp->cq.size_log2 = ilog2(entries);
+
+	if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+		kfree(chp);
+		return ERR_PTR(-ENOMEM);
+	}
+	chp->rhp = rhp;
+	chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1;
+	spin_lock_init(&chp->lock);
+	atomic_set(&chp->refcnt, 1);
+	init_waitqueue_head(&chp->wait);
+	insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+
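+	/*
+	 * For userspace CQs, reserve an mmap key and return it in the
+	 * create response; the user library later maps the CQ memory
+	 * through iwch_mmap() using this key.
+	 */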
+	if (ucontext) {
+		struct iwch_mm_entry *mm;
+
+		mm = kmalloc(sizeof *mm, GFP_KERNEL);
+		if (!mm) {
+			iwch_destroy_cq(&chp->ibcq);
+			return ERR_PTR(-ENOMEM);
+		}
+		uresp.cqid = chp->cq.cqid;
+		uresp.size_log2 = chp->cq.size_log2;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+			kfree(mm);
+			iwch_destroy_cq(&chp->ibcq);
+			return ERR_PTR(-EFAULT);
+		}
+		mm->key = uresp.key;
+		mm->addr = virt_to_phys(chp->cq.queue);
+		mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+					     sizeof (struct t3_cqe));
+		insert_mmap(ucontext, mm);
+	}
+	PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+	     chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+	     (u64)chp->cq.dma_addr);
+	return &chp->ibcq;
+}
+
+static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	struct iwch_cq *chp = to_iwch_cq(cq);
+	struct t3_cq oldcq, newcq;
+	int ret;
+
+	PDBG("%s ib_cq %p cqe %d\n", __FUNCTION__, cq, cqe);
+
+	/* We don't downsize... */
+	if (cqe <= cq->cqe)
+		return 0;
+
+	/* create new t3_cq with new size */
+	cqe = roundup_pow_of_two(cqe+1);
+	newcq.size_log2 = ilog2(cqe);
+
+	/* Don't allow a resize to fewer CQEs than are currently queued */
+	if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
+		return -ENOMEM;
+	}
+
+	/* Quiesce all QPs using this CQ */
+	ret = iwch_quiesce_qps(chp);
+	if (ret) {
+		return ret;
+	}
+
+	ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
+	if (ret)
+		return ret;
+
+	/* copy CQEs */
+	memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
+				        sizeof(struct t3_cqe));
+
+	/* old iwch_qp gets new t3_cq but keeps old cqid */
+	oldcq = chp->cq;
+	chp->cq = newcq;
+	chp->cq.cqid = oldcq.cqid;
+
+	/* resize new t3_cq to update the HW context */
+	ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
+	if (ret) {
+		chp->cq = oldcq;
+		return ret;
+	}
+	chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
+
+	/* destroy old t3_cq */
+	oldcq.cqid = newcq.cqid;
+	ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
+	if (ret) {
+		printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
+			__FUNCTION__, ret);
+	}
+
+	/* add user hooks here */
+
+	/* resume qps */
+	ret = iwch_resume_qps(chp);
+	return ret;
+}
+
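+/*
+ * Re-arm the CQ for notification.  For user CQs, the CQ read pointer is
+ * first refreshed from the user-mapped location before the arm operation
+ * is issued to the hardware.
+ */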
+static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct iwch_dev *rhp;
+	struct iwch_cq *chp;
+	enum t3_cq_opcode cq_op;
+	int err;
+	unsigned long flag;
+	u32 rptr;
+
+	chp = to_iwch_cq(ibcq);
+	rhp = chp->rhp;
+	if (notify == IB_CQ_SOLICITED)
+		cq_op = CQ_ARM_SE;
+	else
+		cq_op = CQ_ARM_AN;
+	if (chp->user_rptr_addr) {
+		if (get_user(rptr, chp->user_rptr_addr))
+			return -EFAULT;
+		spin_lock_irqsave(&chp->lock, flag);
+		chp->cq.rptr = rptr;
+	} else
+		spin_lock_irqsave(&chp->lock, flag);
+	PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
+	err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
+	spin_unlock_irqrestore(&chp->lock, flag);
+	if (err)
+		printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
+		       chp->cq.cqid);
+	return err;
+}
+
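+/*
+ * mmap handler: the offset encodes a key registered at CQ/QP create time
+ * (see insert_mmap()).  Doorbell space is mapped uncached with read
+ * access refused; WQ/CQ queue memory is mapped as ordinary contiguous
+ * DMA memory.
+ */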
+static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	int len = vma->vm_end - vma->vm_start;
+	u32 key = vma->vm_pgoff << PAGE_SHIFT;
+	struct cxio_rdev *rdev_p;
+	int ret = 0;
+	struct iwch_mm_entry *mm;
+	struct iwch_ucontext *ucontext;
+	u64 addr;
+
+	PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
+	     key, len);
+
+	if (vma->vm_start & (PAGE_SIZE-1))
+		return -EINVAL;
+
+	rdev_p = &(to_iwch_dev(context->device)->rdev);
+	ucontext = to_iwch_ucontext(context);
+
+	mm = remove_mmap(ucontext, key, len);
+	if (!mm)
+		return -EINVAL;
+	addr = mm->addr;
+	kfree(mm);
+
+	if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
+	    (addr < (rdev_p->rnic_info.udbell_physbase +
+		       rdev_p->rnic_info.udbell_len))) {
+
+		/*
+		 * Map T3 DB register.
+		 */
+		if (vma->vm_flags & VM_READ)
+			return -EPERM;
+
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+		vma->vm_flags &= ~VM_MAYREAD;
+		ret = io_remap_pfn_range(vma, vma->vm_start,
+					 addr >> PAGE_SHIFT,
+					 len, vma->vm_page_prot);
+	} else {
+
+		/*
+		 * Map WQ or CQ contig dma memory...
+		 */
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      addr >> PAGE_SHIFT,
+				      len, vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
+static int iwch_deallocate_pd(struct ib_pd *pd)
+{
+	struct iwch_dev *rhp;
+	struct iwch_pd *php;
+
+	php = to_iwch_pd(pd);
+	rhp = php->rhp;
+	PDBG("%s ibpd %p pdid 0x%x\n", __FUNCTION__, pd, php->pdid);
+	cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
+	kfree(php);
+	return 0;
+}
+
+static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
+			       struct ib_ucontext *context,
+			       struct ib_udata *udata)
+{
+	struct iwch_pd *php;
+	u32 pdid;
+	struct iwch_dev *rhp;
+
+	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	rhp = to_iwch_dev(ibdev);
+	pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
+	if (!pdid)
+		return ERR_PTR(-EINVAL);
+	php = kzalloc(sizeof(*php), GFP_KERNEL);
+	if (!php) {
+		cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
+		return ERR_PTR(-ENOMEM);
+	}
+	php->pdid = pdid;
+	php->rhp = rhp;
+	if (context) {
+		if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+			iwch_deallocate_pd(&php->ibpd);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+	PDBG("%s pdid 0x%0x ptr 0x%p\n", __FUNCTION__, pdid, php);
+	return &php->ibpd;
+}
+
+static int iwch_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct iwch_dev *rhp;
+	struct iwch_mr *mhp;
+	u32 mmid;
+
+	PDBG("%s ib_mr %p\n", __FUNCTION__, ib_mr);
+	/* There can be no memory windows */
+	if (atomic_read(&ib_mr->usecnt))
+		return -EINVAL;
+
+	mhp = to_iwch_mr(ib_mr);
+	rhp = mhp->rhp;
+	mmid = mhp->attr.stag >> 8;
+	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+		       mhp->attr.pbl_addr);
+	remove_handle(rhp, &rhp->mmidr, mmid);
+	if (mhp->kva)
+		kfree((void *) (unsigned long) mhp->kva);
+	PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
+	kfree(mhp);
+	return 0;
+}
+
+static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+					struct ib_phys_buf *buffer_list,
+					int num_phys_buf,
+					int acc,
+					u64 *iova_start)
+{
+	__be64 *page_list;
+	int shift;
+	u64 total_size;
+	int npages;
+	struct iwch_dev *rhp;
+	struct iwch_pd *php;
+	struct iwch_mr *mhp;
+	int ret;
+
+	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	php = to_iwch_pd(pd);
+	rhp = php->rhp;
+
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+
+	/* First check that we have enough alignment */
+	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (num_phys_buf > 1 &&
+	    ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+			 	   &total_size, &npages, &shift, &page_list);
+	if (ret)
+		goto err;
+
+	mhp->rhp = rhp;
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.zbva = 0;
+	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+	mhp->attr.va_fbo = *iova_start;
+	mhp->attr.page_size = shift - 12;
+	mhp->attr.len = (u32) total_size;
+	mhp->attr.pbl_size = npages;
+	ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
+	kfree(page_list);
+	if (ret) {
+		goto err;
+	}
+	return &mhp->ibmr;
+err:
+	kfree(mhp);
+	return ERR_PTR(ret);
+}
+
+static int iwch_reregister_phys_mem(struct ib_mr *mr,
+				     int mr_rereg_mask,
+				     struct ib_pd *pd,
+				     struct ib_phys_buf *buffer_list,
+				     int num_phys_buf,
+				     int acc, u64 *iova_start)
+{
+
+	struct iwch_mr mh, *mhp;
+	struct iwch_pd *php;
+	struct iwch_dev *rhp;
+	__be64 *page_list = NULL;
+	int shift = 0;
+	u64 total_size;
+	int npages;
+	int ret;
+
+	PDBG("%s ib_mr %p ib_pd %p\n", __FUNCTION__, mr, pd);
+
+	/* There can be no memory windows */
+	if (atomic_read(&mr->usecnt))
+		return -EINVAL;
+
+	mhp = to_iwch_mr(mr);
+	rhp = mhp->rhp;
+	php = to_iwch_pd(mr->pd);
+
+	/* make sure we are on the same adapter */
+	if (rhp != php->rhp)
+		return -EINVAL;
+
+	memcpy(&mh, mhp, sizeof *mhp);
+
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		php = to_iwch_pd(pd);
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		mh.attr.perms = iwch_ib_to_tpt_access(acc);
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		ret = build_phys_page_list(buffer_list, num_phys_buf,
+					   iova_start,
+					   &total_size, &npages,
+					   &shift, &page_list);
+		if (ret)
+			return ret;
+	}
+
+	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+	kfree(page_list);
+	if (ret) {
+		return ret;
+	}
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		mhp->attr.pdid = php->pdid;
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		mhp->attr.zbva = 0;
+		mhp->attr.va_fbo = *iova_start;
+		mhp->attr.page_size = shift - 12;
+		mhp->attr.len = (u32) total_size;
+		mhp->attr.pbl_size = npages;
+	}
+
+	return 0;
+}
+
+
+struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				      int acc, struct ib_udata *udata)
+{
+	__be64 *pages;
+	int shift, n, len;
+	int i, j, k;
+	int err = 0;
+	struct ib_umem_chunk *chunk;
+	struct iwch_dev *rhp;
+	struct iwch_pd *php;
+	struct iwch_mr *mhp;
+	struct iwch_reg_user_mr_resp uresp;
+
+	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	shift = ffs(region->page_size) - 1;
+
+	php = to_iwch_pd(pd);
+	rhp = php->rhp;
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+
+	n = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		n += chunk->nents;
+
+	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto err;
+	}
+
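+	/*
+	 * Walk the umem chunks and record the DMA address of every page,
+	 * converted to big endian as the adapter's page list expects.
+	 */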
+	i = n = 0;
+
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		for (j = 0; j < chunk->nmap; ++j) {
+			len = sg_dma_len(&chunk->page_list[j]) >> shift;
+			for (k = 0; k < len; ++k) {
+				pages[i++] = cpu_to_be64(sg_dma_address(
+					&chunk->page_list[j]) +
+					region->page_size * k);
+			}
+		}
+
+	mhp->rhp = rhp;
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.zbva = 0;
+	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+	mhp->attr.va_fbo = region->virt_base;
+	mhp->attr.page_size = shift - 12;
+	mhp->attr.len = (u32) region->length;
+	mhp->attr.pbl_size = i;
+	err = iwch_register_mem(rhp, php, mhp, shift, pages);
+	kfree(pages);
+	if (err)
+		goto err;
+
+	if (udata && t3b_device(rhp)) {
+		uresp.pbl_addr = (mhp->attr.pbl_addr -
+                                 rhp->rdev.rnic_info.pbl_base) >> 3;
+		PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__,
+		     uresp.pbl_addr);
+
+		if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+			iwch_dereg_mr(&mhp->ibmr);
+			err = -EFAULT;
+			goto err;
+		}
+	}
+
+	return &mhp->ibmr;
+
+err:
+	kfree(mhp);
+	return ERR_PTR(err);
+}
+
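+/*
+ * A DMA MR is emulated with a zero-based physical registration covering
+ * the maximum 32-bit length the adapter supports.
+ */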
+struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct ib_phys_buf bl;
+	u64 kva;
+	struct ib_mr *ibmr;
+
+	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+
+	/*
+	 * T3 only supports 32 bits of size.
+	 */
+	bl.size = 0xffffffff;
+	bl.addr = 0;
+	kva = 0;
+	ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
+	return ibmr;
+}
+
+struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+{
+	struct iwch_dev *rhp;
+	struct iwch_pd *php;
+	struct iwch_mw *mhp;
+	u32 mmid;
+	u32 stag = 0;
+	int ret;
+
+	php = to_iwch_pd(pd);
+	rhp = php->rhp;
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+	ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
+	if (ret) {
+		kfree(mhp);
+		return ERR_PTR(ret);
+	}
+	mhp->rhp = rhp;
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.type = TPT_MW;
+	mhp->attr.stag = stag;
+	mmid = (stag) >> 8;
+	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __FUNCTION__, mmid, mhp, stag);
+	return &(mhp->ibmw);
+}
+
+int iwch_dealloc_mw(struct ib_mw *mw)
+{
+	struct iwch_dev *rhp;
+	struct iwch_mw *mhp;
+	u32 mmid;
+
+	mhp = to_iwch_mw(mw);
+	rhp = mhp->rhp;
+	mmid = (mw->rkey) >> 8;
+	cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+	remove_handle(rhp, &rhp->mmidr, mmid);
+	kfree(mhp);
+	PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __FUNCTION__, mw, mmid, mhp);
+	return 0;
+}
+
+static int iwch_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct iwch_dev *rhp;
+	struct iwch_qp *qhp;
+	struct iwch_qp_attributes attrs;
+	struct iwch_ucontext *ucontext;
+
+	qhp = to_iwch_qp(ib_qp);
+	rhp = qhp->rhp;
+
+	attrs.next_state = IWCH_QP_STATE_ERROR;
+	iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
+	wait_event(qhp->wait, !qhp->ep);
+
+	remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
+
+	atomic_dec(&qhp->refcnt);
+	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+
+	ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
+				  : NULL;
+	cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+	PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __FUNCTION__,
+	     ib_qp, qhp->wq.qpid, qhp);
+	kfree(qhp);
+	return 0;
+}
+
+static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *attrs,
+			     struct ib_udata *udata)
+{
+	struct iwch_dev *rhp;
+	struct iwch_qp *qhp;
+	struct iwch_pd *php;
+	struct iwch_cq *schp;
+	struct iwch_cq *rchp;
+	struct iwch_create_qp_resp uresp;
+	int wqsize, sqsize, rqsize;
+	struct iwch_ucontext *ucontext;
+
+	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	if (attrs->qp_type != IB_QPT_RC)
+		return ERR_PTR(-EINVAL);
+	php = to_iwch_pd(pd);
+	rhp = php->rhp;
+	schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
+	rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
+	if (!schp || !rchp)
+		return ERR_PTR(-EINVAL);
+
+	/* The RQT size must be # of entries + 1 rounded up to a power of two */
+	rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
+	if (rqsize == attrs->cap.max_recv_wr)
+		rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
+
+	/* T3 doesn't support RQT depth < 16 */
+	if (rqsize < 16)
+		rqsize = 16;
+
+	if (rqsize > T3_MAX_RQ_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	if (attrs->cap.max_inline_data > T3_MAX_INLINE)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * NOTE: The SQ and total WQ sizes don't need to be
+	 * a power of two.  However, all the code assumes
+	 * they are (e.g. Q_FREECNT() and friends).
+	 */
+	sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
+	wqsize = roundup_pow_of_two(rqsize + sqsize);
+	PDBG("%s wqsize %d sqsize %d rqsize %d\n", __FUNCTION__,
+	     wqsize, sqsize, rqsize);
+	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+	if (!qhp)
+		return ERR_PTR(-ENOMEM);
+	qhp->wq.size_log2 = ilog2(wqsize);
+	qhp->wq.rq_size_log2 = ilog2(rqsize);
+	qhp->wq.sq_size_log2 = ilog2(sqsize);
+	ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+	if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
+			   ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
+		kfree(qhp);
+		return ERR_PTR(-ENOMEM);
+	}
+	attrs->cap.max_recv_wr = rqsize - 1;
+	attrs->cap.max_send_wr = sqsize;
+	qhp->rhp = rhp;
+	qhp->attr.pd = php->pdid;
+	qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
+	qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
+	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	qhp->attr.state = IWCH_QP_STATE_IDLE;
+	qhp->attr.next_state = IWCH_QP_STATE_IDLE;
+
+	/*
+	 * XXX - These don't get passed in from the openib user
+	 * at create time.  The CM sets them via a QP modify.
+	 * Need to fix...  I think the CM should
+	 */
+	qhp->attr.enable_rdma_read = 1;
+	qhp->attr.enable_rdma_write = 1;
+	qhp->attr.enable_bind = 1;
+	qhp->attr.max_ord = 1;
+	qhp->attr.max_ird = 1;
+
+	spin_lock_init(&qhp->lock);
+	init_waitqueue_head(&qhp->wait);
+	atomic_set(&qhp->refcnt, 1);
+	insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
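+	/*
+	 * For userspace QPs, export two mmap regions: one for the work
+	 * queue memory itself and one for the user doorbell page.
+	 */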
+	if (udata) {
+
+		struct iwch_mm_entry *mm1, *mm2;
+
+		mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+		if (!mm1) {
+			iwch_destroy_qp(&qhp->ibqp);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+		if (!mm2) {
+			kfree(mm1);
+			iwch_destroy_qp(&qhp->ibqp);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		uresp.qpid = qhp->wq.qpid;
+		uresp.size_log2 = qhp->wq.size_log2;
+		uresp.sq_size_log2 = qhp->wq.sq_size_log2;
+		uresp.rq_size_log2 = qhp->wq.rq_size_log2;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.db_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+			kfree(mm1);
+			kfree(mm2);
+			iwch_destroy_qp(&qhp->ibqp);
+			return ERR_PTR(-EFAULT);
+		}
+		mm1->key = uresp.key;
+		mm1->addr = virt_to_phys(qhp->wq.queue);
+		mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
+		insert_mmap(ucontext, mm1);
+		mm2->key = uresp.db_key;
+		mm2->addr = qhp->wq.udb & PAGE_MASK;
+		mm2->len = PAGE_SIZE;
+		insert_mmap(ucontext, mm2);
+	}
+	qhp->ibqp.qp_num = qhp->wq.qpid;
+	init_timer(&(qhp->timer));
+	PDBG("%s sq_num_entries %d, rq_num_entries %d "
+	     "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
+	     __FUNCTION__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+	     qhp->wq.qpid, qhp, (u64)qhp->wq.dma_addr, 1 << qhp->wq.size_log2);
+	return (&qhp->ibqp);
+}
+
+static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	struct iwch_dev *rhp;
+	struct iwch_qp *qhp;
+	enum iwch_qp_attr_mask mask = 0;
+	struct iwch_qp_attributes attrs;
+
+	PDBG("%s ib_qp %p\n", __FUNCTION__, ibqp);
+
+	/* iwarp does not support the RTR state */
+	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+		attr_mask &= ~IB_QP_STATE;
+
+	/* Make sure we still have something left to do */
+	if (!attr_mask)
+		return 0;
+
+	memset(&attrs, 0, sizeof attrs);
+	qhp = to_iwch_qp(ibqp);
+	rhp = qhp->rhp;
+
+	attrs.next_state = iwch_convert_state(attr->qp_state);
+	attrs.enable_rdma_read = (attr->qp_access_flags &
+			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
+	attrs.enable_rdma_write = (attr->qp_access_flags &
+				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+
+	mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
+	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
+			(IWCH_QP_ATTR_ENABLE_RDMA_READ |
+			 IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+			 IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+
+	return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp)
+{
+	PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+	atomic_inc(&(to_iwch_qp(qp)->refcnt));
+}
+
+void iwch_qp_rem_ref(struct ib_qp *qp)
+{
+	PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+	if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
+		wake_up(&(to_iwch_qp(qp)->wait));
+}
+
+struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+{
+	PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
+	return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
+}
+
+
+static int iwch_query_pkey(struct ib_device *ibdev,
+			   u8 port, u16 index, u16 * pkey)
+{
+	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	*pkey = 0;
+	return 0;
+}
+
+static int iwch_query_gid(struct ib_device *ibdev, u8 port,
+			  int index, union ib_gid *gid)
+{
+	struct iwch_dev *dev;
+
+	PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
+	       __FUNCTION__, ibdev, port, index, gid);
+	dev = to_iwch_dev(ibdev);
+	BUG_ON(port == 0 || port > 2);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
+	return 0;
+}
+
+static int iwch_query_device(struct ib_device *ibdev,
+			     struct ib_device_attr *props)
+{
+
+	struct iwch_dev *dev;
+	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+
+	dev = to_iwch_dev(ibdev);
+	memset(props, 0, sizeof *props);
+	memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+	props->device_cap_flags = dev->device_cap_flags;
+	props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
+	props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
+	props->max_mr_size = ~0ull;
+	props->max_qp = dev->attr.max_qps;
+	props->max_qp_wr = dev->attr.max_wrs;
+	props->max_sge = dev->attr.max_sge_per_wr;
+	props->max_sge_rd = 1;
+	props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
+	props->max_cq = dev->attr.max_cqs;
+	props->max_cqe = dev->attr.max_cqes_per_cq;
+	props->max_mr = dev->attr.max_mem_regs;
+	props->max_pd = dev->attr.max_pds;
+	props->local_ca_ack_delay = 0;
+
+	return 0;
+}
+
+static int iwch_query_port(struct ib_device *ibdev,
+			   u8 port, struct ib_port_attr *props)
+{
+	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	props->max_mtu = IB_MTU_4096;
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_SNMP_TUNNEL_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_DEVICE_MGMT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->active_width = 2;
+	props->active_speed = 2;
+	props->max_msg_sz = -1;
+
+	return 0;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+					    ibdev.class_dev);
+	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+					    ibdev.class_dev);
+	struct ethtool_drvinfo info;
+	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	lldev->ethtool_ops->get_drvinfo(lldev, &info);
+	return sprintf(buf, "%s\n", info.fw_version);
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+					    ibdev.class_dev);
+	struct ethtool_drvinfo info;
+	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	lldev->ethtool_ops->get_drvinfo(lldev, &info);
+	return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+					    ibdev.class_dev);
+	PDBG("%s class dev 0x%p\n", __FUNCTION__, dev);
+	return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
+		                       dev->rdev.rnic_info.pdev->device);
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *iwch_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+int iwch_register_device(struct iwch_dev *dev)
+{
+	int ret;
+	int i;
+
+	PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+	strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->device_cap_flags =
+	    (IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
+	dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+	dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+	dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
+	dev->ibdev.query_device = iwch_query_device;
+	dev->ibdev.query_port = iwch_query_port;
+	dev->ibdev.modify_port = iwch_modify_port;
+	dev->ibdev.query_pkey = iwch_query_pkey;
+	dev->ibdev.query_gid = iwch_query_gid;
+	dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
+	dev->ibdev.mmap = iwch_mmap;
+	dev->ibdev.alloc_pd = iwch_allocate_pd;
+	dev->ibdev.dealloc_pd = iwch_deallocate_pd;
+	dev->ibdev.create_ah = iwch_ah_create;
+	dev->ibdev.destroy_ah = iwch_ah_destroy;
+	dev->ibdev.create_qp = iwch_create_qp;
+	dev->ibdev.modify_qp = iwch_ib_modify_qp;
+	dev->ibdev.destroy_qp = iwch_destroy_qp;
+	dev->ibdev.create_cq = iwch_create_cq;
+	dev->ibdev.destroy_cq = iwch_destroy_cq;
+	dev->ibdev.resize_cq = iwch_resize_cq;
+	dev->ibdev.poll_cq = iwch_poll_cq;
+	dev->ibdev.get_dma_mr = iwch_get_dma_mr;
+	dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
+	dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
+	dev->ibdev.reg_user_mr = iwch_reg_user_mr;
+	dev->ibdev.dereg_mr = iwch_dereg_mr;
+	dev->ibdev.alloc_mw = iwch_alloc_mw;
+	dev->ibdev.bind_mw = iwch_bind_mw;
+	dev->ibdev.dealloc_mw = iwch_dealloc_mw;
+
+	dev->ibdev.attach_mcast = iwch_multicast_attach;
+	dev->ibdev.detach_mcast = iwch_multicast_detach;
+	dev->ibdev.process_mad = iwch_process_mad;
+
+	dev->ibdev.req_notify_cq = iwch_arm_cq;
+	dev->ibdev.post_send = iwch_post_send;
+	dev->ibdev.post_recv = iwch_post_receive;
+
+
+	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+	if (!dev->ibdev.iwcm)
+		return -ENOMEM;
+	dev->ibdev.iwcm->connect = iwch_connect;
+	dev->ibdev.iwcm->accept = iwch_accept_cr;
+	dev->ibdev.iwcm->reject = iwch_reject_cr;
+	dev->ibdev.iwcm->create_listen = iwch_create_listen;
+	dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+	dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
+	dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
+	dev->ibdev.iwcm->get_qp = iwch_get_qp;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		goto bail1;
+
+	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       iwch_class_attributes[i]);
+		if (ret) {
+			goto bail2;
+		}
+	}
+	return 0;
+bail2:
+	ib_unregister_device(&dev->ibdev);
+bail1:
+	return ret;
+}
+
+void iwch_unregister_device(struct iwch_dev *dev)
+{
+	int i;
+
+	PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
+		class_device_remove_file(&dev->ibdev.class_dev,
+					 iwch_class_attributes[i]);
+	ib_unregister_device(&dev->ibdev);
+	return;
+}
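
For anyone poking at this from userspace: iwch_register_device() above hangs
hw_rev, fw_ver, hca_type and board_id off the device's class_dev, so they show
up as sysfs attributes once the device registers.  A rough read-only sketch
follows (illustrative only, not part of the patch); it assumes the device came
up as "cxgb3_0", so adjust the name to match your box.

/*
 * Illustrative userspace sketch -- not part of the patch.  Reads the
 * attributes registered above, assuming the device registered as "cxgb3_0".
 */
#include <stdio.h>

int main(void)
{
	const char *attrs[] = { "hw_rev", "fw_ver", "hca_type", "board_id" };
	char path[128], buf[64];
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/infiniband/cxgb3_0/%s", attrs[i]);
		f = fopen(path, "r");
		if (!f)
			continue;	/* attribute not present */
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", attrs[i], buf);
		fclose(f);
	}
	return 0;
}
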
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_PROVIDER_H__
+#define __IWCH_PROVIDER_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <rdma/ib_verbs.h>
+#include <asm/types.h>
+#include "t3cdev.h"
+#include "iwch.h"
+#include "cxio_wr.h"
+#include "cxio_hal.h"
+
+struct iwch_pd {
+	struct ib_pd ibpd;
+	u32 pdid;
+	struct iwch_dev *rhp;
+};
+
+static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct iwch_pd, ibpd);
+}
+
+struct tpt_attributes {
+	u32 stag;
+	u32 state:1;
+	u32 type:2;
+	u32 rsvd:1;
+	enum tpt_mem_perm perms;
+	u32 remote_invaliate_disable:1;
+	u32 zbva:1;
+	u32 mw_bind_enable:1;
+	u32 page_size:5;
+
+	u32 pdid;
+	u32 qpid;
+	u32 pbl_addr;
+	u32 len;
+	u64 va_fbo;
+	u32 pbl_size;
+};
+
+struct iwch_mr {
+	struct ib_mr ibmr;
+	struct iwch_dev *rhp;
+	u64 kva;
+	struct tpt_attributes attr;
+};
+
+typedef struct iwch_mw iwch_mw_handle;
+
+static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct iwch_mr, ibmr);
+}
+
+struct iwch_mw {
+	struct ib_mw ibmw;
+	struct iwch_dev *rhp;
+	u64 kva;
+	struct tpt_attributes attr;
+};
+
+static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct iwch_mw, ibmw);
+}
+
+struct iwch_cq {
+	struct ib_cq ibcq;
+	struct iwch_dev *rhp;
+	struct t3_cq cq;
+	spinlock_t lock;
+	atomic_t refcnt;
+	wait_queue_head_t wait;
+	u32 *user_rptr_addr;
+};
+
+static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct iwch_cq, ibcq);
+}
+
+enum IWCH_QP_FLAGS {
+	QP_QUIESCED = 0x01
+};
+
+struct iwch_mpa_attributes {
+	u8 recv_marker_enabled;
+	u8 xmit_marker_enabled;	/* iWARP: enable inbound Read Resp. */
+	u8 crc_enabled;
+	u8 version;	/* 0 or 1 */
+};
+
+struct iwch_qp_attributes {
+	u32 scq;
+	u32 rcq;
+	u32 sq_num_entries;
+	u32 rq_num_entries;
+	u32 sq_max_sges;
+	u32 sq_max_sges_rdma_write;
+	u32 rq_max_sges;
+	u32 state;
+	u8 enable_rdma_read;
+	u8 enable_rdma_write;	/* enable inbound Read Resp. */
+	u8 enable_bind;
+	u8 enable_mmid0_fastreg;	/* Enable STAG0 + Fast-register */
+	/*
+	 * Next QP state. If the current state is specified, only the
+	 * QP attributes will be modified.
+	 */
+	u32 max_ord;
+	u32 max_ird;
+	u32 pd;	/* IN */
+	u32 next_state;
+	char terminate_buffer[52];
+	u32 terminate_msg_len;
+	u8 is_terminate_local;
+	struct iwch_mpa_attributes mpa_attr;	/* IN-OUT */
+	struct iwch_ep *llp_stream_handle;
+	char *stream_msg_buf;	/* Last stream msg. before Idle -> RTS */
+	u32 stream_msg_buf_len;	/* Only on Idle -> RTS */
+};
+
+struct iwch_qp {
+	struct ib_qp ibqp;
+	struct iwch_dev *rhp;
+	struct iwch_ep *ep;
+	struct iwch_qp_attributes attr;
+	struct t3_wq wq;
+	spinlock_t lock;
+	atomic_t refcnt;
+	wait_queue_head_t wait;
+	enum IWCH_QP_FLAGS flags;
+	struct timer_list timer;
+};
+
+static inline int qp_quiesced(struct iwch_qp *qhp)
+{
+	return (qhp->flags & QP_QUIESCED);
+}
+
+static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct iwch_qp, ibqp);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp);
+void iwch_qp_rem_ref(struct ib_qp *qp);
+struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn);
+
+struct iwch_ucontext {
+	struct ib_ucontext ibucontext;
+	struct cxio_ucontext uctx;
+	u32 key;
+	spinlock_t mmap_lock;
+	struct list_head mmaps;
+};
+
+static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
+{
+	return container_of(c, struct iwch_ucontext, ibucontext);
+}
+
+struct iwch_mm_entry {
+	struct list_head entry;
+	u64 addr;
+	u32 key;
+	unsigned len;
+};
+
+static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
+						u32 key, unsigned len)
+{
+	struct list_head *pos, *nxt;
+	struct iwch_mm_entry *mm;
+
+	spin_lock_irq(&ucontext->mmap_lock);
+	list_for_each_safe(pos, nxt, &ucontext->mmaps) {
+		
+		mm = list_entry(pos, struct iwch_mm_entry, entry);
+		if (mm->key == key && mm->len == len) {
+			list_del_init(&mm->entry);
+			spin_unlock_irq(&ucontext->mmap_lock);
+			PDBG("%s addr 0x%llx key 0x%x len %d\n", 
+			     __FUNCTION__, mm->addr, mm->key, mm->len);
+			return mm;
+		}
+	}
+	spin_unlock_irq(&ucontext->mmap_lock);
+	return NULL;
+}
+
+static inline void insert_mmap(struct iwch_ucontext *ucontext,
+			       struct iwch_mm_entry *mm)
+{
+	spin_lock_irq(&ucontext->mmap_lock);
+	PDBG("%s addr 0x%llx key 0x%x len %d\n", 
+	     __FUNCTION__, mm->addr, mm->key, mm->len);
+	list_add_tail(&mm->entry, &ucontext->mmaps);
+	spin_unlock_irq(&ucontext->mmap_lock);
+}
+
+enum iwch_qp_attr_mask {
+	IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
+	IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+	IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+	IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+	IWCH_QP_ATTR_MAX_ORD = 1 << 11,
+	IWCH_QP_ATTR_MAX_IRD = 1 << 12,
+	IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+	IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+	IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
+	IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+	IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+				     IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+				     IWCH_QP_ATTR_MAX_ORD |
+				     IWCH_QP_ATTR_MAX_IRD |
+				     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+				     IWCH_QP_ATTR_STREAM_MSG_BUFFER |
+				     IWCH_QP_ATTR_MPA_ATTR |
+				     IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int iwch_modify_qp(struct iwch_dev *rhp,
+				struct iwch_qp *qhp,
+				enum iwch_qp_attr_mask mask,
+				struct iwch_qp_attributes *attrs,
+				int internal);
+
+enum iwch_qp_state {
+	IWCH_QP_STATE_IDLE,
+	IWCH_QP_STATE_RTS,
+	IWCH_QP_STATE_ERROR,
+	IWCH_QP_STATE_TERMINATE,
+	IWCH_QP_STATE_CLOSING,
+	IWCH_QP_STATE_TOT
+};
+
+static inline int iwch_convert_state(enum ib_qp_state ib_state)
+{
+	switch (ib_state) {
+	case IB_QPS_RESET:
+	case IB_QPS_INIT:
+		return IWCH_QP_STATE_IDLE;
+	case IB_QPS_RTS:
+		return IWCH_QP_STATE_RTS;
+	case IB_QPS_SQD:
+		return IWCH_QP_STATE_CLOSING;
+	case IB_QPS_SQE:
+		return IWCH_QP_STATE_TERMINATE;
+	case IB_QPS_ERR:
+		return IWCH_QP_STATE_ERROR;
+	default:
+		return -1;
+	}
+}
+
+static inline u32 iwch_ib_to_tpt_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+	       TPT_LOCAL_READ;
+}
+
+static inline u32 iwch_ib_to_mwbind_access(int acc)
+{
+	return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
+	       (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
+	       T3_MEM_ACCESS_LOCAL_READ;
+}
+
+enum iwch_mmid_state {
+	IWCH_STAG_STATE_VALID,
+	IWCH_STAG_STATE_INVALID
+};
+
+enum iwch_qp_query_flags {
+	IWCH_QP_QUERY_CONTEXT_NONE = 0x0,	/* No ctx; Only attrs */
+	IWCH_QP_QUERY_CONTEXT_GET = 0x1,	/* Get ctx + attrs */
+	IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2,	/* Not Supported */
+
+	/*
+	 * Quiesce QP context; Consumer
+	 * will NOT replay outstanding WR
+	 */
+	IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
+	IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
+	IWCH_QP_QUERY_TEST_USERWRITE = 0x32	/* Test special */
+};
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr);
+int iwch_bind_mw(struct ib_qp *qp,
+			     struct ib_mw *mw,
+			     struct ib_mw_bind *mw_bind);
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_register_device(struct iwch_dev *dev);
+void iwch_unregister_device(struct iwch_dev *dev);
+int iwch_quiesce_qps(struct iwch_cq *chp);
+int iwch_resume_qps(struct iwch_cq *chp);
+void stop_read_rep_timer(struct iwch_qp *qhp);
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+					struct iwch_mr *mhp,
+					int shift,
+					__be64 *page_list);
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+					struct iwch_mr *mhp,
+					int shift,
+					__be64 *page_list,
+					int npages);
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+					int num_phys_buf,
+					u64 *iova_start,
+					u64 *total_size,
+					int *npages,
+					int *shift,
+					__be64 **page_list);
+
+
+#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
+
+#endif
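
The access-flag helpers near the bottom of this header are plain bitwise
translations from the IB_ACCESS_* flags to the T3 TPT/MW permission bits, with
local read always granted.  Here is a standalone mirror of that shape
(illustrative only, not part of the patch); the bit values below are made-up
stand-ins just to make the mapping visible -- the real encodings live in the
cxio headers.

/*
 * Illustrative userspace sketch -- not part of the patch.  Mirrors the shape
 * of iwch_ib_to_tpt_access() using stand-in bit values, not the driver's
 * actual IB_ACCESS_* / TPT_* encodings.
 */
#include <stdio.h>

#define IB_ACCESS_LOCAL_WRITE	(1 << 0)	/* stand-in value */
#define IB_ACCESS_REMOTE_WRITE	(1 << 1)	/* stand-in value */
#define IB_ACCESS_REMOTE_READ	(1 << 2)	/* stand-in value */

#define TPT_LOCAL_READ		(1 << 0)	/* stand-in value */
#define TPT_LOCAL_WRITE		(1 << 1)	/* stand-in value */
#define TPT_REMOTE_READ		(1 << 2)	/* stand-in value */
#define TPT_REMOTE_WRITE	(1 << 3)	/* stand-in value */

static unsigned int ib_to_tpt_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
	       TPT_LOCAL_READ;	/* local read is always granted */
}

int main(void)
{
	/* a typical MR registration: local write + remote write */
	printf("perms 0x%x\n",
	       ib_to_tpt_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE));
	return 0;
}
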
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+
+#define NO_SUPPORT -1
+
+static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+				       u8 * flit_cnt)
+{
+	int i;
+	u32 plen;
+
+	switch (wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			wqe->send.rdmaop = T3_SEND_WITH_SE;
+		else
+			wqe->send.rdmaop = T3_SEND;
+		wqe->send.rem_stag = 0;
+		break;
+#if 0				/* Not currently supported */
+	case TYPE_SEND_INVALIDATE:
+	case TYPE_SEND_INVALIDATE_IMMEDIATE:
+		wqe->send.rdmaop = T3_SEND_WITH_INV;
+		wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+		break;
+	case TYPE_SEND_SE_INVALIDATE:
+		wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+		wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+		break;
+#endif
+	default:
+		break;
+	}
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	wqe->send.reserved[0] = 0;
+	wqe->send.reserved[1] = 0;
+	wqe->send.reserved[2] = 0;
+	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+		plen = 4;
+		wqe->send.sgl[0].stag = wr->imm_data;
+		wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
+		wqe->send.num_sgle = __constant_cpu_to_be32(0);
+		*flit_cnt = 5;
+	} else {
+		plen = 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			if ((plen + wr->sg_list[i].length) < plen) {
+				return -EMSGSIZE;
+			}
+			plen += wr->sg_list[i].length;
+			wqe->send.sgl[i].stag =
+			    cpu_to_be32(wr->sg_list[i].lkey);
+			wqe->send.sgl[i].len =
+			    cpu_to_be32(wr->sg_list[i].length);
+			wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+		}
+		wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+		*flit_cnt = 4 + ((wr->num_sge) << 1);
+	}
+	wqe->send.plen = cpu_to_be32(plen);
+	return 0;
+}
+
+static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+					u8 *flit_cnt)
+{
+	int i;
+	u32 plen;
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	wqe->write.rdmaop = T3_RDMA_WRITE;
+	wqe->write.reserved[0] = 0;
+	wqe->write.reserved[1] = 0;
+	wqe->write.reserved[2] = 0;
+	wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+	wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+
+	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		plen = 4;
+		wqe->write.sgl[0].stag = wr->imm_data;
+		wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
+		wqe->write.num_sgle = __constant_cpu_to_be32(0);
+		*flit_cnt = 6;
+	} else {
+		plen = 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			if ((plen + wr->sg_list[i].length) < plen) {
+				return -EMSGSIZE;
+			}
+			plen += wr->sg_list[i].length;
+			wqe->write.sgl[i].stag =
+			    cpu_to_be32(wr->sg_list[i].lkey);
+			wqe->write.sgl[i].len =
+			    cpu_to_be32(wr->sg_list[i].length);
+			wqe->write.sgl[i].to =
+			    cpu_to_be64(wr->sg_list[i].addr);
+		}
+		wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
+		*flit_cnt = 5 + ((wr->num_sge) << 1);
+	}
+	wqe->write.plen = cpu_to_be32(plen);
+	return 0;
+}
+
+static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+				       u8 *flit_cnt)
+{
+	if (wr->num_sge > 1)
+		return -EINVAL;
+	wqe->read.rdmaop = T3_READ_REQ;
+	wqe->read.reserved[0] = 0;
+	wqe->read.reserved[1] = 0;
+	wqe->read.reserved[2] = 0;
+	wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+	wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+	wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
+	wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
+	wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
+	*flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+	return 0;
+}
+
+/*
+ * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
+ */
+static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
+				   struct ib_sge *sg_list, u32 num_sgle,
+				   u32 * pbl_addr, u8 * page_size)
+{
+	int i;
+	struct iwch_mr *mhp;
+	u32 offset;
+	for (i = 0; i < num_sgle; i++) {
+
+		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
+		if (!mhp) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EIO;
+		}
+		if (!mhp->attr.state) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EIO;
+		}
+		if (mhp->attr.zbva) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EIO;
+		}
+
+		if (sg_list[i].addr < mhp->attr.va_fbo) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EINVAL;
+		}
+		if (sg_list[i].addr + ((u64) sg_list[i].length) <
+		    sg_list[i].addr) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EINVAL;
+		}
+		if (sg_list[i].addr + ((u64) sg_list[i].length) >
+		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
+			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			return -EINVAL;
+		}
+		offset = sg_list[i].addr - mhp->attr.va_fbo;
+		offset += ((u32) mhp->attr.va_fbo) %
+		          (1UL << (12 + mhp->attr.page_size));
+		pbl_addr[i] = ((mhp->attr.pbl_addr -
+			        rhp->rdev.rnic_info.pbl_base) >> 3) +
+			      (offset >> (12 + mhp->attr.page_size));
+		page_size[i] = mhp->attr.page_size;
+	}
+	return 0;
+}
+
+static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
+						    union t3_wr *wqe,
+						    struct ib_recv_wr *wr)
+{
+	int i, err = 0;
+	u32 pbl_addr[4];
+	u8 page_size[4];
+	if (wr->num_sge > T3_MAX_SGE)
+		return -EINVAL;
+	err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr,
+			       page_size);
+	if (err)
+		return err;
+	wqe->recv.pagesz[0] = page_size[0];
+	wqe->recv.pagesz[1] = page_size[1];
+	wqe->recv.pagesz[2] = page_size[2];
+	wqe->recv.pagesz[3] = page_size[3];
+	wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+	for (i = 0; i < wr->num_sge; i++) {
+		wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+		wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+		
+		/* to in the WQE == the offset into the page */
+		wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+				(1UL << (12 + page_size[i])));
+
+		/* pbl_addr is the adapter's address in the PBL */
+		wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+	}
+	for (; i < T3_MAX_SGE; i++) {
+		wqe->recv.sgl[i].stag = 0;
+		wqe->recv.sgl[i].len = 0;
+		wqe->recv.sgl[i].to = 0;
+		wqe->recv.pbl_addr[i] = 0;
+	}
+	return 0;
+}
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr)
+{
+	int err = 0;
+	u8 t3_wr_flit_cnt;
+	enum t3_wr_opcode t3_wr_opcode = 0;
+	enum t3_wr_flags t3_wr_flags;
+	struct iwch_qp *qhp;
+	u32 idx;
+	union t3_wr *wqe;
+	u32 num_wrs;
+	unsigned long flag;
+	struct t3_swsq *sqp;
+
+	qhp = to_iwch_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+		  qhp->wq.sq_size_log2);
+	if (num_wrs <= 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (num_wrs == 0) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+		wqe = (union t3_wr *) (qhp->wq.queue + idx);
+		t3_wr_flags = 0;
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
+		if (wr->send_flags & IB_SEND_FENCE)
+			t3_wr_flags |= T3_READ_FENCE_FLAG;
+		if (wr->send_flags & IB_SEND_SIGNALED)
+			t3_wr_flags |= T3_COMPLETION_FLAG;
+		sqp = qhp->wq.sq +
+		      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+		switch (wr->opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_IMM:
+			t3_wr_opcode = T3_WR_SEND;
+			err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+			break;
+		case IB_WR_RDMA_WRITE:
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			t3_wr_opcode = T3_WR_WRITE;
+			err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+			break;
+		case IB_WR_RDMA_READ:
+			t3_wr_opcode = T3_WR_READ;
+			t3_wr_flags = 0; /* T3 reads are always signaled */
+			err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+			if (err)
+				break;
+			sqp->read_len = wqe->read.local_len;
+			if (!qhp->wq.oldest_read)
+				qhp->wq.oldest_read = sqp;
+			break;
+		default:
+			PDBG("%s post of type=%d TBD!\n", __FUNCTION__,
+			     wr->opcode);
+			err = -EINVAL;
+		}
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+		sqp->wr_id = wr->wr_id;
+		sqp->opcode = wr2opcode(t3_wr_opcode);
+		sqp->sq_wptr = qhp->wq.sq_wptr;
+		sqp->complete = 0;
+		sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+
+		build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
+			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+			       0, t3_wr_flit_cnt);
+		PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+		     __FUNCTION__, wr->wr_id, idx,
+		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+		     sqp->opcode);
+		wr = wr->next;
+		num_wrs--;
+		++(qhp->wq.wptr);
+		++(qhp->wq.sq_wptr);
+	}
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+	return err;
+}
+
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	int err = 0;
+	struct iwch_qp *qhp;
+	u32 idx;
+	union t3_wr *wqe;
+	u32 num_wrs;
+	unsigned long flag;
+
+	qhp = to_iwch_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
+			    qhp->wq.rq_size_log2) - 1;
+	if (!wr) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	while (wr) {
+		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+		wqe = (union t3_wr *) (qhp->wq.queue + idx);
+		if (num_wrs)
+			err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+		qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
+			wr->wr_id;
+		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
+			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+			       0, sizeof(struct t3_receive_wr) >> 3);
+		PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
+		     "wqe %p \n", __FUNCTION__, wr->wr_id, idx,
+		     qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+		++(qhp->wq.rq_wptr);
+		++(qhp->wq.wptr);
+		wr = wr->next;
+		num_wrs--;
+	}
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+	return err;
+}
+
+int iwch_bind_mw(struct ib_qp *qp,
+			     struct ib_mw *mw,
+			     struct ib_mw_bind *mw_bind)
+{
+	struct iwch_dev *rhp;
+	struct iwch_mw *mhp;
+	struct iwch_qp *qhp;
+	union t3_wr *wqe;
+	u32 pbl_addr;
+	u8 page_size;
+	u32 num_wrs;
+	unsigned long flag;
+	struct ib_sge sgl;
+	int err = 0;
+	enum t3_wr_flags t3_wr_flags;
+	u32 idx;
+	struct t3_swsq *sqp;
+
+	qhp = to_iwch_qp(qp);
+	mhp = to_iwch_mw(mw);
+	rhp = qhp->rhp;
+
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -EINVAL;
+	}
+	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+			    qhp->wq.sq_size_log2);
+	if ((num_wrs) <= 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return -ENOMEM;
+	}
+	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+	PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __FUNCTION__, idx,
+	     mw, mw_bind);
+	wqe = (union t3_wr *) (qhp->wq.queue + idx);
+
+	t3_wr_flags = 0;
+	if (mw_bind->send_flags & IB_SEND_SIGNALED)
+		t3_wr_flags = T3_COMPLETION_FLAG;
+
+	sgl.addr = mw_bind->addr;
+	sgl.lkey = mw_bind->mr->lkey;
+	sgl.length = mw_bind->length;
+	wqe->bind.reserved = 0;
+	wqe->bind.type = T3_VA_BASED_TO;
+
+	/* TBD: check perms */
+	wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
+	wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+	wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
+	wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
+	wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
+	if (err) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return err;
+	}
+	wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+	sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+	sqp->wr_id = mw_bind->wr_id;
+	sqp->opcode = T3_BIND_MW;
+	sqp->sq_wptr = qhp->wq.sq_wptr;
+	sqp->complete = 0;
+	sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
+	wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
+	wqe->bind.mr_pagesz = page_size;
+	wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
+	build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
+		       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
+			        sizeof(struct t3_bind_mw_wr) >> 3);
+	++(qhp->wq.wptr);
+	++(qhp->wq.sq_wptr);
+	spin_unlock_irqrestore(&qhp->lock, flag);
+
+	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+	return err;
+}
+
+static inline void build_term_codes(struct respQ_msg_t *rsp_msg, 
+				    u8 *layer_type, u8 *ecode)
+{
+	int status = TPT_ERR_INTERNAL_ERR;
+	int tagged = 0;
+	int opcode = -1;
+	int rqtype = 0;
+	int send_inv = 0;
+
+	if (rsp_msg) {
+		status = CQE_STATUS(rsp_msg->cqe);
+		opcode = CQE_OPCODE(rsp_msg->cqe);
+		rqtype = RQ_TYPE(rsp_msg->cqe);
+		send_inv = (opcode == T3_SEND_WITH_INV) || 
+		           (opcode == T3_SEND_WITH_SE_INV);
+		tagged = (opcode == T3_RDMA_WRITE) || 
+		    	 (rqtype && (opcode == T3_READ_RESP));
+	} 
+
+	switch (status) {
+	case TPT_ERR_STAG:
+		if (send_inv) {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+			*ecode = RDMAP_CANT_INV_STAG;
+		} else {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			*ecode = RDMAP_INV_STAG;
+		}
+		break;
+	case TPT_ERR_PDID:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		if ((opcode == T3_SEND_WITH_INV) ||
+		    (opcode == T3_SEND_WITH_SE_INV))
+			*ecode = RDMAP_CANT_INV_STAG;
+		else 
+			*ecode = RDMAP_STAG_NOT_ASSOC;
+		break;
+	case TPT_ERR_QPID:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		*ecode = RDMAP_STAG_NOT_ASSOC;
+		break;
+	case TPT_ERR_ACCESS:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		*ecode = RDMAP_ACC_VIOL;
+		break;
+	case TPT_ERR_WRAP:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		*ecode = RDMAP_TO_WRAP;
+		break;
+	case TPT_ERR_BOUND:
+		if (tagged) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+			*ecode = DDPT_BASE_BOUNDS;
+		} else {
+			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			*ecode = RDMAP_BASE_BOUNDS;
+		}
+		break;
+	case TPT_ERR_INVALIDATE_SHARED_MR:
+	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_CANT_INV_STAG;
+		break;
+	case TPT_ERR_ECC:
+	case TPT_ERR_ECC_PSTAG:
+	case TPT_ERR_INTERNAL_ERR:
+		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	case TPT_ERR_OUT_OF_RQE:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MSN_NOBUF;
+		break;
+	case TPT_ERR_PBL_ADDR_BOUND:
+		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+		*ecode = DDPT_BASE_BOUNDS;
+		break;
+	case TPT_ERR_CRC:
+		*layer_type = LAYER_MPA|DDP_LLP;
+		*ecode = MPA_CRC_ERR;
+		break;
+	case TPT_ERR_MARKER:
+		*layer_type = LAYER_MPA|DDP_LLP;
+		*ecode = MPA_MARKER_ERR;
+		break;
+	case TPT_ERR_PDU_LEN_ERR:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_MSG_TOOBIG;
+		break;
+	case TPT_ERR_DDP_VERSION:
+		if (tagged) {
+			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+			*ecode = DDPT_INV_VERS;
+		} else {
+			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+			*ecode = DDPU_INV_VERS;
+		}
+		break;
+	case TPT_ERR_RDMA_VERSION:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_INV_VERS;
+		break;
+	case TPT_ERR_OPCODE:
+		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		*ecode = RDMAP_INV_OPCODE;
+		break;
+	case TPT_ERR_DDP_QUEUE_NUM:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_QN;
+		break;
+	case TPT_ERR_MSN:
+	case TPT_ERR_MSN_GAP:
+	case TPT_ERR_MSN_RANGE:
+	case TPT_ERR_IRD_OVERFLOW:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MSN_RANGE;
+		break;
+	case TPT_ERR_TBIT:
+		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	case TPT_ERR_MO:
+		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+		*ecode = DDPU_INV_MO;
+		break;
+	default:
+		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+		*ecode = 0;
+		break;
+	}
+}
+
+/*
+ * This posts a TERMINATE with layer=RDMA, type=catastrophic.
+ */
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
+{
+	union t3_wr *wqe;
+	struct terminate_message *term;
+	struct sk_buff *skb;
+
+	PDBG("%s %d\n", __FUNCTION__, __LINE__);
+	skb = alloc_skb(40, GFP_ATOMIC);
+	if (!skb) {
+		printk(KERN_ERR "%s cannot send TERMINATE!\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	wqe = (union t3_wr *)skb_put(skb, 40);
+	memset(wqe, 0, 40);
+	wqe->send.rdmaop = T3_TERMINATE;
+	
+	/* immediate data length */
+	wqe->send.plen = htonl(4);
+
+	/* immediate data starts here. */
+	term = (struct terminate_message *)wqe->send.sgl;
+	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
+	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
+			 V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
+	skb->priority = CPL_PRIORITY_DATA;
+	return (cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb));
+}
+
+/*
+ * Assumes qhp lock is held.
+ */
+static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+	struct iwch_cq *rchp, *schp;
+	int count;
+
+	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+	schp = get_chp(qhp->rhp, qhp->attr.scq);
+	
+	PDBG("%s qhp %p rchp %p schp %p\n", __FUNCTION__, qhp, rchp, schp);
+	/* take a ref on the qhp since we must release the lock */
+	atomic_inc(&qhp->refcnt);
+	spin_unlock_irqrestore(&qhp->lock, *flag);
+
+	/* locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, *flag);
+	spin_lock(&qhp->lock);
+	cxio_flush_hw_cq(&rchp->cq);
+	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
+	cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, *flag);
+
+	/* locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&schp->lock, *flag);
+	spin_lock(&qhp->lock);
+	cxio_flush_hw_cq(&schp->cq);
+	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
+	cxio_flush_sq(&qhp->wq, &schp->cq, count);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&schp->lock, *flag);
+
+	/* deref */
+	if (atomic_dec_and_test(&qhp->refcnt))
+                wake_up(&qhp->wait);
+
+	spin_lock_irqsave(&qhp->lock, *flag);
+}
+
+static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+	if (t3b_device(qhp->rhp))
+		cxio_set_wq_in_error(&qhp->wq);
+	else
+		__flush_qp(qhp, flag);
+}
+
+
+/*
+ * Return non-zero if at least one RECV was pre-posted.
+ */
+static inline int rqes_posted(struct iwch_qp *qhp)
+{
+	return (fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV);
+}
+
+static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+				enum iwch_qp_attr_mask mask,
+				struct iwch_qp_attributes *attrs)
+{
+	struct t3_rdma_init_attr init_attr;
+	int ret;
+
+	init_attr.tid = qhp->ep->hwtid;
+	init_attr.qpid = qhp->wq.qpid;
+	init_attr.pdid = qhp->attr.pd;
+	init_attr.scqid = qhp->attr.scq;
+	init_attr.rcqid = qhp->attr.rcq;
+	init_attr.rq_addr = qhp->wq.rq_addr;
+	init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
+	init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
+		qhp->attr.mpa_attr.recv_marker_enabled |
+		(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
+		(qhp->attr.mpa_attr.crc_enabled << 2);
+
+	/*
+	 * XXX - The IWCM doesn't quite handle getting these
+ 	 * attrs set before going into RTS.  For now, just turn
+	 * them on always...
+	 */
+#if 0
+	init_attr.qpcaps = qhp->attr.enableRdmaRead |
+		(qhp->attr.enableRdmaWrite << 1) |
+		(qhp->attr.enableBind << 2) |
+		(qhp->attr.enable_stag0_fastreg << 3) |
+		(qhp->attr.enable_stag0_fastreg << 4);
+#else
+	init_attr.qpcaps = 0x1f;
+#endif
+	init_attr.tcp_emss = qhp->ep->emss;
+	init_attr.ord = qhp->attr.max_ord;
+	init_attr.ird = qhp->attr.max_ird;
+	init_attr.qp_dma_addr = qhp->wq.dma_addr;
+	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.irs = qhp->ep->rcv_seq;
+	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
+	     "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
+	     init_attr.rq_addr, init_attr.rq_size,
+	     init_attr.flags, init_attr.qpcaps);
+	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+	PDBG("%s ret %d\n", __FUNCTION__, ret);
+	return ret;
+}
+
+int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+				enum iwch_qp_attr_mask mask,
+				struct iwch_qp_attributes *attrs,
+				int internal)
+{
+	int ret = 0;
+	struct iwch_qp_attributes newattr = qhp->attr;
+	unsigned long flag;
+	int disconnect = 0;
+	int terminate = 0;
+	int abort = 0;
+	int free = 0;
+	struct iwch_ep *ep = NULL;
+
+	PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __FUNCTION__,
+	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+	spin_lock_irqsave(&qhp->lock, flag);
+
+	/* Process attr changes if in IDLE */
+	if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
+		if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
+			ret = -EIO;
+			goto out;
+		}
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
+			newattr.enable_rdma_read = attrs->enable_rdma_read;
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
+			newattr.enable_rdma_write = attrs->enable_rdma_write;
+		if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
+			newattr.enable_bind = attrs->enable_bind;
+		if (mask & IWCH_QP_ATTR_MAX_ORD) {
+			if (attrs->max_ord >
+			    rhp->attr.max_rdma_read_qp_depth) {
+				ret = -EINVAL;
+				goto out;
+			}
+			newattr.max_ord = attrs->max_ord;
+		}
+		if (mask & IWCH_QP_ATTR_MAX_IRD) {
+			if (attrs->max_ird >
+		  	    rhp->attr.max_rdma_reads_per_qp) {
+				ret = -EINVAL;
+				goto out;
+			}
+			newattr.max_ird = attrs->max_ird;
+		}
+		qhp->attr = newattr;
+	}
+	
+	if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
+		goto out;
+	if (qhp->attr.state == attrs->next_state)
+		goto out;
+
+	switch (qhp->attr.state) {
+	case IWCH_QP_STATE_IDLE:
+		switch (attrs->next_state) {
+		case IWCH_QP_STATE_RTS:
+			if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			qhp->attr.mpa_attr = attrs->mpa_attr;
+			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+			qhp->ep = qhp->attr.llp_stream_handle;
+			qhp->attr.state = IWCH_QP_STATE_RTS;
+
+			/*
+			 * Ref the endpoint here and deref when we
+	 		 * disassociate the endpoint from the QP.  This
+			 * happens in CLOSING->IDLE transition or *->ERROR
+			 * transition.
+			 */
+			get_ep(&qhp->ep->com);
+			spin_unlock_irqrestore(&qhp->lock, flag);
+			ret = rdma_init(rhp, qhp, mask, attrs);
+			spin_lock_irqsave(&qhp->lock, flag);
+			if (ret)
+				goto err;
+			break;
+		case IWCH_QP_STATE_ERROR:
+			qhp->attr.state = IWCH_QP_STATE_ERROR;
+			flush_qp(qhp, &flag);
+			break;
+		default:
+			ret = -EINVAL;	
+			goto out;
+		}
+		break;
+	case IWCH_QP_STATE_RTS:
+		switch (attrs->next_state) {
+		case IWCH_QP_STATE_CLOSING:
+			BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+			qhp->attr.state = IWCH_QP_STATE_CLOSING;
+			if (!internal) {
+				abort = 0;
+				disconnect = 1;
+				ep = qhp->ep;
+			}
+			break;
+		case IWCH_QP_STATE_TERMINATE:
+			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+			if (t3b_device(qhp->rhp))
+				cxio_set_wq_in_error(&qhp->wq);
+			if (!internal)
+				terminate = 1;
+			break;
+		case IWCH_QP_STATE_ERROR:
+			qhp->attr.state = IWCH_QP_STATE_ERROR;
+			if (!internal) {
+				abort = 1;
+				disconnect = 1;
+				ep = qhp->ep;
+			}
+			goto err;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case IWCH_QP_STATE_CLOSING:
+		if (!internal) {
+			ret = -EINVAL;
+			goto out;
+		}
+		switch (attrs->next_state) {
+		case IWCH_QP_STATE_IDLE:
+			qhp->attr.state = IWCH_QP_STATE_IDLE;
+			qhp->attr.llp_stream_handle = NULL;
+			put_ep(&qhp->ep->com);
+			qhp->ep = NULL;
+			wake_up(&qhp->wait);
+			break;
+		case IWCH_QP_STATE_ERROR:
+			goto err;
+		default:
+			ret = -EINVAL;
+			goto err;
+		break;
+	case IWCH_QP_STATE_ERROR:
+		if (attrs->next_state != IWCH_QP_STATE_IDLE) {
+			ret = -EINVAL;
+			goto out;
+		}
+		
+		if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
+		    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		qhp->attr.state = IWCH_QP_STATE_IDLE;
+		memset(&qhp->attr, 0, sizeof(qhp->attr));
+		break;
+	case IWCH_QP_STATE_TERMINATE:
+		if (!internal) {
+			ret = -EINVAL;
+			goto out;
+		}
+		goto err;
+		break;
+	default:
+		printk(KERN_ERR "%s in a bad state %d\n",
+		       __FUNCTION__, qhp->attr.state);
+		ret = -EINVAL;
+		goto err;
+		break;
+	}
+	goto out;
+err:
+	PDBG("%s disassociating ep %p qpid 0x%x\n", __FUNCTION__, qhp->ep,
+	     qhp->wq.qpid);
+
+	/* disassociate the LLP connection */
+	qhp->attr.llp_stream_handle = NULL;
+	ep = qhp->ep;
+	qhp->ep = NULL;
+	qhp->attr.state = IWCH_QP_STATE_ERROR;
+	free = 1;
+	wake_up(&qhp->wait);
+	BUG_ON(!ep);
+	flush_qp(qhp, &flag);
+out:
+	spin_unlock_irqrestore(&qhp->lock, flag);
+
+	if (terminate)
+		iwch_post_terminate(qhp, NULL);
+
+	/*
+	 * If disconnect is 1, then we need to initiate a disconnect
+	 * on the EP.  This can be a normal close (RTS->CLOSING) or
+	 * an abnormal close (RTS/CLOSING->ERROR).
+	 */
+	if (disconnect)
+		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+
+	/*
+	 * If free is 1, then we've disassociated the EP from the QP
+	 * and we need to dereference the EP.
+	 */
+	if (free)
+		put_ep(&ep->com);
+
+	PDBG("%s exit state %d\n", __FUNCTION__, qhp->attr.state);
+	return ret;
+}
+
+static int quiesce_qp(struct iwch_qp *qhp)
+{
+	spin_lock_irq(&qhp->lock);
+	iwch_quiesce_tid(qhp->ep);
+	qhp->flags |= QP_QUIESCED;
+	spin_unlock_irq(&qhp->lock);
+	return 0;
+}
+
+static int resume_qp(struct iwch_qp *qhp)
+{
+	spin_lock_irq(&qhp->lock);
+	iwch_resume_tid(qhp->ep);
+	qhp->flags &= ~QP_QUIESCED;
+	spin_unlock_irq(&qhp->lock);
+	return 0;
+}
+
+int iwch_quiesce_qps(struct iwch_cq *chp)
+{
+	int i;
+	struct iwch_qp *qhp;
+
+	for (i = 0; i < T3_MAX_NUM_QP; i++) {
+		qhp = get_qhp(chp->rhp, i);
+		if (!qhp)
+			continue;
+		if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
+			quiesce_qp(qhp);
+			continue;
+		}
+		if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
+			quiesce_qp(qhp);
+	}
+	return 0;
+}
+
+int iwch_resume_qps(struct iwch_cq *chp)
+{
+	int i;
+	struct iwch_qp *qhp;
+
+	for (i = 0; i < T3_MAX_NUM_QP; i++) {
+		qhp = get_qhp(chp->rhp, i);
+		if (!qhp)
+			continue;
+		if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
+			resume_qp(qhp);
+			continue;
+		}
+		if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
+			resume_qp(qhp);
+	}
+	return 0;
+}
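
For context on how the post paths above get exercised: a kernel consumer never
calls iwch_post_send() directly, it goes through the ib_post_send() verb with
an ordinary ib_send_wr.  A minimal sketch follows (illustrative only, not part
of the patch); example_rdma_write() is a made-up helper and assumes the QP,
lkey/rkey and DMA addresses were set up elsewhere.

/*
 * Illustrative sketch -- not part of the patch.  Hands an RDMA WRITE to the
 * driver above via the generic verbs API; qp, lkey, rkey, local_dma_addr and
 * remote_addr are assumed to have been set up elsewhere.
 */
#include <linux/string.h>
#include <rdma/ib_verbs.h>

static int example_rdma_write(struct ib_qp *qp, u32 lkey, u32 rkey,
			      u64 local_dma_addr, u64 remote_addr, u32 len)
{
	struct ib_sge sge = {
		.addr	= local_dma_addr,
		.length	= len,
		.lkey	= lkey,
	};
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof wr);
	wr.wr_id = 1;				/* returned in the CQE */
	wr.sg_list = &sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_RDMA_WRITE;		/* -> iwch_build_rdma_write() */
	wr.send_flags = IB_SEND_SIGNALED;	/* request a completion */
	wr.wr.rdma.remote_addr = remote_addr;
	wr.wr.rdma.rkey = rkey;

	/* lands in iwch_post_send() for a cxgb3 QP */
	return ib_post_send(qp, &wr, &bad_wr);
}
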
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_USER_H__
+#define __IWCH_USER_H__
+
+#define IWCH_UVERBS_ABI_VERSION	1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct iwch_create_cq_req {
+	__u64 user_rptr_addr;
+};
+
+struct iwch_create_cq_resp {
+	__u64 key;		
+	__u32 cqid;
+	__u32 size_log2;
+};
+
+struct iwch_create_qp_resp {
+	__u64 key;
+	__u64 db_key;	
+	__u32 qpid;
+	__u32 size_log2;
+	__u32 sq_size_log2;
+	__u32 rq_size_log2;
+};
+
+struct iwch_reg_user_mr_resp {
+	__u32 pbl_addr;
+};
+#endif
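
Since these structs are shared with 32-bit userspace, the usual convention for
fields like user_rptr_addr is to carry the pointer in a __u64 and cast through
uintptr_t on the library side.  A tiny userspace sketch of that convention
(illustrative only, not part of the patch), using a local mirror of the
request struct:

/*
 * Illustrative userspace sketch -- not part of the patch.  Shows the usual
 * way to hand a userspace pointer to the kernel through a __u64 ABI field
 * such as user_rptr_addr; uintptr_t keeps the cast correct on both 32-bit
 * and 64-bit builds.  The struct is a local mirror, not the kernel header.
 */
#include <stdint.h>
#include <stdio.h>

struct iwch_create_cq_req {
	uint64_t user_rptr_addr;
};

int main(void)
{
	static uint32_t cq_rptr;	/* userspace CQ read pointer */
	struct iwch_create_cq_req req;

	req.user_rptr_addr = (uintptr_t)&cq_rptr;
	printf("user_rptr_addr = 0x%llx\n",
	       (unsigned long long)req.user_rptr_addr);
	return 0;
}
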
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/Kconfig
@@ -0,0 +1,27 @@
+config INFINIBAND_CXGB3
+	tristate "Chelsio RDMA Driver"
+	depends on CHELSIO_T3 && INFINIBAND
+	select GENERIC_ALLOCATOR
+	---help---
+	  This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
+	  10GbE adapters.
+
+	  For general information about Chelsio and our products, visit
+	  our website at <http://www.chelsio.com>.
+
+	  For customer support, please visit our customer support page at
+	  <http://www.chelsio.com/support.htm>.
+
+	  Please send feedback to <linux-bugs@chelsio.com>.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called iw_cxgb3.
+
+config INFINIBAND_CXGB3_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_CXGB3
+	default n
+	---help---
+	  This option causes the Chelsio RDMA driver to produce copious
+	  amounts of debug messages.  Select this if you are developing
+	  the driver or trying to diagnose a problem.
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/Makefile
@@ -0,0 +1,12 @@
+EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \
+		-I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core 
+
+obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
+
+iw_cxgb3-y :=  iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
+	       iwch_provider.o iwch.o core/cxio_hal.o core/cxio_resource.o
+
+ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
+EXTRA_CFLAGS += -DDEBUG -g 
+iw_cxgb3-y += core/cxio_dbg.o
+endif
--- linux-2.6.18.noarch/drivers/infiniband/hw/cxgb3/tcb.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/cxgb3/tcb.h
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
+ *	
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _TCB_DEFS_H
+#define _TCB_DEFS_H
+
+#define W_TCB_T_STATE    0
+#define S_TCB_T_STATE    0
+#define M_TCB_T_STATE    0xfULL
+#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
+
+#define W_TCB_TIMER    0
+#define S_TCB_TIMER    4
+#define M_TCB_TIMER    0x1ULL
+#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
+
+#define W_TCB_DACK_TIMER    0
+#define S_TCB_DACK_TIMER    5
+#define M_TCB_DACK_TIMER    0x1ULL
+#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
+
+#define W_TCB_DEL_FLAG    0
+#define S_TCB_DEL_FLAG    6
+#define M_TCB_DEL_FLAG    0x1ULL
+#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
+
+#define W_TCB_L2T_IX    0
+#define S_TCB_L2T_IX    7
+#define M_TCB_L2T_IX    0x7ffULL
+#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
+
+#define W_TCB_SMAC_SEL    0
+#define S_TCB_SMAC_SEL    18
+#define M_TCB_SMAC_SEL    0x3ULL
+#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
+
+#define W_TCB_TOS    0
+#define S_TCB_TOS    20
+#define M_TCB_TOS    0x3fULL
+#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
+
+#define W_TCB_MAX_RT    0
+#define S_TCB_MAX_RT    26
+#define M_TCB_MAX_RT    0xfULL
+#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
+
+#define W_TCB_T_RXTSHIFT    0
+#define S_TCB_T_RXTSHIFT    30
+#define M_TCB_T_RXTSHIFT    0xfULL
+#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
+
+#define W_TCB_T_DUPACKS    1
+#define S_TCB_T_DUPACKS    2
+#define M_TCB_T_DUPACKS    0xfULL
+#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
+
+#define W_TCB_T_MAXSEG    1
+#define S_TCB_T_MAXSEG    6
+#define M_TCB_T_MAXSEG    0xfULL
+#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
+
+#define W_TCB_T_FLAGS1    1
+#define S_TCB_T_FLAGS1    10
+#define M_TCB_T_FLAGS1    0xffffffffULL
+#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
+
+#define W_TCB_T_MIGRATION    1
+#define S_TCB_T_MIGRATION    20
+#define M_TCB_T_MIGRATION    0x1ULL
+#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
+
+#define W_TCB_T_FLAGS2    2
+#define S_TCB_T_FLAGS2    10
+#define M_TCB_T_FLAGS2    0x7fULL
+#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
+
+#define W_TCB_SND_SCALE    2
+#define S_TCB_SND_SCALE    17
+#define M_TCB_SND_SCALE    0xfULL
+#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
+
+#define W_TCB_RCV_SCALE    2
+#define S_TCB_RCV_SCALE    21
+#define M_TCB_RCV_SCALE    0xfULL
+#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
+
+#define W_TCB_SND_UNA_RAW    2
+#define S_TCB_SND_UNA_RAW    25
+#define M_TCB_SND_UNA_RAW    0x7ffffffULL
+#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
+
+#define W_TCB_SND_NXT_RAW    3
+#define S_TCB_SND_NXT_RAW    20
+#define M_TCB_SND_NXT_RAW    0x7ffffffULL
+#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
+
+#define W_TCB_RCV_NXT    4
+#define S_TCB_RCV_NXT    15
+#define M_TCB_RCV_NXT    0xffffffffULL
+#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
+
+#define W_TCB_RCV_ADV    5
+#define S_TCB_RCV_ADV    15
+#define M_TCB_RCV_ADV    0xffffULL
+#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
+
+#define W_TCB_SND_MAX_RAW    5
+#define S_TCB_SND_MAX_RAW    31
+#define M_TCB_SND_MAX_RAW    0x7ffffffULL
+#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
+
+#define W_TCB_SND_CWND    6
+#define S_TCB_SND_CWND    26
+#define M_TCB_SND_CWND    0x7ffffffULL
+#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
+
+#define W_TCB_SND_SSTHRESH    7
+#define S_TCB_SND_SSTHRESH    21
+#define M_TCB_SND_SSTHRESH    0x7ffffffULL
+#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
+
+#define W_TCB_T_RTT_TS_RECENT_AGE    8
+#define S_TCB_T_RTT_TS_RECENT_AGE    16
+#define M_TCB_T_RTT_TS_RECENT_AGE    0xffffffffULL
+#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
+
+#define W_TCB_T_RTSEQ_RECENT    9
+#define S_TCB_T_RTSEQ_RECENT    16
+#define M_TCB_T_RTSEQ_RECENT    0xffffffffULL
+#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
+
+#define W_TCB_T_SRTT    10
+#define S_TCB_T_SRTT    16
+#define M_TCB_T_SRTT    0xffffULL
+#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
+
+#define W_TCB_T_RTTVAR    11
+#define S_TCB_T_RTTVAR    0
+#define M_TCB_T_RTTVAR    0xffffULL
+#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
+
+#define W_TCB_TS_LAST_ACK_SENT_RAW    11
+#define S_TCB_TS_LAST_ACK_SENT_RAW    16
+#define M_TCB_TS_LAST_ACK_SENT_RAW    0x7ffffffULL
+#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
+
+#define W_TCB_DIP    12
+#define S_TCB_DIP    11
+#define M_TCB_DIP    0xffffffffULL
+#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
+
+#define W_TCB_SIP    13
+#define S_TCB_SIP    11
+#define M_TCB_SIP    0xffffffffULL
+#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
+
+#define W_TCB_DP    14
+#define S_TCB_DP    11
+#define M_TCB_DP    0xffffULL
+#define V_TCB_DP(x) ((x) << S_TCB_DP)
+
+#define W_TCB_SP    14
+#define S_TCB_SP    27
+#define M_TCB_SP    0xffffULL
+#define V_TCB_SP(x) ((x) << S_TCB_SP)
+
+#define W_TCB_TIMESTAMP    15
+#define S_TCB_TIMESTAMP    11
+#define M_TCB_TIMESTAMP    0xffffffffULL
+#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
+
+#define W_TCB_TIMESTAMP_OFFSET    16
+#define S_TCB_TIMESTAMP_OFFSET    11
+#define M_TCB_TIMESTAMP_OFFSET    0xfULL
+#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
+
+#define W_TCB_TX_MAX    16
+#define S_TCB_TX_MAX    15
+#define M_TCB_TX_MAX    0xffffffffULL
+#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
+
+#define W_TCB_TX_HDR_PTR_RAW    17
+#define S_TCB_TX_HDR_PTR_RAW    15
+#define M_TCB_TX_HDR_PTR_RAW    0x1ffffULL
+#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
+
+#define W_TCB_TX_LAST_PTR_RAW    18
+#define S_TCB_TX_LAST_PTR_RAW    0
+#define M_TCB_TX_LAST_PTR_RAW    0x1ffffULL
+#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
+
+#define W_TCB_TX_COMPACT    18
+#define S_TCB_TX_COMPACT    17
+#define M_TCB_TX_COMPACT    0x1ULL
+#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
+
+#define W_TCB_RX_COMPACT    18
+#define S_TCB_RX_COMPACT    18
+#define M_TCB_RX_COMPACT    0x1ULL
+#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
+
+#define W_TCB_RCV_WND    18
+#define S_TCB_RCV_WND    19
+#define M_TCB_RCV_WND    0x7ffffffULL
+#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
+
+#define W_TCB_RX_HDR_OFFSET    19
+#define S_TCB_RX_HDR_OFFSET    14
+#define M_TCB_RX_HDR_OFFSET    0x7ffffffULL
+#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
+
+#define W_TCB_RX_FRAG0_START_IDX_RAW    20
+#define S_TCB_RX_FRAG0_START_IDX_RAW    9
+#define M_TCB_RX_FRAG0_START_IDX_RAW    0x7ffffffULL
+#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
+
+#define W_TCB_RX_FRAG1_START_IDX_OFFSET    21
+#define S_TCB_RX_FRAG1_START_IDX_OFFSET    4
+#define M_TCB_RX_FRAG1_START_IDX_OFFSET    0x7ffffffULL
+#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
+
+#define W_TCB_RX_FRAG0_LEN    21
+#define S_TCB_RX_FRAG0_LEN    31
+#define M_TCB_RX_FRAG0_LEN    0x7ffffffULL
+#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
+
+#define W_TCB_RX_FRAG1_LEN    22
+#define S_TCB_RX_FRAG1_LEN    26
+#define M_TCB_RX_FRAG1_LEN    0x7ffffffULL
+#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
+
+#define W_TCB_NEWRENO_RECOVER    23
+#define S_TCB_NEWRENO_RECOVER    21
+#define M_TCB_NEWRENO_RECOVER    0x7ffffffULL
+#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
+
+#define W_TCB_PDU_HAVE_LEN    24
+#define S_TCB_PDU_HAVE_LEN    16
+#define M_TCB_PDU_HAVE_LEN    0x1ULL
+#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
+
+#define W_TCB_PDU_LEN    24
+#define S_TCB_PDU_LEN    17
+#define M_TCB_PDU_LEN    0xffffULL
+#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
+
+#define W_TCB_RX_QUIESCE    25
+#define S_TCB_RX_QUIESCE    1
+#define M_TCB_RX_QUIESCE    0x1ULL
+#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
+
+#define W_TCB_RX_PTR_RAW    25
+#define S_TCB_RX_PTR_RAW    2
+#define M_TCB_RX_PTR_RAW    0x1ffffULL
+#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
+
+#define W_TCB_CPU_NO    25
+#define S_TCB_CPU_NO    19
+#define M_TCB_CPU_NO    0x7fULL
+#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
+
+#define W_TCB_ULP_TYPE    25
+#define S_TCB_ULP_TYPE    26
+#define M_TCB_ULP_TYPE    0xfULL
+#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
+
+#define W_TCB_RX_FRAG1_PTR_RAW    25
+#define S_TCB_RX_FRAG1_PTR_RAW    30
+#define M_TCB_RX_FRAG1_PTR_RAW    0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    26
+#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    15
+#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW    0x7ffffffULL
+#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
+
+#define W_TCB_RX_FRAG2_PTR_RAW    27
+#define S_TCB_RX_FRAG2_PTR_RAW    10
+#define M_TCB_RX_FRAG2_PTR_RAW    0x1ffffULL
+#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_LEN_RAW    27
+#define S_TCB_RX_FRAG2_LEN_RAW    27
+#define M_TCB_RX_FRAG2_LEN_RAW    0x7ffffffULL
+#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_PTR_RAW    28
+#define S_TCB_RX_FRAG3_PTR_RAW    22
+#define M_TCB_RX_FRAG3_PTR_RAW    0x1ffffULL
+#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
+
+#define W_TCB_RX_FRAG3_LEN_RAW    29
+#define S_TCB_RX_FRAG3_LEN_RAW    7
+#define M_TCB_RX_FRAG3_LEN_RAW    0x7ffffffULL
+#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    30
+#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    2
+#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW    0x7ffffffULL
+#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
+
+#define W_TCB_PDU_HDR_LEN    30
+#define S_TCB_PDU_HDR_LEN    29
+#define M_TCB_PDU_HDR_LEN    0xffULL
+#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
+
+#define W_TCB_SLUSH1    31
+#define S_TCB_SLUSH1    5
+#define M_TCB_SLUSH1    0x7ffffULL
+#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
+
+#define W_TCB_ULP_RAW    31
+#define S_TCB_ULP_RAW    24
+#define M_TCB_ULP_RAW    0xffULL
+#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
+
+#define W_TCB_DDP_RDMAP_VERSION    25
+#define S_TCB_DDP_RDMAP_VERSION    30
+#define M_TCB_DDP_RDMAP_VERSION    0x1ULL
+#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
+
+#define W_TCB_MARKER_ENABLE_RX    25
+#define S_TCB_MARKER_ENABLE_RX    31
+#define M_TCB_MARKER_ENABLE_RX    0x1ULL
+#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
+
+#define W_TCB_MARKER_ENABLE_TX    26
+#define S_TCB_MARKER_ENABLE_TX    0
+#define M_TCB_MARKER_ENABLE_TX    0x1ULL
+#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
+
+#define W_TCB_CRC_ENABLE    26
+#define S_TCB_CRC_ENABLE    1
+#define M_TCB_CRC_ENABLE    0x1ULL
+#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
+
+#define W_TCB_IRS_ULP    26
+#define S_TCB_IRS_ULP    2
+#define M_TCB_IRS_ULP    0x1ffULL
+#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
+
+#define W_TCB_ISS_ULP    26
+#define S_TCB_ISS_ULP    11
+#define M_TCB_ISS_ULP    0x1ffULL
+#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
+
+#define W_TCB_TX_PDU_LEN    26
+#define S_TCB_TX_PDU_LEN    20
+#define M_TCB_TX_PDU_LEN    0x3fffULL
+#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
+
+#define W_TCB_TX_PDU_OUT    27
+#define S_TCB_TX_PDU_OUT    2
+#define M_TCB_TX_PDU_OUT    0x1ULL
+#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
+
+#define W_TCB_CQ_IDX_SQ    27
+#define S_TCB_CQ_IDX_SQ    3
+#define M_TCB_CQ_IDX_SQ    0xffffULL
+#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
+
+#define W_TCB_CQ_IDX_RQ    27
+#define S_TCB_CQ_IDX_RQ    19
+#define M_TCB_CQ_IDX_RQ    0xffffULL
+#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
+
+#define W_TCB_QP_ID    28
+#define S_TCB_QP_ID    3
+#define M_TCB_QP_ID    0xffffULL
+#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
+
+#define W_TCB_PD_ID    28
+#define S_TCB_PD_ID    19
+#define M_TCB_PD_ID    0xffffULL
+#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
+
+#define W_TCB_STAG    29
+#define S_TCB_STAG    3
+#define M_TCB_STAG    0xffffffffULL
+#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
+
+#define W_TCB_RQ_START    30
+#define S_TCB_RQ_START    3
+#define M_TCB_RQ_START    0x3ffffffULL
+#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
+
+#define W_TCB_RQ_MSN    30
+#define S_TCB_RQ_MSN    29
+#define M_TCB_RQ_MSN    0x3ffULL
+#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
+
+#define W_TCB_RQ_MAX_OFFSET    31
+#define S_TCB_RQ_MAX_OFFSET    7
+#define M_TCB_RQ_MAX_OFFSET    0xfULL
+#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
+
+#define W_TCB_RQ_WRITE_PTR    31
+#define S_TCB_RQ_WRITE_PTR    11
+#define M_TCB_RQ_WRITE_PTR    0x3ffULL
+#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
+
+#define W_TCB_INB_WRITE_PERM    31
+#define S_TCB_INB_WRITE_PERM    21
+#define M_TCB_INB_WRITE_PERM    0x1ULL
+#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
+
+#define W_TCB_INB_READ_PERM    31
+#define S_TCB_INB_READ_PERM    22
+#define M_TCB_INB_READ_PERM    0x1ULL
+#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
+
+#define W_TCB_ORD_L_BIT_VLD    31
+#define S_TCB_ORD_L_BIT_VLD    23
+#define M_TCB_ORD_L_BIT_VLD    0x1ULL
+#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
+
+#define W_TCB_RDMAP_OPCODE    31
+#define S_TCB_RDMAP_OPCODE    24
+#define M_TCB_RDMAP_OPCODE    0xfULL
+#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
+
+#define W_TCB_TX_FLUSH    31
+#define S_TCB_TX_FLUSH    28
+#define M_TCB_TX_FLUSH    0x1ULL
+#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
+
+#define W_TCB_TX_OOS_RXMT    31
+#define S_TCB_TX_OOS_RXMT    29
+#define M_TCB_TX_OOS_RXMT    0x1ULL
+#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
+
+#define W_TCB_TX_OOS_TXMT    31
+#define S_TCB_TX_OOS_TXMT    30
+#define M_TCB_TX_OOS_TXMT    0x1ULL
+#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
+
+#define W_TCB_SLUSH_AUX2    31
+#define S_TCB_SLUSH_AUX2    31
+#define M_TCB_SLUSH_AUX2    0x1ULL
+#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
+
+#define W_TCB_RX_FRAG1_PTR_RAW2    25
+#define S_TCB_RX_FRAG1_PTR_RAW2    30
+#define M_TCB_RX_FRAG1_PTR_RAW2    0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
+
+#define W_TCB_RX_DDP_FLAGS    26
+#define S_TCB_RX_DDP_FLAGS    15
+#define M_TCB_RX_DDP_FLAGS    0x3ffULL
+#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
+
+#define W_TCB_SLUSH_AUX3    26
+#define S_TCB_SLUSH_AUX3    31
+#define M_TCB_SLUSH_AUX3    0x1ffULL
+#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
+
+#define W_TCB_RX_DDP_BUF0_OFFSET    27
+#define S_TCB_RX_DDP_BUF0_OFFSET    8
+#define M_TCB_RX_DDP_BUF0_OFFSET    0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
+
+#define W_TCB_RX_DDP_BUF0_LEN    27
+#define S_TCB_RX_DDP_BUF0_LEN    30
+#define M_TCB_RX_DDP_BUF0_LEN    0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
+
+#define W_TCB_RX_DDP_BUF1_OFFSET    28
+#define S_TCB_RX_DDP_BUF1_OFFSET    20
+#define M_TCB_RX_DDP_BUF1_OFFSET    0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
+
+#define W_TCB_RX_DDP_BUF1_LEN    29
+#define S_TCB_RX_DDP_BUF1_LEN    10
+#define M_TCB_RX_DDP_BUF1_LEN    0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
+
+#define W_TCB_RX_DDP_BUF0_TAG    30
+#define S_TCB_RX_DDP_BUF0_TAG    0
+#define M_TCB_RX_DDP_BUF0_TAG    0xffffffffULL
+#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
+
+#define W_TCB_RX_DDP_BUF1_TAG    31
+#define S_TCB_RX_DDP_BUF1_TAG    0
+#define M_TCB_RX_DDP_BUF1_TAG    0xffffffffULL
+#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
+
+#define S_TF_DACK    10
+#define V_TF_DACK(x) ((x) << S_TF_DACK)
+
+#define S_TF_NAGLE    11
+#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
+
+#define S_TF_RECV_SCALE    12
+#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
+
+#define S_TF_RECV_TSTMP    13
+#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
+
+#define S_TF_RECV_SACK    14
+#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
+
+#define S_TF_TURBO    15
+#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
+
+#define S_TF_KEEPALIVE    16
+#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
+
+#define S_TF_TCAM_BYPASS    17
+#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
+
+#define S_TF_CORE_FIN    18
+#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
+
+#define S_TF_CORE_MORE    19
+#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
+
+#define S_TF_MIGRATING    20
+#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
+
+#define S_TF_ACTIVE_OPEN    21
+#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
+
+#define S_TF_ASK_MODE    22
+#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
+
+#define S_TF_NON_OFFLOAD    23
+#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
+
+#define S_TF_MOD_SCHD    24
+#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
+
+#define S_TF_MOD_SCHD_REASON0    25
+#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
+
+#define S_TF_MOD_SCHD_REASON1    26
+#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
+
+#define S_TF_MOD_SCHD_RX    27
+#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
+
+#define S_TF_CORE_PUSH    28
+#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
+
+#define S_TF_RCV_COALESCE_ENABLE    29
+#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
+
+#define S_TF_RCV_COALESCE_PUSH    30
+#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
+
+#define S_TF_RCV_COALESCE_LAST_PSH    31
+#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
+
+#define S_TF_RCV_COALESCE_HEARTBEAT    32
+#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
+
+#define S_TF_HALF_CLOSE    33
+#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
+
+#define S_TF_DACK_MSS    34
+#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
+
+#define S_TF_CCTRL_SEL0    35
+#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
+
+#define S_TF_CCTRL_SEL1    36
+#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
+
+#define S_TF_TCP_NEWRENO_FAST_RECOVERY    37
+#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
+
+#define S_TF_TX_PACE_AUTO    38
+#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
+
+#define S_TF_PEER_FIN_HELD    39
+#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
+
+#define S_TF_CORE_URG    40
+#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
+
+#define S_TF_RDMA_ERROR    41
+#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
+
+#define S_TF_SSWS_DISABLED    42
+#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
+
+#define S_TF_DUPACK_COUNT_ODD    43
+#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
+
+#define S_TF_TX_CHANNEL    44
+#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
+
+#define S_TF_RX_CHANNEL    45
+#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
+
+#define S_TF_TX_PACE_FIXED    46
+#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
+
+#define S_TF_RDMA_FLM_ERROR    47
+#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
+
+#define S_TF_RX_FLOW_CONTROL_DISABLE    48
+#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
+
+#endif /* _TCB_DEFS_H */
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_av.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_av.c
@@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_p
 	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
 					      ib_device);
 
-	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	av = kmem_cache_alloc(av_cache, GFP_KERNEL);
 	if (!av) {
 		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
 			 pd, ah_attr);
@@ -118,8 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_p
 		}
 		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
 	}
-	/* for the time being we use a hard coded PMTU of 2048 Bytes */
-	av->av.pmtu = 4;
+	av->av.pmtu = EHCA_MAX_MTU;
 
 	/* dgid comes in grh.word_3 */
 	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -193,7 +192,7 @@ int ehca_modify_ah(struct ib_ah *ah, str
 		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
 	}
 
-	new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+	new_ehca_av.pmtu = EHCA_MAX_MTU;
 
 	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
 	       sizeof(ah_attr->grh.dgid));
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_classes.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,9 +42,6 @@
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__
 
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
-
 struct ehca_module;
 struct ehca_qp;
 struct ehca_cq;
@@ -54,14 +51,22 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;
 
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
 #ifdef CONFIG_PPC64
 #include "ehca_classes_pSeries.h"
 #endif
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+#include "ehca_irq.h"
 
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
+#define EHCA_EQE_CACHE_SIZE 20
 
-#include "ehca_irq.h"
+struct ehca_eqe_cache_entry {
+	struct ehca_eqe *eqe;
+	struct ehca_cq *cq;
+};
 
 struct ehca_eq {
 	u32 length;
@@ -74,6 +79,8 @@ struct ehca_eq {
 	spinlock_t spinlock;
 	struct tasklet_struct interrupt_task;
 	u32 ist;
+	spinlock_t irq_spinlock;
+	struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };
 
 struct ehca_sport {
@@ -119,13 +126,14 @@ struct ehca_qp {
 	struct ipz_qp_handle ipz_qp_handle;
 	struct ehca_pfqp pf;
 	struct ib_qp_init_attr init_attr;
-	u64 uspace_squeue;
-	u64 uspace_rqueue;
-	u64 uspace_fwh;
 	struct ehca_cq *send_cq;
 	struct ehca_cq *recv_cq;
 	unsigned int sqerr_purgeflag;
 	struct hlist_node list_entries;
+	/* mmap counter for resources mapped into user space */
+	u32 mm_count_squeue;
+	u32 mm_count_rqueue;
+	u32 mm_count_galpa;
 };
 
 /* must be power of 2 */
@@ -142,13 +150,16 @@ struct ehca_cq {
 	struct ipz_cq_handle ipz_cq_handle;
 	struct ehca_pfcq pf;
 	spinlock_t cb_lock;
-	u64 uspace_queue;
-	u64 uspace_fwh;
 	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
 	struct list_head entry;
-	u32 nr_callbacks;
+	u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+	u32 nr_events;    /* #events seen */
+	wait_queue_head_t wait_completion;
 	spinlock_t task_lock;
 	u32 ownpid;
+	/* mmap counter for resources mapped into user space */
+	u32 mm_count_queue;
+	u32 mm_count_galpa;
 };
 
 enum ehca_mr_flag {
@@ -248,20 +259,6 @@ struct ehca_ucontext {
 	struct ib_ucontext ib_ucontext;
 };
 
-struct ehca_module *ehca_module_new(void);
-
-int ehca_module_delete(struct ehca_module *me);
-
-int ehca_eq_ctor(struct ehca_eq *eq);
-
-int ehca_eq_dtor(struct ehca_eq *eq);
-
-struct ehca_shca *ehca_shca_new(void);
-
-int ehca_shca_delete(struct ehca_shca *me);
-
-struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
-
 int ehca_init_pd_cache(void);
 void ehca_cleanup_pd_cache(void);
 int ehca_init_cq_cache(void);
@@ -275,15 +272,16 @@ void ehca_cleanup_mrmw_cache(void);
 
 extern spinlock_t ehca_qp_idr_lock;
 extern spinlock_t ehca_cq_idr_lock;
+extern spinlock_t hcall_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;
 
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
 
 struct ipzu_queue_resp {
-	u64 queue;        /* points to first queue entry */
 	u32 qe_size;      /* queue entry size */
 	u32 act_nr_of_sg;
 	u32 queue_length; /* queue length allocated in bytes */
@@ -296,7 +294,6 @@ struct ehca_create_cq_resp {
 	u32 cq_number;
 	u32 token;
 	struct ipzu_queue_resp ipz_queue;
-	struct h_galpas galpas;
 };
 
 struct ehca_create_qp_resp {
@@ -309,7 +306,6 @@ struct ehca_create_qp_resp {
 	u32 dummy; /* padding for 8 byte alignment */
 	struct ipzu_queue_resp ipz_squeue;
 	struct ipzu_queue_resp ipz_rqueue;
-	struct h_galpas galpas;
 };
 
 struct ehca_alloc_cq_parms {
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_cq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -134,7 +134,7 @@ struct ib_cq *ehca_create_cq(struct ib_d
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
 		return ERR_PTR(-EINVAL);
 
-	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL);
 	if (!my_cq) {
 		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
 			 device);
@@ -147,6 +147,7 @@ struct ib_cq *ehca_create_cq(struct ib_d
 	spin_lock_init(&my_cq->spinlock);
 	spin_lock_init(&my_cq->cb_lock);
 	spin_lock_init(&my_cq->task_lock);
+	init_waitqueue_head(&my_cq->wait_completion);
 	my_cq->ownpid = current->tgid;
 
 	cq = &my_cq->ib_cq;
@@ -267,7 +268,6 @@ struct ib_cq *ehca_create_cq(struct ib_d
 	if (context) {
 		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
 		struct ehca_create_cq_resp resp;
-		struct vm_area_struct *vma;
 		memset(&resp, 0, sizeof(resp));
 		resp.cq_number = my_cq->cq_number;
 		resp.token = my_cq->token;
@@ -276,40 +276,14 @@ struct ib_cq *ehca_create_cq(struct ib_d
 		resp.ipz_queue.queue_length = ipz_queue->queue_length;
 		resp.ipz_queue.pagesize = ipz_queue->pagesize;
 		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-		ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
-				       ipz_queue->queue_length,
-				       (void**)&resp.ipz_queue.queue,
-				       &vma);
-		if (ret) {
-			ehca_err(device, "Could not mmap queue pages");
-			cq = ERR_PTR(ret);
-			goto create_cq_exit4;
-		}
-		my_cq->uspace_queue = resp.ipz_queue.queue;
-		resp.galpas = my_cq->galpas;
-		ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
-					 (void**)&resp.galpas.kernel.fw_handle,
-					 &vma);
-		if (ret) {
-			ehca_err(device, "Could not mmap fw_handle");
-			cq = ERR_PTR(ret);
-			goto create_cq_exit5;
-		}
-		my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
 		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
 			ehca_err(device, "Copy to udata failed.");
-			goto create_cq_exit6;
+			goto create_cq_exit4;
 		}
 	}
 
 	return cq;
 
-create_cq_exit6:
-	ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
-	ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
 create_cq_exit4:
 	ipz_queue_dtor(&my_cq->ipz_queue);
 
@@ -330,10 +304,19 @@ create_cq_exit1:
 	return cq;
 }
 
+static int get_cq_nr_events(struct ehca_cq *my_cq)
+{
+	int ret;
+	unsigned long flags;
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	ret = my_cq->nr_events;
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	return ret;
+}
+
 int ehca_destroy_cq(struct ib_cq *cq)
 {
 	u64 h_ret;
-	int ret;
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	int cq_num = my_cq->cq_number;
 	struct ib_device *device = cq->device;
@@ -343,32 +326,31 @@ int ehca_destroy_cq(struct ib_cq *cq)
 	u32 cur_pid = current->tgid;
 	unsigned long flags;
 
+	if (cq->uobject) {
+		if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+			ehca_err(device, "Resources still referenced in "
+				 "user space cq_num=%x", my_cq->cq_number);
+			return -EINVAL;
+		}
+		if (my_cq->ownpid != cur_pid) {
+			ehca_err(device, "Invalid caller pid=%x ownpid=%x "
+				 "cq_num=%x",
+				 cur_pid, my_cq->ownpid, my_cq->cq_number);
+			return -EINVAL;
+		}
+	}
+
 	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-	while (my_cq->nr_callbacks)
-		yield();
+	while (my_cq->nr_events) {
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		/* recheck nr_events to assure no cqe has just arrived */
+	}
 
 	idr_remove(&ehca_cq_idr, my_cq->token);
 	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
-	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
-		ehca_err(device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_cq->ownpid);
-		return -EINVAL;
-	}
-
-	/* un-mmap if vma alloc */
-	if (my_cq->uspace_queue ) {
-		ret = ehca_munmap(my_cq->uspace_queue,
-				  my_cq->ipz_queue.queue_length);
-		if (ret)
-			ehca_err(device, "Could not munmap queue ehca_cq=%p "
-				 "cq_num=%x", my_cq, cq_num);
-		ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-		if (ret)
-			ehca_err(device, "Could not munmap fwh ehca_cq=%p "
-				 "cq_num=%x", my_cq, cq_num);
-	}
-
 	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
 	if (h_ret == H_R_STATE) {
 		/* cq in err: read err data and destroy it forcibly */
@@ -397,7 +379,7 @@ int ehca_resize_cq(struct ib_cq *cq, int
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	u32 cur_pid = current->tgid;
 
-	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+	if (cq->uobject && my_cq->ownpid != cur_pid) {
 		ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
 			 cur_pid, my_cq->ownpid);
 		return -EINVAL;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_eq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shc
 	struct ib_device *ib_dev = &shca->ib_device;
 
 	spin_lock_init(&eq->spinlock);
+	spin_lock_init(&eq->irq_spinlock);
 	eq->is_initialized = 0;
 
 	if (type != EHCA_EQ && type != EHCA_NEQ) {
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_hca.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -50,7 +50,7 @@ int ehca_query_device(struct ib_device *
 					      ib_device);
 	struct hipz_query_hca *rblock;
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
 		return -ENOMEM;
@@ -110,7 +110,7 @@ int ehca_query_port(struct ib_device *ib
 					      ib_device);
 	struct hipz_query_port *rblock;
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
 		return -ENOMEM;
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ib
 	props->active_width    = IB_WIDTH_12X;
 	props->active_speed    = 0x1;
 
+	/* at the moment (logical) link state is always LINK_UP */
+	props->phys_state      = 0x5;
+
 query_port1:
 	ehca_free_fw_ctrlblock(rblock);
 
@@ -179,7 +182,7 @@ int ehca_query_pkey(struct ib_device *ib
 		return -EINVAL;
 	}
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
 		return -ENOMEM;
@@ -212,7 +215,7 @@ int ehca_query_gid(struct ib_device *ibd
 		return -EINVAL;
 	}
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
 		return -ENOMEM;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_irq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -45,6 +45,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 #include "hipz_fns.h"
+#include "ipz_pt_fn.h"
 
 #define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
 #define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
@@ -62,15 +63,11 @@
 #define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static void queue_comp_task(struct ehca_cq *__cq);
 
 static struct ehca_comp_pool* pool;
 static struct notifier_block comp_pool_callback_nb;
 
-#endif
-
 static inline void comp_event_callback(struct ehca_cq *cq)
 {
 	if (!cq->ib_cq.comp_handler)
@@ -137,22 +134,22 @@ int ehca_error_data(struct ehca_shca *sh
 	u64 *rblock;
 	unsigned long block_count;
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
 		ret = -ENOMEM;
 		goto error_data1;
 	}
 
+	/* rblock must be 4K aligned and should be 4K large */
 	ret = hipz_h_error_data(shca->ipz_hca_handle,
 				resource,
 				rblock,
 				&block_count);
 
-	if (ret == H_R_STATE) {
+	if (ret == H_R_STATE)
 		ehca_err(&shca->ib_device,
 			 "No error data is available: %lx.", resource);
-	}
 	else if (ret == H_SUCCESS) {
 		int length;
 
@@ -162,11 +159,9 @@ int ehca_error_data(struct ehca_shca *sh
 			length = EHCA_PAGESIZE;
 
 		print_error_data(shca, data, rblock, length);
-	}
-	else {
+	} else
 		ehca_err(&shca->ib_device,
 			 "Error data could not be fetched: %lx", resource);
-	}
 
 	ehca_free_fw_ctrlblock(rblock);
 
@@ -360,7 +355,7 @@ static inline void reset_eq_pending(stru
 	return;
 }
 
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
 {
 	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
 
@@ -393,7 +388,7 @@ void ehca_tasklet_neq(unsigned long data
 	return;
 }
 
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
 {
 	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
 
@@ -402,199 +397,275 @@ irqreturn_t ehca_interrupt_eq(int irq, v
 	return IRQ_HANDLED;
 }
 
-void ehca_tasklet_eq(unsigned long data)
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
 {
-	struct ehca_shca *shca = (struct ehca_shca*)data;
-	struct ehca_eqe *eqe;
-	int int_state;
-	int query_cnt = 0;
+	u64 eqe_value;
+	u32 token;
+	unsigned long flags;
+	struct ehca_cq *cq;
 
-	do {
-		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+	eqe_value = eqe->entry;
+	ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+		ehca_dbg(&shca->ib_device, "Got completion event");
+		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, token);
+		if (cq == NULL) {
+			spin_unlock(&ehca_cq_idr_lock);
+			ehca_err(&shca->ib_device,
+				 "Invalid eqe for non-existing cq token=%x",
+				 token);
+			return;
+		}
+		reset_eq_pending(cq);
+		cq->nr_events++;
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		if (ehca_scaling_code)
+			queue_comp_task(cq);
+		else {
+			comp_event_callback(cq);
+			spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+			cq->nr_events--;
+			if (!cq->nr_events)
+				wake_up(&cq->wait_completion);
+			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		}
+	} else {
+		ehca_dbg(&shca->ib_device, "Got non completion event");
+		parse_identifier(shca, eqe_value);
+	}
+}
 
-		if ((shca->hw_level >= 2) && eqe)
-			int_state = 1;
-		else
-			int_state = 0;
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+	struct ehca_eq *eq = &shca->eq;
+	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+	u64 eqe_value;
+	unsigned long flags;
+	unsigned long irq_flags;
+	int eqe_cnt, i;
+	int eq_empty = 0;
+
+	spin_lock_irqsave(&eq->irq_spinlock, irq_flags);
+	if (is_irq) {
+		const int max_query_cnt = 100;
+		int query_cnt = 0;
+		int int_state = 1;
+		do {
+			int_state = hipz_h_query_int_state(
+				shca->ipz_hca_handle, eq->ist);
+			query_cnt++;
+			iosync();
+		} while (int_state && query_cnt < max_query_cnt);
+		if (unlikely((query_cnt == max_query_cnt)))
+			ehca_err(&shca->ib_device, "int_state=%x query_cnt=%x",
+				 int_state, query_cnt);
+	}
 
-		while ((int_state == 1) || eqe) {
-			while (eqe) {
-				u64 eqe_value = eqe->entry;
-
-				ehca_dbg(&shca->ib_device,
-					 "eqe_value=%lx", eqe_value);
-
-				/* TODO: better structure */
-				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
-						   eqe_value)) {
-					unsigned long flags;
-					u32 token;
-					struct ehca_cq *cq;
-
-					ehca_dbg(&shca->ib_device,
-						 "... completion event");
-					token =
-						EHCA_BMASK_GET(EQE_CQ_TOKEN,
-							       eqe_value);
-					spin_lock_irqsave(&ehca_cq_idr_lock,
-							  flags);
-					cq = idr_find(&ehca_cq_idr, token);
-
-					if (cq == NULL) {
-						spin_unlock(&ehca_cq_idr_lock);
-						break;
-					}
-
-					reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-					queue_comp_task(cq);
-					spin_unlock_irqrestore(&ehca_cq_idr_lock,
-							       flags);
-#else
-					spin_unlock_irqrestore(&ehca_cq_idr_lock,
-							       flags);
-					comp_event_callback(cq);
-#endif
-				} else {
-					ehca_dbg(&shca->ib_device,
-						 "... non completion event");
-					parse_identifier(shca, eqe_value);
-				}
-				eqe =
-					(struct ehca_eqe *)ehca_poll_eq(shca,
-								    &shca->eq);
+	/* read out all eqes */
+	eqe_cnt = 0;
+	do {
+		u32 token;
+		eqe_cache[eqe_cnt].eqe =
+			(struct ehca_eqe *)ehca_poll_eq(shca, eq);
+		if (!eqe_cache[eqe_cnt].eqe)
+			break;
+		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+			spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+			if (!eqe_cache[eqe_cnt].cq) {
+				spin_unlock_irqrestore(&ehca_cq_idr_lock,
+						       flags);
+				ehca_err(&shca->ib_device,
+					 "Invalid eqe for non-existing cq "
+					 "token=%x", token);
+				continue;
 			}
-
-			if (shca->hw_level >= 2) {
-				int_state =
-				    hipz_h_query_int_state(shca->ipz_hca_handle,
-							   shca->eq.ist);
-				query_cnt++;
-				iosync();
-				if (query_cnt >= 100) {
-					query_cnt = 0;
-					int_state = 0;
-				}
+			eqe_cache[eqe_cnt].cq->nr_events++;
+			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		} else
+			eqe_cache[eqe_cnt].cq = NULL;
+		eqe_cnt++;
+	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+	if (!eqe_cnt) {
+		if (is_irq)
+			ehca_dbg(&shca->ib_device,
+				 "No eqe found for irq event");
+		goto unlock_irq_spinlock;
+	} else if (!is_irq)
+		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+	if (eqe_cnt == EHCA_EQE_CACHE_SIZE)
+		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+	/* enable irq for new packets */
+	for (i = 0; i < eqe_cnt; i++) {
+		if (eq->eqe_cache[i].cq)
+			reset_eq_pending(eq->eqe_cache[i].cq);
+	}
+	/* check eq */
+	spin_lock_irqsave(&eq->spinlock, flags);
+	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+	/* call completion handler for cached eqes */
+	for (i = 0; i < eqe_cnt; i++)
+		if (eq->eqe_cache[i].cq) {
+			if (ehca_scaling_code)
+				queue_comp_task(eq->eqe_cache[i].cq);
+			else {
+				struct ehca_cq *cq = eq->eqe_cache[i].cq;
+				comp_event_callback(cq);
+				spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+				cq->nr_events--;
+				if (!cq->nr_events)
+					wake_up(&cq->wait_completion);
+				spin_unlock_irqrestore(&ehca_cq_idr_lock,
+						       flags);
 			}
-			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
-
+		} else {
+			ehca_dbg(&shca->ib_device, "got non completion event");
+			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
 		}
-	} while (int_state != 0);
+	/* poll eq if not empty */
+	if (eq_empty)
+		goto unlock_irq_spinlock;
+	do {
+		struct ehca_eqe *eqe;
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+		if (!eqe)
+			break;
+		process_eqe(shca, eqe);
+	} while (1);
 
-	return;
+unlock_irq_spinlock:
+	spin_unlock_irqrestore(&eq->irq_spinlock, irq_flags);
 }
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+void ehca_tasklet_eq(unsigned long data)
+{
+	ehca_process_eq((struct ehca_shca*)data, 1);
+}
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-	unsigned long flags_last_cpu;
+	int cpu;
+	unsigned long flags;
 
+	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level)
 		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-	if (pool->last_cpu == NR_CPUS)
-		pool->last_cpu = first_cpu(cpu_online_map);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-	return pool->last_cpu;
+	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
 			      struct ehca_cpu_comp_task *cct)
 {
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
 
 	if (__cq->nr_callbacks == 0) {
 		__cq->nr_callbacks++;
 		list_add_tail(&__cq->entry, &cct->cq_list);
 		cct->cq_jobs++;
 		wake_up(&cct->wait_queue);
-	}
-	else
+	} else
 		__cq->nr_callbacks++;
 
-	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
 {
-	int cpu;
 	int cpu_id;
 	struct ehca_cpu_comp_task *cct;
+	int cq_jobs;
+	unsigned long flags;
 
-	cpu = get_cpu();
 	cpu_id = find_next_online_cpu(pool);
-
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	BUG_ON(!cct);
 
-	if (cct->cq_jobs > 0) {
+	spin_lock_irqsave(&cct->task_lock, flags);
+	cq_jobs = cct->cq_jobs;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
+	if (cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		BUG_ON(!cct);
 	}
 
 	__queue_comp_task(__cq, cct);
-
-	put_cpu();
-
-	return;
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		spin_unlock_irqrestore(&cct->task_lock, flags);
 		comp_event_callback(cq);
-		spin_lock_irqsave(&cct->task_lock, flags_cct);
 
-		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq->nr_events--;
+		if (!cq->nr_events)
+			wake_up(&cq->wait_completion);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		spin_lock_irqsave(&cct->task_lock, flags);
+		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
-		if (cq->nr_callbacks == 0) {
+		if (!cq->nr_callbacks) {
 			list_del_init(cct->cq_list.next);
 			cct->cq_jobs--;
 		}
-		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+		spin_unlock(&cq->task_lock);
 	}
 
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	return;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
 	struct ehca_cpu_comp_task* cct = __cct;
+	int cql_empty;
 	DECLARE_WAITQUEUE(wait, current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while(!kthread_should_stop()) {
 		add_wait_queue(&cct->wait_queue, &wait);
 
-		if (list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (cql_empty)
 			schedule();
 		else
 			__set_current_state(TASK_RUNNING);
 
 		remove_wait_queue(&cct->wait_queue, &wait);
 
-		if (!list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (!cql_empty)
 			run_comp_task(__cct);
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -637,8 +708,6 @@ static void destroy_comp_task(struct ehc
 
 	if (task)
 		kthread_stop(task);
-
-	return;
 }
 
 static void take_over_work(struct ehca_comp_pool *pool,
@@ -708,14 +777,14 @@ static int comp_pool_callback(struct not
 	return NOTIFY_OK;
 }
 
-#endif
-
 int ehca_create_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
 	int cpu;
 	struct task_struct *task;
 
+	if (!ehca_scaling_code)
+		return 0;
+
 	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
 	if (pool == NULL)
 		return -ENOMEM;
@@ -740,23 +809,23 @@ int ehca_create_comp_pool(void)
 	comp_pool_callback_nb.notifier_call = comp_pool_callback;
 	comp_pool_callback_nb.priority =0;
 	register_cpu_notifier(&comp_pool_callback_nb);
-#endif
+
+	printk(KERN_INFO "eHCA scaling code enabled\n");
 
 	return 0;
 }
 
 void ehca_destroy_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
 	int i;
 
+	if (!ehca_scaling_code)
+		return;
+
 	unregister_cpu_notifier(&comp_pool_callback_nb);
 
 	for (i = 0; i < NR_CPUS; i++) {
 		if (cpu_online(i))
 			destroy_comp_task(pool, i);
 	}
-#endif
-
-	return;
 }
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_irq.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -51,11 +51,12 @@ struct ehca_shca;
 
 int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
 
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
 void ehca_tasklet_neq(unsigned long data);
 
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
 void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
 
 struct ehca_cpu_comp_task {
 	wait_queue_head_t wait_queue;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -143,7 +143,8 @@ struct ib_qp *ehca_create_qp(struct ib_p
 
 int ehca_destroy_qp(struct ib_qp *qp);
 
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata);
 
 int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
@@ -170,19 +171,11 @@ int ehca_mmap(struct ib_ucontext *contex
 
 void ehca_poll_eqs(unsigned long data);
 
-int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
-		     struct vm_area_struct **vma);
-
-int ehca_mmap_register(u64 physical,void **mapped,
-		       struct vm_area_struct **vma);
-
-int ehca_munmap(unsigned long addr, size_t len);
-
 #ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(void);
+void *ehca_alloc_fw_ctrlblock(gfp_t flags);
 void ehca_free_fw_ctrlblock(void *ptr);
 #else
-#define ehca_alloc_fw_ctrlblock() ((void*)get_zeroed_page(GFP_KERNEL))
+#define ehca_alloc_fw_ctrlblock(flags) ((void*) get_zeroed_page(flags))
 #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
 #endif
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0015");
+MODULE_VERSION("SVNEHCA_0022");
 
 int ehca_open_aqp1     = 0;
 int ehca_debug_level   = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr     = 0;
 int ehca_port_act_time = 30;
 int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
+int ehca_scaling_code  = 1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
 module_param_named(debug_level,   ehca_debug_level,   int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr,     ehca_u
 module_param_named(port_act_time, ehca_port_act_time, int, 0);
 module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
 module_param_named(static_rate,   ehca_static_rate,   int, 0);
+module_param_named(scaling_code,   ehca_scaling_code,   int, 0);
 
 MODULE_PARM_DESC(open_aqp1,
 		 "AQP1 on startup (0: no (default), 1: yes)");
@@ -91,12 +93,16 @@ MODULE_PARM_DESC(poll_all_eqs,
 		 " (0: no, 1: yes (default))");
 MODULE_PARM_DESC(static_rate,
 		 "set permanent static rate (default: disabled)");
+MODULE_PARM_DESC(scaling_code,
+		 "set scaling code (0: disabled, 1: enabled/default)");
 
 spinlock_t ehca_qp_idr_lock;
 spinlock_t ehca_cq_idr_lock;
+spinlock_t hcall_lock;
 DEFINE_IDR(ehca_qp_idr);
 DEFINE_IDR(ehca_cq_idr);
 
+
 static struct list_head shca_list; /* list of all registered ehcas */
 static spinlock_t shca_list_lock;
 
@@ -105,9 +111,9 @@ static struct timer_list poll_eqs_timer;
 #ifdef CONFIG_PPC_64K_PAGES
 static struct kmem_cache *ctblk_cache = NULL;
 
-void *ehca_alloc_fw_ctrlblock(void)
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
 {
-	void *ret = kmem_cache_zalloc(ctblk_cache, SLAB_KERNEL);
+	void *ret = kmem_cache_zalloc(ctblk_cache, flags);
 	if (!ret)
 		ehca_gen_err("Out of memory for ctblk");
 	return ret;
@@ -205,7 +211,7 @@ int ehca_sense_attributes(struct ehca_sh
 	u64 h_ret;
 	struct hipz_query_hca *rblock;
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_gen_err("Cannot allocate rblock memory.");
 		return -ENOMEM;
@@ -257,7 +263,7 @@ static int init_node_guid(struct ehca_sh
 	int ret = 0;
 	struct hipz_query_hca *rblock;
 
-	rblock = ehca_alloc_fw_ctrlblock();
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
 		return -ENOMEM;
@@ -287,7 +293,7 @@ int ehca_init_device(struct ehca_shca *s
 	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
 	shca->ib_device.owner               = THIS_MODULE;
 
-	shca->ib_device.uverbs_abi_ver	    = 5;
+	shca->ib_device.uverbs_abi_ver	    = 6;
 	shca->ib_device.uverbs_cmd_mask	    =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
@@ -306,7 +312,7 @@ int ehca_init_device(struct ehca_shca *s
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
 
-	shca->ib_device.node_type           = IB_NODE_CA;
+	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
 	shca->ib_device.phys_port_cnt       = shca->num_ports;
 	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
 	shca->ib_device.query_device        = ehca_query_device;
@@ -351,7 +357,7 @@ int ehca_init_device(struct ehca_shca *s
 	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
 	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
 	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
-	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	/* shca->ib_device.process_mad	    = ehca_process_mad;     */
 	shca->ib_device.mmap		    = ehca_mmap;
 
 	return ret;
@@ -468,7 +474,7 @@ static ssize_t  ehca_show_##name(struct 
 									   \
 	shca = dev->driver_data;					   \
 									   \
-	rblock = ehca_alloc_fw_ctrlblock();				   \
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);			   \
 	if (!rblock) {						           \
 		dev_err(dev, "Can't allocate rblock memory.");		   \
 		return 0;						   \
@@ -777,8 +783,24 @@ void ehca_poll_eqs(unsigned long data)
 
 	spin_lock(&shca_list_lock);
 	list_for_each_entry(shca, &shca_list, shca_list) {
-		if (shca->eq.is_initialized)
-			ehca_tasklet_eq((unsigned long)(void*)shca);
+		if (shca->eq.is_initialized) {
+			/* call deadman proc only if eq ptr does not change */
+			struct ehca_eq *eq = &shca->eq;
+			int max = 3;
+			volatile u64 q_ofs, q_ofs2;
+			u64 flags;
+			spin_lock_irqsave(&eq->spinlock, flags);
+			q_ofs = eq->ipz_queue.current_q_offset;
+			spin_unlock_irqrestore(&eq->spinlock, flags);
+			do {
+				spin_lock_irqsave(&eq->spinlock, flags);
+				q_ofs2 = eq->ipz_queue.current_q_offset;
+				spin_unlock_irqrestore(&eq->spinlock, flags);
+				max--;
+			} while (q_ofs == q_ofs2 && max > 0);
+			if (q_ofs == q_ofs2)
+				ehca_process_eq(shca, 0);
+		}
 	}
 	mod_timer(&poll_eqs_timer, jiffies + HZ);
 	spin_unlock(&shca_list_lock);
@@ -789,11 +811,12 @@ int __init ehca_module_init(void)
 	int ret;
 
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
-	                 "(Rel.: SVNEHCA_0015)\n");
+	                 "(Rel.: SVNEHCA_0022)\n");
 	idr_init(&ehca_qp_idr);
 	idr_init(&ehca_cq_idr);
 	spin_lock_init(&ehca_qp_idr_lock);
 	spin_lock_init(&ehca_cq_idr_lock);
+	spin_lock_init(&hcall_lock);
 
 	INIT_LIST_HEAD(&shca_list);
 	spin_lock_init(&shca_list_lock);
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -53,7 +53,7 @@ static struct ehca_mr *ehca_mr_new(void)
 {
 	struct ehca_mr *me;
 
-	me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
+	me = kmem_cache_alloc(mr_cache, GFP_KERNEL);
 	if (me) {
 		memset(me, 0, sizeof(struct ehca_mr));
 		spin_lock_init(&me->mrlock);
@@ -72,7 +72,7 @@ static struct ehca_mw *ehca_mw_new(void)
 {
 	struct ehca_mw *me;
 
-	me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
+	me = kmem_cache_alloc(mw_cache, GFP_KERNEL);
 	if (me) {
 		memset(me, 0, sizeof(struct ehca_mw));
 		spin_lock_init(&me->mwlock);
@@ -255,7 +255,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib
 	u32 num_pages_mr;
 	u32 num_pages_4k; /* 4k portion "pages" */
 
-        if (!pd) {
+	if (!pd) {
 		ehca_gen_err("bad pd=%p", pd);
 		return ERR_PTR(-EFAULT);
 	}
@@ -1013,7 +1013,7 @@ int ehca_reg_mr_rpages(struct ehca_shca 
 	u32 i;
 	u64 *kpage;
 
-	kpage = ehca_alloc_fw_ctrlblock();
+	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!kpage) {
 		ehca_err(&shca->ib_device, "kpage alloc failed");
 		ret = -ENOMEM;
@@ -1124,7 +1124,7 @@ inline int ehca_rereg_mr_rereg1(struct e
 	ehca_mrmw_map_acl(acl, &hipz_acl);
 	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
 
-	kpage = ehca_alloc_fw_ctrlblock();
+	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!kpage) {
 		ehca_err(&shca->ib_device, "kpage alloc failed");
 		ret = -ENOMEM;
@@ -2045,13 +2045,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hi
 	switch (hipz_rc) {
 	case H_SUCCESS:	             /* successful completion */
 		return 0;
-	case H_ADAPTER_PARM:         /* invalid adapter handle */
-	case H_RT_PARM:              /* invalid resource type */
 	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-	case H_MLENGTH_PARM:         /* invalid memory length */
-	case H_MEM_ACCESS_PARM:      /* invalid access controls */
 	case H_CONSTRAINED:          /* resource constraint */
-		return -EINVAL;
+	case H_NO_MEM:
+		return -ENOMEM;
 	case H_BUSY:                 /* long busy */
 		return -EBUSY;
 	default:
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_pd.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -50,7 +50,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_de
 {
 	struct ehca_pd *pd;
 
-	pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+	pd = kmem_cache_alloc(pd_cache, GFP_KERNEL);
 	if (!pd) {
 		ehca_err(device, "device=%p context=%p out of memory",
 			 device, context);
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_qp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -450,7 +450,7 @@ struct ib_qp *ehca_create_qp(struct ib_p
 	if (pd->uobject && udata)
 		context = pd->uobject->context;
 
-	my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+	my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
 		return ERR_PTR(-ENOMEM);
@@ -637,7 +637,6 @@ struct ib_qp *ehca_create_qp(struct ib_p
 		struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
 		struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
 		struct ehca_create_qp_resp resp;
-		struct vm_area_struct * vma;
 		memset(&resp, 0, sizeof(resp));
 
 		resp.qp_num = my_qp->real_qp_num;
@@ -651,59 +650,21 @@ struct ib_qp *ehca_create_qp(struct ib_p
 		resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
 		resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
 		resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
-		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
-				       ipz_rqueue->queue_length,
-				       (void**)&resp.ipz_rqueue.queue,
-				       &vma);
-		if (ret) {
-			ehca_err(pd->device, "Could not mmap rqueue pages");
-			goto create_qp_exit3;
-		}
-		my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
 		/* squeue properties */
 		resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
 		resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
 		resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
 		resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
 		resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
-		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
-				       ipz_squeue->queue_length,
-				       (void**)&resp.ipz_squeue.queue,
-				       &vma);
-		if (ret) {
-			ehca_err(pd->device, "Could not mmap squeue pages");
-			goto create_qp_exit4;
-		}
-		my_qp->uspace_squeue = resp.ipz_squeue.queue;
-		/* fw_handle */
-		resp.galpas = my_qp->galpas;
-		ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
-					 (void**)&resp.galpas.kernel.fw_handle,
-					 &vma);
-		if (ret) {
-			ehca_err(pd->device, "Could not mmap fw_handle");
-			goto create_qp_exit5;
-		}
-		my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
-
 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
 			ehca_err(pd->device, "Copy to udata failed");
 			ret = -EINVAL;
-			goto create_qp_exit6;
+			goto create_qp_exit3;
 		}
 	}
 
 	return &my_qp->ib_qp;
 
-create_qp_exit6:
-	ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
-
-create_qp_exit5:
-	ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
-
-create_qp_exit4:
-	ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
-
 create_qp_exit3:
 	ipz_queue_dtor(&my_qp->ipz_rqueue);
 	ipz_queue_dtor(&my_qp->ipz_squeue);
@@ -732,8 +693,7 @@ static int prepare_sqe_rts(struct ehca_q
 	u64 h_ret;
 	struct ipz_queue *squeue;
 	void *bad_send_wqe_p, *bad_send_wqe_v;
-	void *squeue_start_p, *squeue_end_p;
-	void *squeue_start_v, *squeue_end_v;
+	u64 q_ofs;
 	struct ehca_wqe *wqe;
 	int qp_num = my_qp->ib_qp.qp_num;
 
@@ -755,26 +715,23 @@ static int prepare_sqe_rts(struct ehca_q
 	if (ehca_debug_level)
 		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
 	squeue = &my_qp->ipz_squeue;
-	squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
-	squeue_end_p = squeue_start_p+squeue->queue_length;
-	squeue_start_v = abs_to_virt((u64)squeue_start_p);
-	squeue_end_v = abs_to_virt((u64)squeue_end_p);
-	ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
-		 qp_num, squeue_start_v, squeue_end_v);
+	if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
+		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
+			 " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
+		return -EFAULT;
+	}
 
 	/* loop sets wqe's purge bit */
-	wqe = (struct ehca_wqe*)bad_send_wqe_v;
+	wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs);
 	*bad_wqe_cnt = 0;
 	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
 		if (ehca_debug_level)
 			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
 		wqe->nr_of_data_seg = 0; /* suppress data access */
 		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-		wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
+		wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs);
 		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
-		if ((void*)wqe >= squeue_end_v) {
-			wqe = squeue_start_v;
-		}
 	}
 	/*
 	 * bad wqe will be reprocessed and ignored when pol_cq() is called,
@@ -811,8 +768,8 @@ static int internal_modify_qp(struct ib_
 	unsigned long spl_flags = 0;
 
 	/* do query_qp to obtain current attr values */
-	mqpcb = ehca_alloc_fw_ctrlblock();
-	if (mqpcb == NULL) {
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!mqpcb) {
 		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
 			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
 		return -ENOMEM;
@@ -935,7 +892,7 @@ static int internal_modify_qp(struct ib_
 	     my_qp->qp_type == IB_QPT_SMI) &&
 	    statetrans == IB_QPST_SQE2RTS) {
 		/* mark next free wqe if kernel */
-		if (my_qp->uspace_squeue == 0) {
+		if (!ibqp->uobject) {
 			struct ehca_wqe *wqe;
 			/* lock send queue */
 			spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
@@ -1230,7 +1187,8 @@ modify_qp_exit1:
 	return ret;
 }
 
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata)
 {
 	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
@@ -1276,7 +1234,7 @@ int ehca_query_qp(struct ib_qp *qp,
 		return -EINVAL;
 	}
 
-	qpcb = ehca_alloc_fw_ctrlblock();
+	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!qpcb) {
 		ehca_err(qp->device,"Out of memory for qpcb "
 			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
@@ -1420,11 +1378,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
 	enum ib_qp_type	qp_type;
 	unsigned long flags;
 
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
+	if (ibqp->uobject) {
+		if (my_qp->mm_count_galpa ||
+		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
+			ehca_err(ibqp->device, "Resources still referenced in "
+				 "user space qp_num=%x", ibqp->qp_num);
+			return -EINVAL;
+		}
+		if (my_pd->ownpid != cur_pid) {
+			ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, my_pd->ownpid);
+			return -EINVAL;
+		}
 	}
 
 	if (my_qp->send_cq) {
@@ -1442,24 +1407,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
 	idr_remove(&ehca_qp_idr, my_qp->token);
 	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
-	/* un-mmap if vma alloc */
-	if (my_qp->uspace_rqueue) {
-		ret = ehca_munmap(my_qp->uspace_rqueue,
-				  my_qp->ipz_rqueue.queue_length);
-		if (ret)
-			ehca_err(ibqp->device, "Could not munmap rqueue "
-				 "qp_num=%x", qp_num);
-		ret = ehca_munmap(my_qp->uspace_squeue,
-				  my_qp->ipz_squeue.queue_length);
-		if (ret)
-			ehca_err(ibqp->device, "Could not munmap squeue "
-				 "qp_num=%x", qp_num);
-		ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
-		if (ret)
-			ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
-				 qp_num);
-	}
-
 	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -579,7 +579,7 @@ poll_cq_one_read_cqe:
 	} else
 		wc->status = IB_WC_SUCCESS;
 
-	wc->qp_num = cqe->local_qp_number;
+	wc->qp = NULL;
 	wc->byte_len = cqe->nr_bytes_transferred;
 	wc->pkey_index = cqe->pkey_index;
 	wc->slid = cqe->rlid;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_tools.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -63,6 +63,7 @@
 #include <asm/ibmebus.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
+#include <asm/hvcall.h>
 
 extern int ehca_debug_level;
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -68,105 +68,183 @@ int ehca_dealloc_ucontext(struct ib_ucon
 	return 0;
 }
 
-struct page *ehca_nopage(struct vm_area_struct *vma,
-			 unsigned long address, int *type)
+static void ehca_mm_open(struct vm_area_struct *vma)
 {
-	struct page *mypage = NULL;
-	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
-	u32 idr_handle = fileoffset >> 32;
-	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
-	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
-	u32 cur_pid = current->tgid;
-	unsigned long flags;
-	struct ehca_cq *cq;
-	struct ehca_qp *qp;
-	struct ehca_pd *pd;
-	u64 offset;
-	void *vaddr;
+	u32 *count = (u32*)vma->vm_private_data;
+	if (!count) {
+		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+			     vma->vm_start, vma->vm_end);
+		return;
+	}
+	(*count)++;
+	if (!(*count))
+		ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
+			     vma->vm_start, vma->vm_end);
+	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+		     vma->vm_start, vma->vm_end, *count);
+}
 
-	switch (q_type) {
-	case 1: /* CQ */
-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-		cq = idr_find(&ehca_cq_idr, idr_handle);
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+static void ehca_mm_close(struct vm_area_struct *vma)
+{
+	u32 *count = (u32*)vma->vm_private_data;
+	if (!count) {
+		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+			     vma->vm_start, vma->vm_end);
+		return;
+	}
+	(*count)--;
+	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+		     vma->vm_start, vma->vm_end, *count);
+}
 
-		/* make sure this mmap really belongs to the authorized user */
-		if (!cq) {
-			ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
-			return NOPAGE_SIGBUS;
+static struct vm_operations_struct vm_ops = {
+	.open =	ehca_mm_open,
+	.close = ehca_mm_close,
+};
+
+static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
+			u32 *mm_count)
+{
+	int ret;
+	u64 vsize, physical;
+
+	vsize = vma->vm_end - vma->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
+		return -EINVAL;
+	}
+
+	physical = galpas->user.fw_handle;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
+	/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+	ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
+			      vsize, vma->vm_page_prot);
+	if (unlikely(ret)) {
+		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+		return -ENOMEM;
+	}
+
+	vma->vm_private_data = mm_count;
+	(*mm_count)++;
+	vma->vm_ops = &vm_ops;
+
+	return 0;
+}
+
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+			   u32 *mm_count)
+{
+	int ret;
+	u64 start, ofs;
+	struct page *page;
+
+	vma->vm_flags |= VM_RESERVED;
+	start = vma->vm_start;
+	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
+		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
+		page = virt_to_page(virt_addr);
+		ret = vm_insert_page(vma, start, page);
+		if (unlikely(ret)) {
+			ehca_gen_err("vm_insert_page() failed rc=%x", ret);
+			return ret;
 		}
+		start +=  PAGE_SIZE;
+	}
+	vma->vm_private_data = mm_count;
+	(*mm_count)++;
+	vma->vm_ops = &vm_ops;
 
-		if (cq->ownpid != cur_pid) {
+	return 0;
+}
+
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+			u32 rsrc_type)
+{
+	int ret;
+
+	switch (rsrc_type) {
+	case 1: /* galpa fw handle */
+		ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+		ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+		if (unlikely(ret)) {
 			ehca_err(cq->ib_cq.device,
-				 "Invalid caller pid=%x ownpid=%x",
-				 cur_pid, cq->ownpid);
-			return NOPAGE_SIGBUS;
+				 "ehca_mmap_fw() failed rc=%x cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
 		}
+		break;
 
-		if (rsrc_type == 2) {
-			ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
-			offset = address - vma->vm_start;
-			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
-			ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
-				 offset, vaddr);
-			mypage = virt_to_page(vaddr);
+	case 2: /* cq queue_addr */
+		ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
+		ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
+		if (unlikely(ret)) {
+			ehca_err(cq->ib_cq.device,
+				 "ehca_mmap_queue() failed rc=%x cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
 		}
 		break;
 
-	case 2: /* QP */
-		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
-		qp = idr_find(&ehca_qp_idr, idr_handle);
-		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	default:
+		ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
+			 rsrc_type, cq->cq_number);
+		return -EINVAL;
+	}
 
-		/* make sure this mmap really belongs to the authorized user */
-		if (!qp) {
-			ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
-			return NOPAGE_SIGBUS;
+	return 0;
+}
+
+static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
+			u32 rsrc_type)
+{
+	int ret;
+
+	switch (rsrc_type) {
+	case 1: /* galpa fw handle */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
+		ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_fw() failed rc=%x qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
 		}
+		break;
 
-		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
-		if (pd->ownpid != cur_pid) {
+	case 2: /* qp rqueue_addr */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
+			 qp->ib_qp.qp_num);
+		ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue);
+		if (unlikely(ret)) {
 			ehca_err(qp->ib_qp.device,
-				 "Invalid caller pid=%x ownpid=%x",
-				 cur_pid, pd->ownpid);
-			return NOPAGE_SIGBUS;
+				 "ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
 		}
+		break;
 
-		if (rsrc_type == 2) {	/* rqueue */
-			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
-			offset = address - vma->vm_start;
-			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
-			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
-				 offset, vaddr);
-			mypage = virt_to_page(vaddr);
-		} else if (rsrc_type == 3) {	/* squeue */
-			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
-			offset = address - vma->vm_start;
-			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
-			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
-				 offset, vaddr);
-			mypage = virt_to_page(vaddr);
+	case 3: /* qp squeue_addr */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
+			 qp->ib_qp.qp_num);
+		ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
 		}
 		break;
 
 	default:
-		ehca_gen_err("bad queue type %x", q_type);
-		return NOPAGE_SIGBUS;
-	}
-
-	if (!mypage) {
-		ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
-		return NOPAGE_SIGBUS;
+		ehca_err(qp->ib_qp.device, "bad resource type=%x qp_num=%x",
+			 rsrc_type, qp->ib_qp.qp_num);
+		return -EINVAL;
 	}
-	get_page(mypage);
 
-	return mypage;
+	return 0;
 }
 
-static struct vm_operations_struct ehcau_vm_ops = {
-	.nopage = ehca_nopage,
-};
-
 int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
@@ -175,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *contex
 	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
 	u32 cur_pid = current->tgid;
 	u32 ret;
-	u64 vsize, physical;
 	unsigned long flags;
 	struct ehca_cq *cq;
 	struct ehca_qp *qp;
@@ -201,44 +278,12 @@ int ehca_mmap(struct ib_ucontext *contex
 		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
 			return -EINVAL;
 
-		switch (rsrc_type) {
-		case 1: /* galpa fw handle */
-			ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
-			vma->vm_flags |= VM_RESERVED;
-			vsize = vma->vm_end - vma->vm_start;
-			if (vsize != EHCA_PAGESIZE) {
-				ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
-					 vma->vm_end - vma->vm_start);
-				return -EINVAL;
-			}
-
-			physical = cq->galpas.user.fw_handle;
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-			vma->vm_flags |= VM_IO | VM_RESERVED;
-
-			ehca_dbg(cq->ib_cq.device,
-				 "vsize=%lx physical=%lx", vsize, physical);
-			ret = remap_pfn_range(vma, vma->vm_start,
-					      physical >> PAGE_SHIFT, vsize,
-					      vma->vm_page_prot);
-			if (ret) {
-				ehca_err(cq->ib_cq.device,
-					 "remap_pfn_range() failed ret=%x",
-					 ret);
-				return -ENOMEM;
-			}
-			break;
-
-		case 2: /* cq queue_addr */
-			ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
-			vma->vm_flags |= VM_RESERVED;
-			vma->vm_ops = &ehcau_vm_ops;
-			break;
-
-		default:
-			ehca_err(cq->ib_cq.device, "bad resource type %x",
-				 rsrc_type);
-			return -EINVAL;
+		ret = ehca_mmap_cq(vma, cq, rsrc_type);
+		if (unlikely(ret)) {
+			ehca_err(cq->ib_cq.device,
+				 "ehca_mmap_cq() failed rc=%x cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
 		}
 		break;
 
@@ -262,50 +307,12 @@ int ehca_mmap(struct ib_ucontext *contex
 		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
 			return -EINVAL;
 
-		switch (rsrc_type) {
-		case 1: /* galpa fw handle */
-			ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
-			vma->vm_flags |= VM_RESERVED;
-			vsize = vma->vm_end - vma->vm_start;
-			if (vsize != EHCA_PAGESIZE) {
-				ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
-					 vma->vm_end - vma->vm_start);
-				return -EINVAL;
-			}
-
-			physical = qp->galpas.user.fw_handle;
-			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-			vma->vm_flags |= VM_IO | VM_RESERVED;
-
-			ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
-				 vsize, physical);
-			ret = remap_pfn_range(vma, vma->vm_start,
-					      physical >> PAGE_SHIFT, vsize,
-					      vma->vm_page_prot);
-			if (ret) {
-				ehca_err(qp->ib_qp.device,
-					 "remap_pfn_range() failed ret=%x",
-					 ret);
-				return -ENOMEM;
-			}
-			break;
-
-		case 2: /* qp rqueue_addr */
-			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
-			vma->vm_flags |= VM_RESERVED;
-			vma->vm_ops = &ehcau_vm_ops;
-			break;
-
-		case 3: /* qp squeue_addr */
-			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
-			vma->vm_flags |= VM_RESERVED;
-			vma->vm_ops = &ehcau_vm_ops;
-			break;
-
-		default:
-			ehca_err(qp->ib_qp.device, "bad resource type %x",
-				 rsrc_type);
-			return -EINVAL;
+		ret = ehca_mmap_qp(vma, qp, rsrc_type);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_qp() failed rc=%x qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
 		}
 		break;
 
@@ -316,77 +323,3 @@ int ehca_mmap(struct ib_ucontext *contex
 
 	return 0;
 }
-
-int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
-		     struct vm_area_struct **vma)
-{
-	down_write(&current->mm->mmap_sem);
-	*mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
-				 MAP_SHARED | MAP_ANONYMOUS,
-				 foffset);
-	up_write(&current->mm->mmap_sem);
-	if (!(*mapped)) {
-		ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
-			     foffset, length);
-		return -EINVAL;
-	}
-
-	*vma = find_vma(current->mm, (u64)*mapped);
-	if (!(*vma)) {
-		down_write(&current->mm->mmap_sem);
-		do_munmap(current->mm, 0, length);
-		up_write(&current->mm->mmap_sem);
-		ehca_gen_err("couldn't find vma queue=%p", *mapped);
-		return -EINVAL;
-	}
-	(*vma)->vm_flags |= VM_RESERVED;
-	(*vma)->vm_ops = &ehcau_vm_ops;
-
-	return 0;
-}
-
-int ehca_mmap_register(u64 physical, void **mapped,
-		       struct vm_area_struct **vma)
-{
-	int ret;
-	unsigned long vsize;
-	/* ehca hw supports only 4k page */
-	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
-	if (ret) {
-		ehca_gen_err("could'nt mmap physical=%lx", physical);
-		return ret;
-	}
-
-	(*vma)->vm_flags |= VM_RESERVED;
-	vsize = (*vma)->vm_end - (*vma)->vm_start;
-	if (vsize != EHCA_PAGESIZE) {
-		ehca_gen_err("invalid vsize=%lx",
-			     (*vma)->vm_end - (*vma)->vm_start);
-		return -EINVAL;
-	}
-
-	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
-	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
-
-	ret = remap_pfn_range((*vma), (*vma)->vm_start,
-			      physical >> PAGE_SHIFT, vsize,
-			      (*vma)->vm_page_prot);
-	if (ret) {
-		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
-		return -ENOMEM;
-	}
-
-	return 0;
-
-}
-
-int ehca_munmap(unsigned long addr, size_t len) {
-	int ret = 0;
-	struct mm_struct *mm = current->mm;
-	if (mm) {
-		down_write(&mm->mmap_sem);
-		ret = do_munmap(mm, addr, len);
-		up_write(&mm->mmap_sem);
-	}
-	return ret;
-}
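
For readers following the ehca_uverbs.c rewrite above: the old nopage handler is replaced by per-mapping reference counts (mm_count_galpa, mm_count_queue, mm_count_rqueue, mm_count_squeue) that are bumped when a resource is mmap'ed and maintained afterwards through vm_operations_struct open/close callbacks (ehca_mm_open appears earlier in the patch). The fragment below is a minimal sketch of that pattern only, not part of the patch; the demo_* names are illustrative, and no locking is shown.

/* Illustrative sketch only -- not part of the patch. */
#include <linux/mm.h>

struct demo_resource {
	u32 mm_count;			/* number of live userspace mappings */
};

static void demo_vma_open(struct vm_area_struct *vma)
{
	struct demo_resource *res = vma->vm_private_data;

	/* called when the mapping is duplicated, e.g. on fork() */
	res->mm_count++;
}

static void demo_vma_close(struct vm_area_struct *vma)
{
	struct demo_resource *res = vma->vm_private_data;

	/* called on munmap()/exit(); the resource may be torn down at 0 */
	res->mm_count--;
}

static struct vm_operations_struct demo_vm_ops = {
	.open  = demo_vma_open,
	.close = demo_vma_close,
};

/* In the driver's mmap handler, after remap_pfn_range()/vm_insert_page(): */
static void demo_attach(struct vm_area_struct *vma, struct demo_resource *res)
{
	vma->vm_private_data = res;
	res->mm_count++;		/* count the initial mmap() itself */
	vma->vm_ops = &demo_vm_ops;
}
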
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/hcp_if.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/hcp_if.c
@@ -48,27 +48,27 @@
 #include "hipz_fns.h"
 #include "ipz_pt_fn.h"
 
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9,11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12,12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13,15)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18,18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19,21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22,23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31,31)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56,63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0,15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16,31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32,39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40,47)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16,31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48,63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8,15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24,31)
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
 
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0,31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32,63)
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
 
 /* direct access qp controls */
 #define DAQP_CTRL_ENABLE    0x01
@@ -95,35 +95,25 @@ static u32 get_longbusy_msecs(int longbu
 	}
 }
 
-static long ehca_hcall_7arg_7ret(unsigned long opcode,
-				 unsigned long arg1,
-				 unsigned long arg2,
-				 unsigned long arg3,
-				 unsigned long arg4,
-				 unsigned long arg5,
-				 unsigned long arg6,
-				 unsigned long arg7,
-				 unsigned long *out1,
-				 unsigned long *out2,
-				 unsigned long *out3,
-				 unsigned long *out4,
-				 unsigned long *out5,
-				 unsigned long *out6,
-				 unsigned long *out7)
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
 {
 	long ret;
 	int i, sleep_msecs;
 
-	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx arg5=%lx "
-		     "arg6=%lx arg7=%lx", opcode, arg1, arg2, arg3, arg4, arg5,
-		     arg6, arg7);
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 
 	for (i = 0; i < 5; i++) {
-		ret = plpar_hcall_7arg_7ret(opcode,
-					    arg1, arg2, arg3, arg4,
-					    arg5, arg6, arg7,
-					    out1, out2, out3, out4,
-					    out5, out6,out7);
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
 
 		if (H_IS_LONG_BUSY(ret)) {
 			sleep_msecs = get_longbusy_msecs(ret);
@@ -134,61 +124,52 @@ static long ehca_hcall_7arg_7ret(unsigne
 		if (ret < H_SUCCESS)
 			ehca_gen_err("opcode=%lx ret=%lx"
 				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
-				     " arg5=%lx arg6=%lx arg7=%lx"
-				     " out1=%lx out2=%lx out3=%lx out4=%lx"
-				     " out5=%lx out6=%lx out7=%lx",
+				     " arg5=%lx arg6=%lx arg7=%lx ",
 				     opcode, ret,
-				     arg1, arg2, arg3, arg4,
-				     arg5, arg6, arg7,
-				     *out1, *out2, *out3, *out4,
-				     *out5, *out6, *out7);
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7);
 
-		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
-			     "out4=%lx out5=%lx out6=%lx out7=%lx",
-			     opcode, ret, *out1, *out2, *out3, *out4, *out5,
-			     *out6, *out7);
+		ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
 		return ret;
+
 	}
 
 	return H_BUSY;
 }
-
-static long ehca_hcall_9arg_9ret(unsigned long opcode,
-				 unsigned long arg1,
-				 unsigned long arg2,
-				 unsigned long arg3,
-				 unsigned long arg4,
-				 unsigned long arg5,
-				 unsigned long arg6,
-				 unsigned long arg7,
-				 unsigned long arg8,
-				 unsigned long arg9,
-				 unsigned long *out1,
-				 unsigned long *out2,
-				 unsigned long *out3,
-				 unsigned long *out4,
-				 unsigned long *out5,
-				 unsigned long *out6,
-				 unsigned long *out7,
-				 unsigned long *out8,
-				 unsigned long *out9)
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
 {
 	long ret;
-	int i, sleep_msecs;
+	int i, sleep_msecs, lock_is_set = 0;
+	unsigned long flags;
+
 
 	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
 		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
 		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
 		     arg8, arg9);
-
+
 	for (i = 0; i < 5; i++) {
-		ret = plpar_hcall_9arg_9ret(opcode,
-					    arg1, arg2, arg3, arg4,
-					    arg5, arg6, arg7, arg8,
-					    arg9,
-					    out1, out2, out3, out4,
-					    out5, out6, out7, out8,
-					    out9);
+		if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
+			spin_lock_irqsave(&hcall_lock, flags);
+			lock_is_set = 1;
+		}
+
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (lock_is_set)
+			spin_unlock_irqrestore(&hcall_lock, flags);
 
 		if (H_IS_LONG_BUSY(ret)) {
 			sleep_msecs = get_longbusy_msecs(ret);
@@ -205,37 +186,34 @@ static long ehca_hcall_9arg_9ret(unsigne
 				     " out5=%lx out6=%lx out7=%lx out8=%lx"
 				     " out9=%lx",
 				     opcode, ret,
-				     arg1, arg2, arg3, arg4,
-				     arg5, arg6, arg7, arg8,
-				     arg9,
-				     *out1, *out2, *out3, *out4,
-				     *out5, *out6, *out7, *out8,
-				     *out9);
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9,
+				     outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
 
 		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
 			     "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
-			     "out9=%lx", opcode, ret,*out1, *out2, *out3, *out4,
-			     *out5, *out6, *out7, *out8, *out9);
+			     "out9=%lx",
+			     opcode, ret, outs[0], outs[1], outs[2], outs[3],
+			     outs[4], outs[5], outs[6], outs[7], outs[8]);
 		return ret;
-
 	}
 
 	return H_BUSY;
 }
-
 u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
 			     struct ehca_pfeq *pfeq,
 			     const u32 neq_control,
 			     const u32 number_of_entries,
 			     struct ipz_eq_handle *eq_handle,
-			     u32 * act_nr_of_entries,
-			     u32 * act_pages,
-			     u32 * eq_ist)
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
 {
 	u64 ret;
-	u64 dummy;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 	u64 allocate_controls;
-	u64 act_nr_of_entries_out, act_pages_out, eq_ist_out;
 
 	/* resource type */
 	allocate_controls = 3ULL;
@@ -246,22 +224,15 @@ u64 hipz_h_alloc_resource_eq(const struc
 	else /* notification event queue */
 		allocate_controls = (1ULL << 63) | allocate_controls;
 
-	ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE,
-				   adapter_handle.handle,  /* r4 */
-				   allocate_controls,      /* r5 */
-				   number_of_entries,      /* r6 */
-				   0, 0, 0, 0,
-				   &eq_handle->handle,     /* r4 */
-				   &dummy,	           /* r5 */
-				   &dummy,	           /* r6 */
-				   &act_nr_of_entries_out, /* r7 */
-				   &act_pages_out,	   /* r8 */
-				   &eq_ist_out,            /* r8 */
-				   &dummy);
-
-	*act_nr_of_entries = (u32)act_nr_of_entries_out;
-	*act_pages         = (u32)act_pages_out;
-	*eq_ist            = (u32)eq_ist_out;
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,  /* r4 */
+				allocate_controls,      /* r5 */
+				number_of_entries,      /* r6 */
+				0, 0, 0, 0, 0, 0);
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resource - ret=%lx ", ret);
@@ -273,20 +244,11 @@ u64 hipz_h_reset_event(const struct ipz_
 		       struct ipz_eq_handle eq_handle,
 		       const u64 event_mask)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_RESET_EVENTS,
-				    adapter_handle.handle, /* r4 */
-				    eq_handle.handle,      /* r5 */
-				    event_mask,	           /* r6 */
-				    0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
 }
 
 u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
@@ -294,30 +256,21 @@ u64 hipz_h_alloc_resource_cq(const struc
 			     struct ehca_alloc_cq_parms *param)
 {
 	u64 ret;
-	u64 dummy;
-	u64 act_nr_of_entries_out, act_pages_out;
-	u64 g_la_privileged_out, g_la_user_out;
-
-	ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE,
-				   adapter_handle.handle,     /* r4  */
-				   2,	                      /* r5  */
-				   param->eq_handle.handle,   /* r6  */
-				   cq->token,	              /* r7  */
-				   param->nr_cqe,             /* r8  */
-				   0, 0,
-				   &cq->ipz_cq_handle.handle, /* r4  */
-				   &dummy,	              /* r5  */
-				   &dummy,	              /* r6  */
-				   &act_nr_of_entries_out,    /* r7  */
-				   &act_pages_out,	      /* r8  */
-				   &g_la_privileged_out,      /* r9  */
-				   &g_la_user_out);           /* r10 */
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	param->act_nr_of_entries = (u32)act_nr_of_entries_out;
-	param->act_pages = (u32)act_pages_out;
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&cq->galpas, g_la_privileged_out, g_la_user_out);
+		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%lx", ret);
@@ -330,8 +283,9 @@ u64 hipz_h_alloc_resource_qp(const struc
 			     struct ehca_alloc_qp_parms *parms)
 {
 	u64 ret;
-	u64 dummy, allocate_controls, max_r10_reg;
-	u64 qp_nr_out, r6_out, r7_out, r8_out, g_la_user_out, r11_out;
+	u64 allocate_controls;
+	u64 max_r10_reg;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
 	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
 	int daqp_ctrl = parms->daqp_ctrl;
@@ -360,48 +314,36 @@ u64 hipz_h_alloc_resource_qp(const struc
 		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
 				 parms->max_recv_sge);
 
-
-	ret = ehca_hcall_9arg_9ret(H_ALLOC_RESOURCE,
-				   adapter_handle.handle,	      /* r4  */
-				   allocate_controls,	              /* r5  */
-				   qp->send_cq->ipz_cq_handle.handle,
-				   qp->recv_cq->ipz_cq_handle.handle,
-				   parms->ipz_eq_handle.handle,
-				   ((u64)qp->token << 32) | parms->pd.value,
-				   max_r10_reg,	                      /* r10 */
-				   parms->ud_av_l_key_ctl,            /* r11 */
-				   0,
-				   &qp->ipz_qp_handle.handle,
-				   &qp_nr_out,	                      /* r5  */
-				   &r6_out,	                      /* r6  */
-				   &r7_out,	                      /* r7  */
-				   &r8_out,	                      /* r8  */
-				   &dummy,	                      /* r9  */
-				   &g_la_user_out,	              /* r10 */
-				   &r11_out,
-				   &dummy);
-
-	/* extract outputs */
-	qp->real_qp_num = (u32)qp_nr_out;
-
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				qp->send_cq->ipz_cq_handle.handle,
+				qp->recv_cq->ipz_cq_handle.handle,
+				parms->ipz_eq_handle.handle,
+				((u64)qp->token << 32) | parms->pd.value,
+				max_r10_reg,	                   /* r10 */
+				parms->ud_av_l_key_ctl,            /* r11 */
+				0);
+	qp->ipz_qp_handle.handle = outs[0];
+	qp->real_qp_num = (u32)outs[1];
 	parms->act_nr_send_sges =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, r6_out);
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
 	parms->act_nr_recv_wqes =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, r6_out);
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
 	parms->act_nr_send_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, r7_out);
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
 	parms->act_nr_recv_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, r7_out);
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
 	parms->nr_sq_pages =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, r8_out);
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
 	parms->nr_rq_pages =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, r8_out);
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
 
 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&qp->galpas, g_la_user_out, g_la_user_out);
+		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lx",ret);
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
 
 	return ret;
 }
@@ -411,7 +353,6 @@ u64 hipz_h_query_port(const struct ipz_a
 		      struct hipz_query_port *query_port_response_block)
 {
 	u64 ret;
-	u64 dummy;
 	u64 r_cb = virt_to_abs(query_port_response_block);
 
 	if (r_cb & (EHCA_PAGESIZE-1)) {
@@ -419,18 +360,11 @@ u64 hipz_h_query_port(const struct ipz_a
 		return H_PARAMETER;
 	}
 
-	ret = ehca_hcall_7arg_7ret(H_QUERY_PORT,
-				   adapter_handle.handle, /* r4 */
-				   port_id,	          /* r5 */
-				   r_cb,	          /* r6 */
-				   0, 0, 0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
 
 	if (ehca_debug_level)
 		ehca_dmp(query_port_response_block, 64, "response_block");
@@ -441,7 +375,6 @@ u64 hipz_h_query_port(const struct ipz_a
 u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
 		     struct hipz_query_hca *query_hca_rblock)
 {
-	u64 dummy;
 	u64 r_cb = virt_to_abs(query_hca_rblock);
 
 	if (r_cb & (EHCA_PAGESIZE-1)) {
@@ -450,17 +383,10 @@ u64 hipz_h_query_hca(const struct ipz_ad
 		return H_PARAMETER;
 	}
 
-	return ehca_hcall_7arg_7ret(H_QUERY_HCA,
-				    adapter_handle.handle, /* r4 */
-				    r_cb,                  /* r5 */
-				    0, 0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
 }
 
 u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
@@ -470,22 +396,13 @@ u64 hipz_h_register_rpage(const struct i
 			  const u64 logical_address_of_page,
 			  u64 count)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_REGISTER_RPAGES,
-				    adapter_handle.handle,      /* r4  */
-				    queue_type | pagesize << 8, /* r5  */
-				    resource_handle,	        /* r6  */
-				    logical_address_of_page,    /* r7  */
-				    count,	                /* r8  */
-				    0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       queue_type | pagesize << 8, /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,	                   /* r8  */
+				       0, 0);
 }
 
 u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
@@ -507,23 +424,14 @@ u64 hipz_h_register_rpage_eq(const struc
 				     logical_address_of_page, count);
 }
 
-u32 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
 			   u32 ist)
 {
-	u32 ret;
-	u64 dummy;
-
-	ret = ehca_hcall_7arg_7ret(H_QUERY_INT_STATE,
-				   adapter_handle.handle, /* r4 */
-				   ist,                   /* r5 */
-				   0, 0, 0, 0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
 
 	if (ret != H_SUCCESS && ret != H_BUSY)
 		ehca_gen_err("Could not query interrupt state.");
@@ -576,25 +484,20 @@ u64 hipz_h_disable_and_get_wqe(const str
 			       void **log_addr_next_rq_wqe2processed,
 			       int dis_and_get_function_code)
 {
-	u64 dummy, dummy1, dummy2;
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void*)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void*)outs[1];
 
-	if (!log_addr_next_sq_wqe2processed)
-		log_addr_next_sq_wqe2processed = (void**)&dummy1;
-	if (!log_addr_next_rq_wqe2processed)
-		log_addr_next_rq_wqe2processed = (void**)&dummy2;
-
-	return ehca_hcall_7arg_7ret(H_DISABLE_AND_GETC,
-				    adapter_handle.handle,     /* r4 */
-				    dis_and_get_function_code, /* r5 */
-				    qp_handle.handle,	       /* r6 */
-				    0, 0, 0, 0,
-				    (void*)log_addr_next_sq_wqe2processed,
-				    (void*)log_addr_next_rq_wqe2processed,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ret;
 }
 
 u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
@@ -605,22 +508,13 @@ u64 hipz_h_modify_qp(const struct ipz_ad
 		     struct h_galpa gal)
 {
 	u64 ret;
-	u64 dummy;
-	u64 invalid_attribute_identifier, rc_attrib_mask;
-
-	ret = ehca_hcall_7arg_7ret(H_MODIFY_QP,
-				   adapter_handle.handle,         /* r4 */
-				   qp_handle.handle,	          /* r5 */
-				   update_mask,	                  /* r6 */
-				   virt_to_abs(mqpcb),	          /* r7 */
-				   0, 0, 0,
-				   &invalid_attribute_identifier, /* r4 */
-				   &dummy,	                  /* r5 */
-				   &dummy,	                  /* r6 */
-				   &dummy,                        /* r7 */
-				   &dummy,	                  /* r8 */
-				   &rc_attrib_mask,               /* r9 */
-				   &dummy);
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				virt_to_abs(mqpcb),    /* r7 */
+				0, 0, 0, 0, 0);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Insufficient resources ret=%lx", ret);
@@ -634,61 +528,37 @@ u64 hipz_h_query_qp(const struct ipz_ada
 		    struct hcp_modify_qp_control_block *qqpcb,
 		    struct h_galpa gal)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_QUERY_QP,
-				    adapter_handle.handle, /* r4 */
-				    qp_handle.handle,      /* r5 */
-				    virt_to_abs(qqpcb),	   /* r6 */
-				    0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       virt_to_abs(qqpcb),    /* r6 */
+				       0, 0, 0, 0);
 }
 
 u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
 		      struct ehca_qp *qp)
 {
 	u64 ret;
-	u64 dummy;
-	u64 ladr_next_sq_wqe_out, ladr_next_rq_wqe_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
 	ret = hcp_galpas_dtor(&qp->galpas);
 	if (ret) {
 		ehca_gen_err("Could not destruct qp->galpas");
 		return H_RESOURCE;
 	}
-	ret = ehca_hcall_7arg_7ret(H_DISABLE_AND_GETC,
-				   adapter_handle.handle,     /* r4 */
-				   /* function code */
-				   1,	                      /* r5 */
-				   qp->ipz_qp_handle.handle,  /* r6 */
-				   0, 0, 0, 0,
-				   &ladr_next_sq_wqe_out,     /* r4 */
-				   &ladr_next_rq_wqe_out,     /* r5 */
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
 	if (ret == H_HARDWARE)
 		ehca_gen_err("HCA not operational. ret=%lx", ret);
 
-	ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE,
-				   adapter_handle.handle,     /* r4 */
-				   qp->ipz_qp_handle.handle,  /* r5 */
-				   0, 0, 0, 0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
 
 	if (ret == H_RESOURCE)
 		ehca_gen_err("Resource still in use. ret=%lx", ret);
@@ -701,20 +571,11 @@ u64 hipz_h_define_aqp0(const struct ipz_
 		       struct h_galpa gal,
 		       u32 port)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_DEFINE_AQP0,
-				    adapter_handle.handle, /* r4 */
-				    qp_handle.handle,      /* r5 */
-				    port,                  /* r6 */
-				    0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
 }
 
 u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
@@ -724,24 +585,15 @@ u64 hipz_h_define_aqp1(const struct ipz_
 		       u32 * bma_qp_nr)
 {
 	u64 ret;
-	u64 dummy;
-	u64 pma_qp_nr_out, bma_qp_nr_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	ret = ehca_hcall_7arg_7ret(H_DEFINE_AQP1,
-				   adapter_handle.handle, /* r4 */
-				   qp_handle.handle,      /* r5 */
-				   port,	          /* r6 */
-				   0, 0, 0, 0,
-				   &pma_qp_nr_out,        /* r4 */
-				   &bma_qp_nr_out,        /* r5 */
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
-
-	*pma_qp_nr = (u32)pma_qp_nr_out;
-	*bma_qp_nr = (u32)bma_qp_nr_out;
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
 
 	if (ret == H_ALIAS_EXIST)
 		ehca_gen_err("AQP1 already exists. ret=%lx", ret);
@@ -756,22 +608,14 @@ u64 hipz_h_attach_mcqp(const struct ipz_
 		       u64 subnet_prefix, u64 interface_id)
 {
 	u64 ret;
-	u64 dummy;
 
-	ret = ehca_hcall_7arg_7ret(H_ATTACH_MCQP,
-				   adapter_handle.handle,     /* r4 */
-				   qp_handle.handle,          /* r5 */
-				   mcg_dlid,                  /* r6 */
-				   interface_id,              /* r7 */
-				   subnet_prefix,             /* r8 */
-				   0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
 
 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%lx", ret);
@@ -785,22 +629,13 @@ u64 hipz_h_detach_mcqp(const struct ipz_
 		       u16 mcg_dlid,
 		       u64 subnet_prefix, u64 interface_id)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_DETACH_MCQP,
-				    adapter_handle.handle, /* r4 */
-				    qp_handle.handle,	   /* r5 */
-				    mcg_dlid,	           /* r6 */
-				    interface_id,          /* r7 */
-				    subnet_prefix,         /* r8 */
-				    0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
 }
 
 u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
@@ -808,7 +643,6 @@ u64 hipz_h_destroy_cq(const struct ipz_a
 		      u8 force_flag)
 {
 	u64 ret;
-	u64 dummy;
 
 	ret = hcp_galpas_dtor(&cq->galpas);
 	if (ret) {
@@ -816,18 +650,11 @@ u64 hipz_h_destroy_cq(const struct ipz_a
 		return H_RESOURCE;
 	}
 
-	ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE,
-				   adapter_handle.handle,     /* r4 */
-				   cq->ipz_cq_handle.handle,  /* r5 */
-				   force_flag != 0 ? 1L : 0L, /* r6 */
-				   0, 0, 0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
 
 	if (ret == H_RESOURCE)
 		ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
@@ -839,7 +666,6 @@ u64 hipz_h_destroy_eq(const struct ipz_a
 		      struct ehca_eq *eq)
 {
 	u64 ret;
-	u64 dummy;
 
 	ret = hcp_galpas_dtor(&eq->galpas);
 	if (ret) {
@@ -847,18 +673,10 @@ u64 hipz_h_destroy_eq(const struct ipz_a
 		return H_RESOURCE;
 	}
 
-	ret = ehca_hcall_7arg_7ret(H_FREE_RESOURCE,
-				   adapter_handle.handle,     /* r4 */
-				   eq->ipz_eq_handle.handle,  /* r5 */
-				   0, 0, 0, 0, 0,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy,
-				   &dummy);
-
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      eq->ipz_eq_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
 
 	if (ret == H_RESOURCE)
 		ehca_gen_err("Resource in use. ret=%lx ", ret);
@@ -875,27 +693,19 @@ u64 hipz_h_alloc_resource_mr(const struc
 			     struct ehca_mr_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 lkey_out;
-	u64 rkey_out;
-
-	ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE,
-				   adapter_handle.handle,            /* r4 */
-				   5,                                /* r5 */
-				   vaddr,                            /* r6 */
-				   length,                           /* r7 */
-				   (((u64)access_ctrl) << 32ULL),    /* r8 */
-				   pd.value,                         /* r9 */
-				   0,
-				   &(outparms->handle.handle),       /* r4 */
-				   &dummy,                           /* r5 */
-				   &lkey_out,                        /* r6 */
-				   &rkey_out,                        /* r7 */
-				   &dummy,
-				   &dummy,
-				   &dummy);
-	outparms->lkey = (u32)lkey_out;
-	outparms->rkey = (u32)rkey_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,            /* r4 */
+				5,                                /* r5 */
+				vaddr,                            /* r6 */
+				length,                           /* r7 */
+				(((u64)access_ctrl) << 32ULL),    /* r8 */
+				pd.value,                         /* r9 */
+				0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
 
 	return ret;
 }
@@ -923,7 +733,6 @@ u64 hipz_h_register_rpage_mr(const struc
 					    queue_type,
 					    mr->ipz_mr_handle.handle,
 					    logical_address_of_page, count);
-
 	return ret;
 }
 
@@ -932,24 +741,17 @@ u64 hipz_h_query_mr(const struct ipz_ada
 		    struct ehca_mr_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 remote_len_out, remote_vaddr_out, acc_ctrl_pd_out, r9_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	ret = ehca_hcall_7arg_7ret(H_QUERY_MR,
-				   adapter_handle.handle,     /* r4 */
-				   mr->ipz_mr_handle.handle,  /* r5 */
-				   0, 0, 0, 0, 0,
-				   &outparms->len,            /* r4 */
-				   &outparms->vaddr,          /* r5 */
-				   &remote_len_out,           /* r6 */
-				   &remote_vaddr_out,         /* r7 */
-				   &acc_ctrl_pd_out,          /* r8 */
-				   &r9_out,
-				   &dummy);
-
-	outparms->acl  = acc_ctrl_pd_out >> 32;
-	outparms->lkey = (u32)(r9_out >> 32);
-	outparms->rkey = (u32)(r9_out & (0xffffffff));
+	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+				adapter_handle.handle,     /* r4 */
+				mr->ipz_mr_handle.handle,  /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl  = outs[4] >> 32;
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & (0xffffffff));
 
 	return ret;
 }
@@ -957,19 +759,10 @@ u64 hipz_h_query_mr(const struct ipz_ada
 u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
 			    const struct ehca_mr *mr)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_FREE_RESOURCE,
-				    adapter_handle.handle,    /* r4 */
-				    mr->ipz_mr_handle.handle, /* r5 */
-				    0, 0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mr->ipz_mr_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
 }
 
 u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
@@ -982,28 +775,20 @@ u64 hipz_h_reregister_pmr(const struct i
 			  struct ehca_mr_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 lkey_out, rkey_out;
-
-	ret = ehca_hcall_7arg_7ret(H_REREGISTER_PMR,
-				   adapter_handle.handle,    /* r4 */
-				   mr->ipz_mr_handle.handle, /* r5 */
-				   vaddr_in,	             /* r6 */
-				   length,                   /* r7 */
-				   /* r8 */
-				   ((((u64)access_ctrl) << 32ULL) | pd.value),
-				   mr_addr_cb,               /* r9 */
-				   0,
-				   &dummy,                   /* r4 */
-				   &outparms->vaddr,         /* r5 */
-				   &lkey_out,                /* r6 */
-				   &rkey_out,                /* r7 */
-				   &dummy,
-				   &dummy,
-				   &dummy);
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	outparms->lkey = (u32)lkey_out;
-	outparms->rkey = (u32)rkey_out;
+	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				adapter_handle.handle,    /* r4 */
+				mr->ipz_mr_handle.handle, /* r5 */
+				vaddr_in,	          /* r6 */
+				length,                   /* r7 */
+				/* r8 */
+				((((u64)access_ctrl) << 32ULL) | pd.value),
+				mr_addr_cb,               /* r9 */
+				0, 0, 0);
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
 
 	return ret;
 }
@@ -1017,25 +802,18 @@ u64 hipz_h_register_smr(const struct ipz
 			struct ehca_mr_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 lkey_out, rkey_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	ret = ehca_hcall_7arg_7ret(H_REGISTER_SMR,
-				   adapter_handle.handle,            /* r4 */
-				   orig_mr->ipz_mr_handle.handle,    /* r5 */
-				   vaddr_in,                         /* r6 */
-				   (((u64)access_ctrl) << 32ULL),    /* r7 */
-				   pd.value,                         /* r8 */
-				   0, 0,
-				   &(outparms->handle.handle),       /* r4 */
-				   &dummy,                           /* r5 */
-				   &lkey_out,                        /* r6 */
-				   &rkey_out,                        /* r7 */
-				   &dummy,
-				   &dummy,
-				   &dummy);
-	outparms->lkey = (u32)lkey_out;
-	outparms->rkey = (u32)rkey_out;
+	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				adapter_handle.handle,            /* r4 */
+				orig_mr->ipz_mr_handle.handle,    /* r5 */
+				vaddr_in,                         /* r6 */
+				(((u64)access_ctrl) << 32ULL),    /* r7 */
+				pd.value,                         /* r8 */
+				0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
 
 	return ret;
 }
@@ -1046,23 +824,15 @@ u64 hipz_h_alloc_resource_mw(const struc
 			     struct ehca_mw_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 rkey_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	ret = ehca_hcall_7arg_7ret(H_ALLOC_RESOURCE,
-				   adapter_handle.handle,      /* r4 */
-				   6,                          /* r5 */
-				   pd.value,                   /* r6 */
-				   0, 0, 0, 0,
-				   &(outparms->handle.handle), /* r4 */
-				   &dummy,                     /* r5 */
-				   &dummy,                     /* r6 */
-				   &rkey_out,                  /* r7 */
-				   &dummy,
-				   &dummy,
-				   &dummy);
-
-	outparms->rkey = (u32)rkey_out;
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,      /* r4 */
+				6,                          /* r5 */
+				pd.value,                   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
 
 	return ret;
 }
@@ -1072,21 +842,13 @@ u64 hipz_h_query_mw(const struct ipz_ada
 		    struct ehca_mw_hipzout_parms *outparms)
 {
 	u64 ret;
-	u64 dummy;
-	u64 pd_out, rkey_out;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
 
-	ret = ehca_hcall_7arg_7ret(H_QUERY_MW,
-				   adapter_handle.handle,    /* r4 */
-				   mw->ipz_mw_handle.handle, /* r5 */
-				   0, 0, 0, 0, 0,
-				   &dummy,                   /* r4 */
-				   &dummy,                   /* r5 */
-				   &dummy,                   /* r6 */
-				   &rkey_out,                /* r7 */
-				   &pd_out,                  /* r8 */
-				   &dummy,
-				   &dummy);
-	outparms->rkey = (u32)rkey_out;
+	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+				adapter_handle.handle,    /* r4 */
+				mw->ipz_mw_handle.handle, /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->rkey = (u32)outs[3];
 
 	return ret;
 }
@@ -1094,19 +856,10 @@ u64 hipz_h_query_mw(const struct ipz_ada
 u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
 			    const struct ehca_mw *mw)
 {
-	u64 dummy;
-
-	return ehca_hcall_7arg_7ret(H_FREE_RESOURCE,
-				    adapter_handle.handle,    /* r4 */
-				    mw->ipz_mw_handle.handle, /* r5 */
-				    0, 0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mw->ipz_mw_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
 }
 
 u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
@@ -1114,7 +867,6 @@ u64 hipz_h_error_data(const struct ipz_a
 		      void *rblock,
 		      unsigned long *byte_count)
 {
-	u64 dummy;
 	u64 r_cb = virt_to_abs(rblock);
 
 	if (r_cb & (EHCA_PAGESIZE-1)) {
@@ -1122,16 +874,9 @@ u64 hipz_h_error_data(const struct ipz_a
 		return H_PARAMETER;
 	}
 
-	return ehca_hcall_7arg_7ret(H_ERROR_DATA,
-				    adapter_handle.handle,
-				    ressource_handle,
-				    r_cb,
-				    0, 0, 0, 0,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy,
-				    &dummy);
+	return ehca_plpar_hcall_norets(H_ERROR_DATA,
+				       adapter_handle.handle,
+				       ressource_handle,
+				       r_cb,
+				       0, 0, 0, 0);
 }
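
The H_ALL_RES_QP_* definitions above use EHCA_BMASK_IBM(from, to), which names a bit range in IBM convention: bit 0 is the most significant bit of a 64-bit word, bit 63 the least significant. The real EHCA_BMASK_SET/GET macros live in ehca_tools.h and are not shown in this hunk; the stand-alone program below is only an approximation of them, written so the outs[] decoding above (e.g. nr_sq_pages/nr_rq_pages from outs[4]) can be followed.

/* Illustrative re-implementation; the real macros are in ehca_tools.h. */
#include <stdint.h>
#include <stdio.h>

/* IBM bit numbering: bit 0 = MSB of a 64-bit word, bit 63 = LSB. */
static uint64_t bmask_set(int from, int to, uint64_t value)
{
	int width = to - from + 1;
	int shift = 63 - to;				/* distance from the LSB */
	uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);

	return (value & mask) << shift;
}

static uint64_t bmask_get(int from, int to, uint64_t word)
{
	int width = to - from + 1;
	int shift = 63 - to;
	uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);

	return (word >> shift) & mask;
}

int main(void)
{
	/* H_ALL_RES_QP_SQUEUE_SIZE_PAGES is bits 0..31 and
	 * H_ALL_RES_QP_RQUEUE_SIZE_PAGES bits 32..63 of outs[4]. */
	uint64_t r8 = bmask_set(0, 31, 7) | bmask_set(32, 63, 9);

	printf("sq pages=%llu rq pages=%llu\n",
	       (unsigned long long)bmask_get(0, 31, r8),
	       (unsigned long long)bmask_get(32, 63, r8));
	return 0;
}
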
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/hcp_if.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/hcp_if.h
@@ -107,7 +107,7 @@ u64 hipz_h_register_rpage_eq(const struc
 			     const u64 logical_address_of_page,
 			     const u64 count);
 
-u32 hipz_h_query_int_state(const struct ipz_adapter_handle
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
 			   hcp_adapter_handle,
 			   u32 ist);
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -44,14 +44,13 @@
 
 int hcall_map_page(u64 physaddr, u64 *mapaddr)
 {
-	*mapaddr = (u64)ioremap((physaddr & PAGE_MASK), PAGE_SIZE) +
-		(physaddr & (~PAGE_MASK));
+	*mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
 	return 0;
 }
 
 int hcall_unmap_page(u64 mapaddr)
 {
-	iounmap((void __iomem*)(mapaddr & PAGE_MASK));
+	iounmap((volatile void __iomem*)mapaddr);
 	return 0;
 }
 
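
The old hcall_map_page() mapped the containing page and then added the sub-page offset back by hand; the replacement simply ioremaps the 4 KB firmware page itself. For reference, the alignment arithmetic the removed code relied on looks like the user-space sketch below, which assumes PAGE_MASK is ~(PAGE_SIZE - 1) and a 4 KB page.

/* Sketch of the removed align/offset split; values are assumptions. */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096UL
#define DEMO_PAGE_MASK (~(DEMO_PAGE_SIZE - 1))

int main(void)
{
	uint64_t physaddr = 0x12345678;
	uint64_t base   = physaddr & DEMO_PAGE_MASK;	/* page-aligned start */
	uint64_t offset = physaddr & ~DEMO_PAGE_MASK;	/* position within it */

	printf("base=%#llx offset=%#llx\n",
	       (unsigned long long)base, (unsigned long long)offset);
	return 0;
}
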
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/hipz_hw.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -45,6 +45,8 @@
 
 #include "ehca_tools.h"
 
+#define EHCA_MAX_MTU 4
+
 /* QP Table Entry Memory Map */
 struct hipz_qptemm {
 	u64 qpx_hcr;
@@ -184,8 +186,6 @@ struct hipz_mrmwmm {
 
 };
 
-#define MRX_HCR_LPARID_VALID EHCA_BMASK_IBM(0,0)
-
 #define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
 
 struct hipz_qpedmm {
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -70,6 +70,19 @@ void *ipz_qeit_eq_get_inc(struct ipz_que
 	return ret;
 }
 
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
+{
+	int i;
+	for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
+		u64 page = (u64)virt_to_abs(queue->queue_pages[i]);
+		if (addr >= page && addr < page + queue->pagesize) {
+			*q_offset = addr - page + i * queue->pagesize;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
 int ipz_queue_ctor(struct ipz_queue *queue,
 		   const u32 nr_of_pages,
 		   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
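
ipz_queue_abs_to_offset(), added above, walks the queue's page array and translates an absolute address back into a byte offset within the (possibly non-contiguous) queue. Below is a hedged user-space model of the same calculation; struct demo_queue and the malloc'ed pages are stand-ins for struct ipz_queue and virt_to_abs(), nothing more.

/* User-space model of the offset calculation; types are stand-ins. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_queue {
	uint64_t queue_length;		/* total size in bytes */
	uint64_t pagesize;		/* bytes per page */
	void   **queue_pages;		/* one pointer per page, not contiguous */
};

static int demo_abs_to_offset(struct demo_queue *q, uint64_t addr,
			      uint64_t *q_offset)
{
	uint64_t i;

	for (i = 0; i < q->queue_length / q->pagesize; i++) {
		uint64_t page = (uint64_t)(uintptr_t)q->queue_pages[i];

		if (addr >= page && addr < page + q->pagesize) {
			*q_offset = addr - page + i * q->pagesize;
			return 0;
		}
	}
	return -1;	/* -EINVAL in the kernel version */
}

int main(void)
{
	enum { PAGES = 4, PAGESIZE = 4096 };
	void *pages[PAGES];
	struct demo_queue q = { PAGES * PAGESIZE, PAGESIZE, pages };
	uint64_t off;
	int i;

	for (i = 0; i < PAGES; i++)
		pages[i] = malloc(PAGESIZE);

	/* an address 100 bytes into the third page maps to offset 2*4096+100 */
	if (!demo_abs_to_offset(&q, (uint64_t)(uintptr_t)pages[2] + 100, &off))
		printf("offset=%llu\n", (unsigned long long)off);

	for (i = 0; i < PAGES; i++)
		free(pages[i]);
	return 0;
}

The ipz_queue_advance_offset() helper added to the header that follows is the matching step function: it advances by one queue entry and wraps to 0 at queue_length.
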
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -150,6 +150,21 @@ static inline void *ipz_qeit_reset(struc
 	return ipz_qeit_get(queue);
 }
 
+/*
+ * return the q_offset corresponding to an absolute address
+ */
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
+
+/*
+ * return the next queue offset. don't modify the queue.
+ */
+static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
+{
+	offset += queue->qe_size;
+	if (offset >= queue->queue_length) offset = 0;
+	return offset;
+}
+
 /* struct generic page table */
 struct ipz_pt {
 	u64 entries[EHCA_PT_ENTRIES];
@@ -226,10 +241,18 @@ static inline void *ipz_eqit_eq_get_inc_
 {
 	void *ret = ipz_qeit_get(queue);
 	u32 qe = *(u8 *) ret;
-	if ((qe >> 7) == (queue->toggle_state & 1))
-		ipz_qeit_eq_get_inc(queue); /* this is a good one */
-	else
-		ret = NULL;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	ipz_qeit_eq_get_inc(queue); /* this is a good one */
+	return ret;
+}
+
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
+	void *ret = ipz_qeit_get(queue);
+	u32 qe = *(u8 *) ret;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
 	return ret;
 }
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
 	To compile the driver as a module, choose M here. The module
 	will be called ib_ehca.
 
-config INFINIBAND_EHCA_SCALING
-	bool "Scaling support (EXPERIMENTAL)"
-	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
-	default y
-	---help---
-	eHCA scaling support schedules the CQ callbacks to different CPUs.
-
-	To enable this feature choose Y here.
--- linux-2.6.18.noarch/drivers/infiniband/hw/ehca/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ehca/Makefile
@@ -10,7 +10,6 @@
 
 obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
 
-
 ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
 		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
 		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_common.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
 #define IPATH_IB_LINKINIT		3
 #define IPATH_IB_LINKDOWN_SLEEP		4
 #define IPATH_IB_LINKDOWN_DISABLE	5
+#define IPATH_IB_LINK_LOOPBACK	6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL	7 /* normal, disable local loopback */
 
 /*
  * stats maintained by the driver.  For now, at least, this is global
@@ -141,8 +143,9 @@ struct infinipath_stats {
 	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
+	__u64 sps_txeparity; /* PIO buffer parity error, recovered */
 	/* pad for future growth */
-	__u64 __sps_pad[46];
+	__u64 __sps_pad[45];
 };
 
 /*
@@ -185,6 +188,9 @@ typedef enum _ipath_ureg {
 #define IPATH_RUNTIME_PCIE	0x2
 #define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
 #define IPATH_RUNTIME_RCVHDR_COPY	0x8
+#define IPATH_RUNTIME_MASTER	0x10
+#define IPATH_RUNTIME_PBC_REWRITE 0x20
+#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
 
 /*
  * This structure is returned by ipath_userinit() immediately after
@@ -202,7 +208,8 @@ struct ipath_base_info {
 	/* version of software, for feature checking. */
 	__u32 spi_sw_version;
 	/* InfiniPath port assigned, goes into sent packets */
-	__u32 spi_port;
+	__u16 spi_port;
+	__u16 spi_subport;
 	/*
 	 * IB MTU, packets IB data must be less than this.
 	 * The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -218,7 +225,7 @@ struct ipath_base_info {
 	__u32 spi_tidcnt;
 	/* size of the TID Eager list in infinipath, in entries */
 	__u32 spi_tidegrcnt;
-	/* size of a single receive header queue entry. */
+	/* size of a single receive header queue entry in words. */
 	__u32 spi_rcvhdrent_size;
 	/*
 	 * Count of receive header queue entries allocated.
@@ -310,6 +317,18 @@ struct ipath_base_info {
 	__u32 spi_filler_for_align;
 	/* address of readonly memory copy of the rcvhdrq tail register. */
 	__u64 spi_rcvhdr_tailaddr;
+
+	/* shared memory pages for subports if port is shared */
+	__u64 spi_subport_uregbase;
+	__u64 spi_subport_rcvegrbuf;
+	__u64 spi_subport_rcvhdr_base;
+
+	/* shared memory page for hardware port if it is shared */
+	__u64 spi_port_uregbase;
+	__u64 spi_port_rcvegrbuf;
+	__u64 spi_port_rcvhdr_base;
+	__u64 spi_port_rcvhdr_tailaddr;
+
 } __attribute__ ((aligned(8)));
 
 
@@ -328,12 +347,12 @@ struct ipath_base_info {
 
 /*
  * Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * within a major version, however if user software is larger
  * than driver software, some new features and/or structure fields
  * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
  */
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 4
 
 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
 
@@ -379,7 +398,16 @@ struct ipath_user_info {
 	 */
 	__u32 spu_rcvhdrsize;
 
-	__u64 spu_unused; /* kept for compatible layout */
+	/*
+	 * If two or more processes wish to share a port, each process
+	 * must set the spu_subport_cnt and spu_subport_id to the same
+	 * values.  The only restriction on the spu_subport_id is that
+	 * it be unique for a given node.
+	 */
+	__u16 spu_subport_cnt;
+	__u16 spu_subport_id;
+
+	__u32 spu_unused; /* kept for compatible layout */
 
 	/*
 	 * address of struct base_info to write to
@@ -392,19 +420,25 @@ struct ipath_user_info {
 
 #define IPATH_CMD_MIN		16
 
-#define IPATH_CMD_USER_INIT	16	/* set up userspace */
+#define __IPATH_CMD_USER_INIT	16	/* old set up userspace (for old user code) */
 #define IPATH_CMD_PORT_INFO	17	/* find out what resources we got */
 #define IPATH_CMD_RECV_CTRL	18	/* control receipt of packets */
 #define IPATH_CMD_TID_UPDATE	19	/* update expected TID entries */
 #define IPATH_CMD_TID_FREE	20	/* free expected TID entries */
 #define IPATH_CMD_SET_PART_KEY	21	/* add partition key */
+#define __IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes (for old user code) */
+#define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 	24	/* set up userspace */
 
-#define IPATH_CMD_MAX		21
+#define IPATH_CMD_MAX		24
 
 struct ipath_port_info {
 	__u32 num_active;	/* number of active units */
 	__u32 unit;		/* unit (chip) assigned to caller */
-	__u32 port;		/* port on unit assigned to caller */
+	__u16 port;		/* port on unit assigned to caller */
+	__u16 subport;		/* subport on unit assigned to caller */
+	__u16 num_ports;	/* number of ports available on unit */
+	__u16 num_subports;	/* number of subports opened on port */
 };
 
 struct ipath_tid_info {
@@ -435,6 +469,8 @@ struct ipath_cmd {
 		__u32 recv_ctrl;
 		/* partition key to set */
 		__u16 part_key;
+		/* user address of __u32 bitmask of active slaves */
+		__u64 slave_mask_addr;
 	} cmd;
 };
 
@@ -596,6 +632,10 @@ struct infinipath_counters {
 
 /* K_PktFlags bits */
 #define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT	4
 
 /* SendPIO per-buffer control */
 #define INFINIPATH_SP_TEST    0x40
@@ -610,7 +650,7 @@ struct ipath_header {
 	/*
 	 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
 	 * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-	 * Port 3, TID 11, offset 14.
+	 * Port 4, TID 11, offset 13.
 	 */
 	__le32 ver_port_tid_offset;
 	__le16 chksum;
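
IPATH_USER_SWVERSION above packs the major version into the upper 16 bits and the minor into the lower 16; per the comment in this hunk, user and driver must agree on the major number, while a smaller driver minor only limits available features. A small sketch of that check follows; IPATH_USER_SWMAJOR is not visible in this hunk, so the value used here is an assumption.

/* Sketch of the version check; SWMAJOR value is assumed. */
#include <stdint.h>
#include <stdio.h>

#define IPATH_USER_SWMAJOR 1	/* assumed; not shown in this hunk */
#define IPATH_USER_SWMINOR 4
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR << 16) | IPATH_USER_SWMINOR)

/* Returns 0 if a driver reporting 'drv_version' can serve this user ABI. */
static int swversion_compatible(uint32_t drv_version)
{
	if ((drv_version >> 16) != IPATH_USER_SWMAJOR)
		return -1;		/* major mismatch: incompatible */
	if ((drv_version & 0xffff) < IPATH_USER_SWMINOR)
		printf("driver is older; some features unavailable\n");
	return 0;
}

int main(void)
{
	printf("user swversion=%#x compatible=%d\n",
	       IPATH_USER_SWVERSION, swversion_compatible((1 << 16) | 3));
	return 0;
}
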
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_cq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,29 @@
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicitated entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+	struct ipath_cq_wc *wc;
 	unsigned long flags;
+	u32 head;
 	u32 next;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	if (cq->head == cq->ibcq.cqe)
+	/*
+	 * Note that the head pointer might be writable by user processes.
+	 * Take care to verify it is a sane value.
+	 */
+	wc = cq->queue;
+	head = wc->head;
+	if (head >= (unsigned) cq->ibcq.cqe) {
+		head = cq->ibcq.cqe;
 		next = 0;
-	else
-		next = cq->head + 1;
-	if (unlikely(next == cq->tail)) {
+	} else
+		next = head + 1;
+	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@@ -67,8 +76,21 @@ void ipath_cq_enter(struct ipath_cq *cq,
 		}
 		return;
 	}
-	cq->queue[cq->head] = *entry;
-	cq->head = next;
+	wc->queue[head].wr_id = entry->wr_id;
+	wc->queue[head].status = entry->status;
+	wc->queue[head].opcode = entry->opcode;
+	wc->queue[head].vendor_err = entry->vendor_err;
+	wc->queue[head].byte_len = entry->byte_len;
+	wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
+	wc->queue[head].qp_num = entry->qp->qp_num;
+	wc->queue[head].src_qp = entry->src_qp;
+	wc->queue[head].wc_flags = entry->wc_flags;
+	wc->queue[head].pkey_index = entry->pkey_index;
+	wc->queue[head].slid = entry->slid;
+	wc->queue[head].sl = entry->sl;
+	wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
+	wc->queue[head].port_num = entry->port_num;
+	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,20 +123,48 @@ void ipath_cq_enter(struct ipath_cq *cq,
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
+	struct ipath_cq_wc *wc;
 	unsigned long flags;
 	int npolled;
+	u32 tail;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
+	wc = cq->queue;
+	tail = wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (cq->tail == cq->head)
+		struct ipath_qp *qp;
+
+		if (tail == wc->head)
 			break;
-		*entry = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+
+		qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
+				      wc->queue[tail].qp_num);
+		entry->qp = &qp->ibqp;
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+
+		entry->wr_id = wc->queue[tail].wr_id;
+		entry->status = wc->queue[tail].status;
+		entry->opcode = wc->queue[tail].opcode;
+		entry->vendor_err = wc->queue[tail].vendor_err;
+		entry->byte_len = wc->queue[tail].byte_len;
+		entry->imm_data = wc->queue[tail].imm_data;
+		entry->src_qp = wc->queue[tail].src_qp;
+		entry->wc_flags = wc->queue[tail].wc_flags;
+		entry->pkey_index = wc->queue[tail].pkey_index;
+		entry->slid = wc->queue[tail].slid;
+		entry->sl = wc->queue[tail].sl;
+		entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
+		entry->port_num = wc->queue[tail].port_num;
+		if (tail >= cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
+	wc->tail = tail;
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 
@@ -160,7 +210,7 @@ struct ib_cq *ipath_create_cq(struct ib_
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	struct ipath_cq *cq;
-	struct ib_wc *wc;
+	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;
 
 	if (entries < 1 || entries > ib_ipath_max_cqes) {
@@ -168,10 +218,7 @@ struct ib_cq *ipath_create_cq(struct ib_
 		goto done;
 	}
 
-	/*
-	 * Need to use vmalloc() if we want to support large #s of
-	 * entries.
-	 */
+	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
 		ret = ERR_PTR(-ENOMEM);
@@ -179,14 +226,53 @@ struct ib_cq *ipath_create_cq(struct ib_
 	}
 
 	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
+	 * Allocate the completion queue entries and head/tail pointers.
+	 * This is allocated separately so that it can be resized and
+	 * also mapped into user space.
+	 * We need to use vmalloc() in order to support mmap and large
+	 * numbers of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (entries + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
 	if (!wc) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_cq;
 	}
 
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) wc;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wc;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wc;
+		}
+		cq->ip = ip;
+		ip->context = context;
+		ip->obj = wc;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * entries);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		cq->ip = NULL;
+
 	spin_lock(&dev->n_cqs_lock);
 	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
 		spin_unlock(&dev->n_cqs_lock);
@@ -207,8 +293,8 @@ struct ib_cq *ipath_create_cq(struct ib_
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
 	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	cq->head = 0;
-	cq->tail = 0;
+	wc->head = 0;
+	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
@@ -242,7 +328,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
 	spin_lock(&dev->n_cqs_lock);
 	dev->n_cqs_allocated--;
 	spin_unlock(&dev->n_cqs_lock);
-	vfree(cq->queue);
+	if (cq->ip)
+		kref_put(&cq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(cq->queue);
 	kfree(cq);
 
 	return 0;
@@ -266,7 +355,7 @@ int ipath_req_notify_cq(struct ib_cq *ib
 	spin_lock_irqsave(&cq->lock, flags);
 	/*
 	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions.
+	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 	 */
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify;
@@ -274,11 +363,18 @@ int ipath_req_notify_cq(struct ib_cq *ib
 	return 0;
 }
 
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue to resize
+ * @cqe: the requested new number of entries
+ * @udata: user data for the request (NULL for kernel consumers)
+ *
+ * Returns 0 for success.
+ */
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ib_wc *wc, *old_wc;
-	u32 n;
+	struct ipath_cq_wc *old_wc;
+	struct ipath_cq_wc *wc;
+	u32 head, tail, n;
 	int ret;
 
 	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
@@ -289,39 +385,74 @@ int ipath_resize_cq(struct ib_cq *ibcq, 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (cqe + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
 	if (!wc) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		__u64 offset = (__u64) wc;
+
+		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (ret)
+			goto bail;
+	}
+
 	spin_lock_irq(&cq->lock);
-	if (cq->head < cq->tail)
-		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+	/*
+	 * Make sure head and tail are sane since they
+	 * might be user writable.
+	 */
+	old_wc = cq->queue;
+	head = old_wc->head;
+	if (head > (u32) cq->ibcq.cqe)
+		head = (u32) cq->ibcq.cqe;
+	tail = old_wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
+	if (head < tail)
+		n = cq->ibcq.cqe + 1 + head - tail;
 	else
-		n = cq->head - cq->tail;
+		n = head - tail;
 	if (unlikely((u32)cqe < n)) {
 		spin_unlock_irq(&cq->lock);
 		vfree(wc);
 		ret = -EOVERFLOW;
 		goto bail;
 	}
-	for (n = 0; cq->tail != cq->head; n++) {
-		wc[n] = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+	for (n = 0; tail != head; n++) {
+		wc->queue[n] = old_wc->queue[tail];
+		if (tail == (u32) cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	cq->head = n;
-	cq->tail = 0;
-	old_wc = cq->queue;
+	wc->head = n;
+	wc->tail = 0;
 	cq->queue = wc;
 	spin_unlock_irq(&cq->lock);
 
 	vfree(old_wc);
 
+	if (cq->ip) {
+		struct ipath_ibdev *dev = to_idev(ibcq->device);
+		struct ipath_mmap_info *ip = cq->ip;
+
+		ip->obj = wc;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * cqe);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
 	ret = 0;
 
 bail:
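
The CQ rework above keeps the head/tail indices in memory that can be mmapped (and therefore scribbled on) by user space, so every kernel-side use clamps them to the ring size before indexing. A minimal userspace sketch of that clamp-and-advance pattern, with illustrative names rather than driver code:

#include <stdio.h>

#define RING_CQE 8			/* analogous to ibcq.cqe */

struct ring {
	unsigned int head;		/* may be overwritten by user space */
	unsigned int tail;		/* may be overwritten by user space */
	int entry[RING_CQE + 1];	/* cqe + 1 slots, as in the driver */
};

static int ring_enter(struct ring *r, int value)
{
	unsigned int head = r->head;
	unsigned int next;

	if (head >= RING_CQE) {		/* clamp an insane head */
		head = RING_CQE;
		next = 0;
	} else
		next = head + 1;
	if (next == r->tail)		/* full: the driver reports a CQ event here */
		return -1;
	r->entry[head] = value;
	r->head = next;
	return 0;
}

static int ring_poll(struct ring *r, int *value)
{
	unsigned int tail = r->tail;

	if (tail > RING_CQE)		/* clamp an insane tail */
		tail = RING_CQE;
	if (tail == r->head)		/* empty */
		return -1;
	*value = r->entry[tail];
	r->tail = (tail >= RING_CQE) ? 0 : tail + 1;
	return 0;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	int v;

	ring_enter(&r, 42);
	if (!ring_poll(&r, &v))
		printf("polled %d\n", v);
	return 0;
}
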
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_debug.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
 #define __IPATH_PROCDBG     0x100
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG       0x200
+#define __IPATH_ERRPKTDBG   0x400
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_diag.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -43,6 +43,7 @@
 
 #include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
@@ -66,19 +67,54 @@ static struct file_operations diag_file_
 	.release = ipath_diag_release
 };
 
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+	.owner = THIS_MODULE,
+	.write = ipath_diagpkt_write,
+};
+
+static atomic_t diagpkt_count = ATOMIC_INIT(0);
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
 int ipath_diag_add(struct ipath_devdata *dd)
 {
 	char name[16];
+	int ret = 0;
+
+	if (atomic_inc_return(&diagpkt_count) == 1) {
+		ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+				      "ipath_diagpkt", &diagpkt_file_ops,
+				      &diagpkt_cdev, &diagpkt_class_dev);
+
+		if (ret) {
+			ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
+				      "device: %d", ret);
+			goto done;
+		}
+	}
 
 	snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
 
-	return ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-			       &diag_file_ops, &dd->diag_cdev,
-			       &dd->diag_class_dev);
+	ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
+			      &diag_file_ops, &dd->diag_cdev,
+			      &dd->diag_class_dev);
+	if (ret)
+		ipath_dev_err(dd, "Couldn't create %s device: %d",
+			      name, ret);
+
+done:
+	return ret;
 }
 
 void ipath_diag_remove(struct ipath_devdata *dd)
 {
+	if (atomic_dec_and_test(&diagpkt_count))
+		ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+
 	ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_class_dev);
 }
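
The diagpkt device is now created by the first unit that calls ipath_diag_add() and destroyed by the last ipath_diag_remove(), using atomic_inc_return()/atomic_dec_and_test() on a shared counter. A userspace sketch of that "first add creates, last remove destroys" pattern (assumed names, not driver code):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int shared_count;		/* zero-initialized */

static void create_shared(void)  { puts("create shared resource"); }
static void destroy_shared(void) { puts("destroy shared resource"); }

static void unit_add(int unit)
{
	/* atomic_inc_return(&x) == 1 in the kernel */
	if (atomic_fetch_add(&shared_count, 1) + 1 == 1)
		create_shared();
	printf("unit %d added\n", unit);
}

static void unit_remove(int unit)
{
	printf("unit %d removed\n", unit);
	/* atomic_dec_and_test(&x) in the kernel */
	if (atomic_fetch_sub(&shared_count, 1) - 1 == 0)
		destroy_shared();
}

int main(void)
{
	unit_add(0);
	unit_add(1);	/* shared resource already exists */
	unit_remove(1);
	unit_remove(0);	/* last unit gone: tear down shared resource */
	return 0;
}
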
 
@@ -274,33 +310,6 @@ bail:
 	return ret;
 }
 
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off);
-
-static struct file_operations diagpkt_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diagpkt_write,
-};
-
-static struct cdev *diagpkt_cdev;
-static struct class_device *diagpkt_class_dev;
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-
-void ipath_diagpkt_add(void)
-{
-	if (atomic_inc_return(&diagpkt_count) == 1)
-		ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-				"ipath_diagpkt", &diagpkt_file_ops,
-				&diagpkt_cdev, &diagpkt_class_dev);
-}
-
-void ipath_diagpkt_remove(void)
-{
-	if (atomic_dec_and_test(&diagpkt_count))
-		ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
-}
-
 /**
  * ipath_diagpkt_write - write an IB packet
  * @fp: the diag data device file pointer
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_dma.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_verbs.h>
+
+#include "ipath_verbs.h"
+
+#define BAD_DMA_ADDRESS ((u64) 0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 ipath_dma_map_single(struct ib_device *dev,
+			        void *cpu_addr, size_t size,
+			        enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+	return (u64) cpu_addr;
+}
+
+static void ipath_dma_unmap_single(struct ib_device *dev,
+				   u64 addr, size_t size,
+				   enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 ipath_dma_map_page(struct ib_device *dev,
+			      struct page *page,
+			      unsigned long offset,
+			      size_t size,
+			      enum dma_data_direction direction)
+{
+	u64 addr;
+
+	BUG_ON(!valid_dma_direction(direction));
+
+	if (offset + size > PAGE_SIZE) {
+		addr = BAD_DMA_ADDRESS;
+		goto done;
+	}
+
+	addr = (u64) page_address(page);
+	if (addr)
+		addr += offset;
+	/* TODO: handle highmem pages */
+
+done:
+	return addr;
+}
+
+static void ipath_dma_unmap_page(struct ib_device *dev,
+				 u64 addr, size_t size,
+				 enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction direction)
+{
+	u64 addr;
+	int i;
+	int ret = nents;
+
+	BUG_ON(!valid_dma_direction(direction));
+
+	for (i = 0; i < nents; i++) {
+		addr = (u64) page_address(sg[i].page);
+		/* TODO: handle highmem pages */
+		if (!addr) {
+			ret = 0;
+			break;
+		}
+	}
+	return ret;
+}
+
+static void ipath_unmap_sg(struct ib_device *dev,
+			   struct scatterlist *sg, int nents,
+			   enum dma_data_direction direction)
+{
+	BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
+{
+	u64 addr = (u64) page_address(sg->page);
+
+	if (addr)
+		addr += sg->offset;
+	return addr;
+}
+
+static unsigned int ipath_sg_dma_len(struct ib_device *dev,
+				     struct scatterlist *sg)
+{
+	return sg->length;
+}
+
+static void ipath_sync_single_for_cpu(struct ib_device *dev,
+				      u64 addr,
+				      size_t size,
+				      enum dma_data_direction dir)
+{
+}
+
+static void ipath_sync_single_for_device(struct ib_device *dev,
+					 u64 addr,
+					 size_t size,
+					 enum dma_data_direction dir)
+{
+}
+
+static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
+				      u64 *dma_handle, gfp_t flag)
+{
+	struct page *p;
+	void *addr = NULL;
+
+	p = alloc_pages(flag, get_order(size));
+	if (p)
+		addr = page_address(p);
+	if (dma_handle)
+		*dma_handle = (u64) addr;
+	return addr;
+}
+
+static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
+				    void *cpu_addr, dma_addr_t dma_handle)
+{
+	free_pages((unsigned long) cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
+	ipath_mapping_error,
+	ipath_dma_map_single,
+	ipath_dma_unmap_single,
+	ipath_dma_map_page,
+	ipath_dma_unmap_page,
+	ipath_map_sg,
+	ipath_unmap_sg,
+	ipath_sg_dma_address,
+	ipath_sg_dma_len,
+	ipath_sync_single_for_cpu,
+	ipath_sync_single_for_device,
+	ipath_dma_alloc_coherent,
+	ipath_dma_free_coherent
+};
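
The ops table above exists because ipath moves data with the CPU rather than a DMA engine, so a "mapping" can simply hand back the virtual address, with address 0 reserved as the error sentinel. A standalone sketch of that identity-mapping idea (local types, not the kernel's ib_dma_mapping_ops API):

#include <stdint.h>
#include <stdio.h>

#define BAD_DMA_ADDRESS ((uint64_t)0)

struct dma_ops {
	uint64_t (*map_single)(void *cpu_addr, size_t size);
	int (*mapping_error)(uint64_t dma_addr);
};

static uint64_t identity_map_single(void *cpu_addr, size_t size)
{
	(void)size;
	return (uint64_t)(uintptr_t)cpu_addr;	/* no translation needed */
}

static int identity_mapping_error(uint64_t dma_addr)
{
	return dma_addr == BAD_DMA_ADDRESS;
}

static const struct dma_ops identity_dma_ops = {
	.map_single = identity_map_single,
	.mapping_error = identity_mapping_error,
};

int main(void)
{
	char buf[16];
	uint64_t addr = identity_dma_ops.map_single(buf, sizeof(buf));

	printf("mapping %s\n",
	       identity_dma_ops.mapping_error(addr) ? "failed" : "ok");
	return 0;
}
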
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_driver.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -42,6 +42,8 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
+#define CONFIG_HT_IRQ
+
 static void ipath_update_pio_bufs(struct ipath_devdata *);
 
 const char *ipath_get_unit_name(int unit)
@@ -95,16 +97,6 @@ const char *ipath_ibcstatus_str[] = {
 	"RecovIdle",
 };
 
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
 static void __devexit ipath_remove_one(struct pci_dev *);
 static int __devinit ipath_init_one(struct pci_dev *,
 				    const struct pci_device_id *);
@@ -137,27 +129,33 @@ static struct pci_driver ipath_driver = 
 };
 
 
-static void check_link_status(void *data)
+static void check_link_status(void *work)
 {
-	struct ipath_devdata *dd = data;
+	struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
+						link_work);
 
 	/*
 	 * If we're in the NOCABLE state, try again in another minute.
 	 */
 
-	if (dd->ipath_flags & IPATH_STATUS_IB_NOCABLE) {
-		schedule_delayed_work(&dd->link_task, HZ * LID_TIMEOUT);
+	if (*dd->ipath_statusp & IPATH_STATUS_IB_NOCABLE) {
+		schedule_delayed_work(&dd->link_work, HZ * LID_TIMEOUT);
 		return;
 	}
 
 	/*
-	 * If we don't have a LID, let the user know and don't bother
-	 * checking again.
+	 * If we don't have a LID or interrupts, let the user know and
+	 * don't bother checking again.
 	 */
 
-	if (dd->ipath_lid == 0)
+	if (dd->ipath_int_counter == 0)
+		dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
+	else if (dd->ipath_lid == 0)
+		dev_info(&dd->pcidev->dev,
+			 "We don't have a LID yet (no subnet manager?)\n");
+	else if (!(*dd->ipath_statusp & IPATH_STATUS_IB_READY))
 		dev_info(&dd->pcidev->dev,
-			 "We don't have a LID yet (no subnet manager?)");
+			 "LID assigned, but IB link is not ACTIVE\n");
 }
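
check_link_status() now receives a pointer to the embedded work item and recovers the enclosing ipath_devdata with container_of(), matching the INIT_WORK() change below. A standalone sketch of that pattern (illustrative types, not the driver's):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct devdata {
	int unit;
	struct work link_work;	/* embedded member handed to the callback */
};

static void link_handler(void *work_arg)
{
	struct devdata *dd = container_of((struct work *)work_arg,
					  struct devdata, link_work);

	printf("handler sees unit %d\n", dd->unit);
}

int main(void)
{
	struct devdata dd = { .unit = 3 };

	/* analogue of INIT_WORK(&dd->link_work, handler, &dd->link_work) */
	link_handler(&dd.link_work);
	return 0;
}
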
 
 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
@@ -227,7 +225,7 @@ static struct ipath_devdata *ipath_alloc
 	dd->pcidev = pdev;
 	pci_set_drvdata(pdev, dd);
 
-	INIT_WORK(&dd->link_task, check_link_status, dd);
+	INIT_WORK(&dd->link_work, check_link_status, &dd->link_work);
 
 	list_add(&dd->ipath_list, &ipath_dev_list);
 
@@ -433,11 +431,23 @@ static int __devinit ipath_init_one(stru
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
 	case PCI_DEVICE_ID_INFINIPATH_HT:
+#ifdef CONFIG_HT_IRQ
 		ipath_init_iba6110_funcs(dd);
 		break;
+#else
+		ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
+			      "CONFIG_HT_IRQ is not enabled\n", ent->device);
+		return -ENODEV;
+#endif
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
+#ifdef CONFIG_PCI_MSI
 		ipath_init_iba6120_funcs(dd);
 		break;
+#else
+		ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
+			      "CONFIG_PCI_MSI is not enabled\n", ent->device);
+		return -ENODEV;
+#endif
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
 			      "failing\n", ent->device);
@@ -517,14 +527,14 @@ static int __devinit ipath_init_one(stru
 				  IPATH_DRV_NAME, dd);
 		if (ret) {
 			ipath_dev_err(dd, "Couldn't setup irq handler, "
-				      "irq=%u: %d\n", pdev->irq, ret);
+				      "irq=%d: %d\n", pdev->irq, ret);
 			goto bail_iounmap;
 		}
 	}
 
 	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
 	if (ret)
-		goto bail_iounmap;
+		goto bail_irqsetup;
 
 	ret = ipath_enable_wc(dd);
 
@@ -539,14 +549,16 @@ static int __devinit ipath_init_one(stru
 	ipathfs_add_device(dd);
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
-	ipath_diagpkt_add();
 	ipath_register_ib_device(dd);
 
 	/* Check that we have a LID in LID_TIMEOUT seconds. */
-	schedule_delayed_work(&dd->link_task, HZ * LID_TIMEOUT);
+	schedule_delayed_work(&dd->link_work, HZ * LID_TIMEOUT);
 
 	goto bail;
 
+bail_irqsetup:
+	if (pdev->irq)
+		free_irq(pdev->irq, dd);
+
 bail_iounmap:
 	iounmap((volatile void __iomem *) dd->ipath_kregbase);
 
@@ -563,32 +575,149 @@ bail:
 	return ret;
 }
 
-static void __devexit ipath_remove_one(struct pci_dev *pdev)
+static void __devexit cleanup_device(struct ipath_devdata *dd)
 {
-	struct ipath_devdata *dd;
+	int port;
 
-	ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
-	if (!pdev)
-		return;
+	ipath_shutdown_device(dd);
 
-	dd = pci_get_drvdata(pdev);
+	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+		/* can't do anything more with chip; needs re-init */
+		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+		if (dd->ipath_kregbase) {
+			/*
+			 * if we haven't already cleaned up before, these are
+			 * to ensure any register reads/writes "fail" until
+			 * re-init
+			 */
+			dd->ipath_kregbase = NULL;
+			dd->ipath_uregbase = 0;
+			dd->ipath_sregbase = 0;
+			dd->ipath_cregbase = 0;
+			dd->ipath_kregsize = 0;
+		}
+		ipath_disable_wc(dd);
+	}
 
-	cancel_delayed_work(&dd->link_task);
+	if (dd->ipath_pioavailregs_dma) {
+		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+				  (void *) dd->ipath_pioavailregs_dma,
+				  dd->ipath_pioavailregs_phys);
+		dd->ipath_pioavailregs_dma = NULL;
+	}
+	if (dd->ipath_dummy_hdrq) {
+		dma_free_coherent(&dd->pcidev->dev,
+			dd->ipath_pd[0]->port_rcvhdrq_size,
+			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+		dd->ipath_dummy_hdrq = NULL;
+	}
+
+	if (dd->ipath_pageshadow) {
+		struct page **tmpp = dd->ipath_pageshadow;
+		dma_addr_t *tmpd = dd->ipath_physshadow;
+		int i, cnt = 0;
+
+		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+			   "locked\n");
+		for (port = 0; port < dd->ipath_cfgports; port++) {
+			int port_tidbase = port * dd->ipath_rcvtidcnt;
+			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+			for (i = port_tidbase; i < maxtid; i++) {
+				if (!tmpp[i])
+					continue;
+				pci_unmap_page(dd->pcidev, tmpd[i],
+					PAGE_SIZE, PCI_DMA_FROMDEVICE);
+				ipath_release_user_pages(&tmpp[i], 1);
+				tmpp[i] = NULL;
+				cnt++;
+			}
+		}
+		if (cnt) {
+			ipath_stats.sps_pageunlocks += cnt;
+			ipath_cdbg(VERBOSE, "There were still %u expTID "
+				   "entries locked\n", cnt);
+		}
+		if (ipath_stats.sps_pagelocks ||
+		    ipath_stats.sps_pageunlocks)
+			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+				   "unlocked via ipath_m{un}lock\n",
+				   (unsigned long long)
+				   ipath_stats.sps_pagelocks,
+				   (unsigned long long)
+				   ipath_stats.sps_pageunlocks);
+
+		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+			   dd->ipath_pageshadow);
+		vfree(dd->ipath_pageshadow);
+		dd->ipath_pageshadow = NULL;
+	}
+
+	/*
+	 * free any resources still in use (usually just kernel ports)
+	 * at unload; we do for portcnt, not cfgports, because cfgports
+	 * could have changed while we were loaded.
+	 */
+	for (port = 0; port < dd->ipath_portcnt; port++) {
+		struct ipath_portdata *pd = dd->ipath_pd[port];
+		dd->ipath_pd[port] = NULL;
+		ipath_free_pddata(dd, pd);
+	}
+	kfree(dd->ipath_pd);
+	/*
+	 * debuggability, in case some cleanup path tries to use it
+	 * after this
+	 */
+	dd->ipath_pd = NULL;
+}
+
+static void __devexit ipath_remove_one(struct pci_dev *pdev)
+{
+	struct ipath_devdata *dd = pci_get_drvdata(pdev);
+
+	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+
+	cancel_delayed_work(&dd->link_work);
+	flush_scheduled_work();
+
+	if (dd->verbs_dev)
+		ipath_unregister_ib_device(dd->verbs_dev);
 
-	ipath_unregister_ib_device(dd->verbs_dev);
-	ipath_diagpkt_remove();
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
 	ipath_device_remove_group(&pdev->dev, dd);
+
 	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
 		   "unit %u\n", dd, (u32) dd->ipath_unit);
-	if (dd->ipath_kregbase) {
-		ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
-			   dd->ipath_kregbase);
-		iounmap((volatile void __iomem *) dd->ipath_kregbase);
-		dd->ipath_kregbase = NULL;
-	}
+
+	cleanup_device(dd);
+
+	/*
+	 * turn off rcv, send, and interrupts for all ports, all drivers
+	 * should also hard reset the chip here?
+	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+	 * for all versions of the driver, if they were allocated
+	 */
+	if (pdev->irq) {
+		ipath_cdbg(VERBOSE,
+			   "unit %u free_irq of irq %x\n",
+			   dd->ipath_unit, pdev->irq);
+		free_irq(pdev->irq, dd);
+	} else
+		ipath_dbg("irq is 0, not doing free_irq "
+			  "for unit %u\n", dd->ipath_unit);
+	/*
+	 * we check for NULL here, because it's outside
+	 * the kregbase check, and we need to call it
+	 * after the free_irq.	Thus it's possible that
+	 * the function pointers were never initialized.
+	 */
+	if (dd->ipath_f_cleanup)
+		/* clean up chip-specific stuff */
+		dd->ipath_f_cleanup(dd);
+
+	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+	iounmap((volatile void __iomem *) dd->ipath_kregbase);
 	pci_release_regions(pdev);
 	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
 	pci_disable_device(pdev);
@@ -683,9 +812,42 @@ static int ipath_wait_linkstate(struct i
 	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else.  Return 1 for "real" errors, otherwise 0 if only packet
+ * errors, so the caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 {
+	int iserr = 1;
 	*buf = '\0';
+	if (err & INFINIPATH_E_PKTERRS) {
+		if (!(err & ~INFINIPATH_E_PKTERRS))
+			iserr = 0; /* if only packet errors */
+		if (ipath_debug & __IPATH_ERRPKTDBG) {
+			if (err & INFINIPATH_E_REBP)
+				strlcat(buf, "EBP ", blen);
+			if (err & INFINIPATH_E_RVCRC)
+				strlcat(buf, "VCRC ", blen);
+			if (err & INFINIPATH_E_RICRC) {
+				strlcat(buf, "CRC ", blen);
+				/* clear for check below, so only once */
+				err &= INFINIPATH_E_RICRC;
+			}
+			if (err & INFINIPATH_E_RSHORTPKTLEN)
+				strlcat(buf, "rshortpktlen ", blen);
+			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+				strlcat(buf, "sdroppeddatapkt ", blen);
+			if (err & INFINIPATH_E_SPKTLEN)
+				strlcat(buf, "spktlen ", blen);
+		}
+		if ((err & INFINIPATH_E_RICRC) &&
+		    !(err & (INFINIPATH_E_RVCRC | INFINIPATH_E_REBP)))
+			strlcat(buf, "CRC ", blen);
+		if (!iserr)
+			goto done;
+	}
 	if (err & INFINIPATH_E_RHDRLEN)
 		strlcat(buf, "rhdrlen ", blen);
 	if (err & INFINIPATH_E_RBADTID)
@@ -696,12 +858,12 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "rhdr ", blen);
 	if (err & INFINIPATH_E_RLONGPKTLEN)
 		strlcat(buf, "rlongpktlen ", blen);
-	if (err & INFINIPATH_E_RSHORTPKTLEN)
-		strlcat(buf, "rshortpktlen ", blen);
 	if (err & INFINIPATH_E_RMAXPKTLEN)
 		strlcat(buf, "rmaxpktlen ", blen);
 	if (err & INFINIPATH_E_RMINPKTLEN)
 		strlcat(buf, "rminpktlen ", blen);
+	if (err & INFINIPATH_E_SMINPKTLEN)
+		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_RFORMATERR)
 		strlcat(buf, "rformaterr ", blen);
 	if (err & INFINIPATH_E_RUNSUPVL)
@@ -710,32 +872,20 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "runexpchar ", blen);
 	if (err & INFINIPATH_E_RIBFLOW)
 		strlcat(buf, "ribflow ", blen);
-	if (err & INFINIPATH_E_REBP)
-		strlcat(buf, "EBP ", blen);
 	if (err & INFINIPATH_E_SUNDERRUN)
 		strlcat(buf, "sunderrun ", blen);
 	if (err & INFINIPATH_E_SPIOARMLAUNCH)
 		strlcat(buf, "spioarmlaunch ", blen);
 	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
 		strlcat(buf, "sunexperrpktnum ", blen);
-	if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-		strlcat(buf, "sdroppeddatapkt ", blen);
 	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
 		strlcat(buf, "sdroppedsmppkt ", blen);
 	if (err & INFINIPATH_E_SMAXPKTLEN)
 		strlcat(buf, "smaxpktlen ", blen);
-	if (err & INFINIPATH_E_SMINPKTLEN)
-		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_SUNSUPVL)
 		strlcat(buf, "sunsupVL ", blen);
-	if (err & INFINIPATH_E_SPKTLEN)
-		strlcat(buf, "spktlen ", blen);
 	if (err & INFINIPATH_E_INVALIDADDR)
 		strlcat(buf, "invalidaddr ", blen);
-	if (err & INFINIPATH_E_RICRC)
-		strlcat(buf, "CRC ", blen);
-	if (err & INFINIPATH_E_RVCRC)
-		strlcat(buf, "VCRC ", blen);
 	if (err & INFINIPATH_E_RRCVEGRFULL)
 		strlcat(buf, "rcvegrfull ", blen);
 	if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -748,6 +898,8 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "hardware ", blen);
 	if (err & INFINIPATH_E_RESET)
 		strlcat(buf, "reset ", blen);
+done:
+	return iserr;
 }
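
ipath_decode_err() above builds a human-readable string by testing each error bit and appending a short name with strlcat(), returning 1 only when non-packet ("real") errors are present. A userspace sketch of the same idiom, with made-up flag names and buf_append() standing in for strlcat():

#include <stdio.h>
#include <string.h>

#define ERR_CRC		0x1
#define ERR_LEN		0x2
#define ERR_RESET	0x4
#define ERR_PKTERRS	(ERR_CRC | ERR_LEN)	/* "normal" packet errors */

static void buf_append(char *buf, size_t blen, const char *s)
{
	size_t used = strlen(buf);

	if (used + 1 < blen)
		snprintf(buf + used, blen - used, "%s", s);
}

/* Returns 1 for "real" errors, 0 if only packet errors were set. */
static int decode_err(char *buf, size_t blen, unsigned int err)
{
	int iserr = !!(err & ~ERR_PKTERRS);

	buf[0] = '\0';
	if (err & ERR_CRC)
		buf_append(buf, blen, "CRC ");
	if (err & ERR_LEN)
		buf_append(buf, blen, "len ");
	if (err & ERR_RESET)
		buf_append(buf, blen, "reset ");
	return iserr;
}

int main(void)
{
	char buf[64];

	printf("real=%d msg=%s\n", decode_err(buf, sizeof(buf), ERR_CRC), buf);
	printf("real=%d msg=%s\n",
	       decode_err(buf, sizeof(buf), ERR_CRC | ERR_RESET), buf);
	return 0;
}
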
 
 /**
@@ -800,8 +952,8 @@ static void get_rhf_errstring(u32 err, c
 static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
 				     int err)
 {
-	return dd->ipath_port0_skbs ?
-		(void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+	return dd->ipath_port0_skbinfo ?
+		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
 }
 
 /**
@@ -823,31 +975,34 @@ struct sk_buff *ipath_alloc_skb(struct i
 	 */
 
 	/*
-	 * We need 4 extra bytes for unaligned transfer copying
+	 * We need 2 extra bytes for ipath_ether data sent in the
+	 * key header.  In order to keep everything dword aligned,
+	 * we'll reserve 4 bytes.
 	 */
+	len = dd->ipath_ibmaxlen + 4;
+
 	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		/* we need a 4KB multiple alignment, and there is no way
+		/* We need a 2KB multiple alignment, and there is no way
 		 * to do it except to allocate extra and then skb_reserve
 		 * enough to bring it up to the right alignment.
 		 */
-		len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+		len += 2047;
 	}
-	else
-		len = dd->ipath_ibmaxlen + 4;
+
 	skb = __dev_alloc_skb(len, gfp_mask);
 	if (!skb) {
 		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
 			      len);
 		goto bail;
 	}
+
+	skb_reserve(skb, 4);
+
 	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+		u32 una = (unsigned long)skb->data & 2047;
 		if (una)
-			skb_reserve(skb, 4 + (1 << 11) - una);
-		else
-			skb_reserve(skb, 4);
-	} else
-		skb_reserve(skb, 4);
+			skb_reserve(skb, 2048 - una);
+	}
 
 bail:
 	return skb;
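
The skb change above uses the usual over-allocate-and-skip alignment trick: allocate (align - 1) extra bytes, then reserve just enough so the data pointer lands on a 2KB boundary. A userspace sketch, with malloc() standing in for __dev_alloc_skb() and pointer arithmetic for skb_reserve():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void *alloc_aligned(size_t len, size_t align, void **to_free)
{
	unsigned char *p = malloc(len + align - 1);
	uintptr_t una;

	if (!p)
		return NULL;
	*to_free = p;				/* keep the original pointer for free() */
	una = (uintptr_t)p & (align - 1);	/* e.g. addr & 2047 */
	if (una)
		p += align - una;		/* e.g. skb_reserve(skb, 2048 - una) */
	return p;
}

int main(void)
{
	void *raw;
	void *buf = alloc_aligned(4096, 2048, &raw);

	printf("aligned: %s\n",
	       buf && ((uintptr_t)buf & 2047) == 0 ? "yes" : "no");
	free(raw);
	return 0;
}
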
@@ -1366,6 +1521,9 @@ int ipath_create_rcvhdrq(struct ipath_de
 				      "for port %u rcvhdrqtailaddr failed\n",
 				      pd->port_port);
 			ret = -ENOMEM;
+			dma_free_coherent(&dd->pcidev->dev, amt,
+					  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+			pd->port_rcvhdrq = NULL;
 			goto bail;
 		}
 		pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1387,12 +1545,13 @@ int ipath_create_rcvhdrq(struct ipath_de
 		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
 			   "hdrtailaddr@%p %llx physical\n",
 			   pd->port_port, pd->port_rcvhdrq,
-			   pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
-			   (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
+			   (unsigned long long) pd->port_rcvhdrq_phys,
+			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+			   pd->port_rcvhdrqtailaddr_phys);
 
 	/* clear for security and sanity on each use */
 	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-	memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+	memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
 
 	/*
 	 * tell chip each time we init it, even if we are re-using previous
@@ -1584,6 +1743,22 @@ int ipath_set_linkstate(struct ipath_dev
 		lstate = IPATH_LINKACTIVE;
 		break;
 
+	case IPATH_IB_LINK_LOOPBACK:
+		dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		ret = 0;
+		goto bail; /* no state change to wait for */
+
+	case IPATH_IB_LINK_EXTERNAL:
+		dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		ret = 0;
+		goto bail; /* no state change to wait for */
+
 	default:
 		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
 		ret = -EINVAL;
@@ -1684,34 +1859,11 @@ int ipath_set_lid(struct ipath_devdata *
 	dd->ipath_lid = arg;
 	dd->ipath_lmc = lmc;
 
-	dev_info(&dd->pcidev->dev, "We got a lid: %u\n", arg);
+	dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", arg);
 
 	return 0;
 }
 
-/**
- * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to read
- * @port: the port containing the register
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
-			   unsigned port)
-{
-	u16 where;
-
-	if (port < dd->ipath_portcnt &&
-	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-		where = regno + port;
-	else
-		where = -1;
-
-	return ipath_read_kreg64(dd, where);
-}
 
 /**
  * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1749,8 +1901,6 @@ void ipath_write_kreg_port(const struct 
  */
 void ipath_shutdown_device(struct ipath_devdata *dd)
 {
-	u64 val;
-
 	ipath_dbg("Shutting down the device\n");
 
 	dd->ipath_flags |= IPATH_LINKUNK;
@@ -1773,7 +1923,7 @@ void ipath_shutdown_device(struct ipath_
 	 */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
 	/* flush it */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	/*
 	 * enough for anything that's going to trickle out to have actually
 	 * done so.
@@ -1847,7 +1997,7 @@ void ipath_free_pddata(struct ipath_devd
 		pd->port_rcvhdrq = NULL;
 		if (pd->port_rcvhdrtail_kvaddr) {
 			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-					 (void *)pd->port_rcvhdrtail_kvaddr,
+					 pd->port_rcvhdrtail_kvaddr,
 					 pd->port_rcvhdrqtailaddr_phys);
 			pd->port_rcvhdrtail_kvaddr = NULL;
 		}
@@ -1866,24 +2016,32 @@ void ipath_free_pddata(struct ipath_devd
 			dma_free_coherent(&dd->pcidev->dev, size,
 				base, pd->port_rcvegrbuf_phys[e]);
 		}
-		vfree(pd->port_rcvegrbuf);
+		kfree(pd->port_rcvegrbuf);
 		pd->port_rcvegrbuf = NULL;
-		vfree(pd->port_rcvegrbuf_phys);
+		kfree(pd->port_rcvegrbuf_phys);
 		pd->port_rcvegrbuf_phys = NULL;
 		pd->port_rcvegrbuf_chunks = 0;
-	} else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
+	} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
 		unsigned e;
-		struct sk_buff **skbs = dd->ipath_port0_skbs;
+		struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
 
-		dd->ipath_port0_skbs = NULL;
-		ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
-			   "@ %p\n", pd->port_port, skbs);
+		dd->ipath_port0_skbinfo = NULL;
+		ipath_cdbg(VERBOSE, "free closed port %d "
+			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
+			   skbinfo);
 		for (e = 0; e < dd->ipath_rcvegrcnt; e++)
-			if (skbs[e])
-				dev_kfree_skb(skbs[e]);
-		vfree(skbs);
+		if (skbinfo[e].skb) {
+			pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+					 dd->ipath_ibmaxlen,
+					 PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(skbinfo[e].skb);
+		}
+		vfree(skbinfo);
 	}
 	kfree(pd->port_tid_pg_list);
+	vfree(pd->subport_uregbase);
+	vfree(pd->subport_rcvegrbuf);
+	vfree(pd->subport_rcvhdr_base);
 	kfree(pd);
 }
 
@@ -1891,7 +2049,8 @@ static int __init infinipath_init(void)
 {
 	int ret;
 
-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+	if (ipath_debug & __IPATH_DBG)
+		printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
 	/*
 	 * These must be called before the driver is registered with
@@ -1939,150 +2098,12 @@ bail:
 	return ret;
 }
 
-static void cleanup_device(struct ipath_devdata *dd)
-{
-	int port;
-
-	ipath_shutdown_device(dd);
-
-	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-		/* can't do anything more with chip; needs re-init */
-		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-		if (dd->ipath_kregbase) {
-			/*
-			 * if we haven't already cleaned up before these are
-			 * to ensure any register reads/writes "fail" until
-			 * re-init
-			 */
-			dd->ipath_kregbase = NULL;
-			dd->ipath_uregbase = 0;
-			dd->ipath_sregbase = 0;
-			dd->ipath_cregbase = 0;
-			dd->ipath_kregsize = 0;
-		}
-		ipath_disable_wc(dd);
-	}
-
-	if (dd->ipath_pioavailregs_dma) {
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  (void *) dd->ipath_pioavailregs_dma,
-				  dd->ipath_pioavailregs_phys);
-		dd->ipath_pioavailregs_dma = NULL;
-	}
-	if (dd->ipath_dummy_hdrq) {
-		dma_free_coherent(&dd->pcidev->dev,
-			dd->ipath_pd[0]->port_rcvhdrq_size,
-			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-		dd->ipath_dummy_hdrq = NULL;
-	}
-
-	if (dd->ipath_pageshadow) {
-		struct page **tmpp = dd->ipath_pageshadow;
-		int i, cnt = 0;
-
-		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-			   "locked\n");
-		for (port = 0; port < dd->ipath_cfgports; port++) {
-			int port_tidbase = port * dd->ipath_rcvtidcnt;
-			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-			for (i = port_tidbase; i < maxtid; i++) {
-				if (!tmpp[i])
-					continue;
-				ipath_release_user_pages(&tmpp[i], 1);
-				tmpp[i] = NULL;
-				cnt++;
-			}
-		}
-		if (cnt) {
-			ipath_stats.sps_pageunlocks += cnt;
-			ipath_cdbg(VERBOSE, "There were still %u expTID "
-				   "entries locked\n", cnt);
-		}
-		if (ipath_stats.sps_pagelocks ||
-		    ipath_stats.sps_pageunlocks)
-			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-				   "unlocked via ipath_m{un}lock\n",
-				   (unsigned long long)
-				   ipath_stats.sps_pagelocks,
-				   (unsigned long long)
-				   ipath_stats.sps_pageunlocks);
-
-		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-			   dd->ipath_pageshadow);
-		vfree(dd->ipath_pageshadow);
-		dd->ipath_pageshadow = NULL;
-	}
-
-	/*
-	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, not cfgports, because cfgports
-	 * could have changed while we were loaded.
-	 */
-	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = dd->ipath_pd[port];
-		dd->ipath_pd[port] = NULL;
-		ipath_free_pddata(dd, pd);
-	}
-	kfree(dd->ipath_pd);
-	/*
-	 * debuggability, in case some cleanup path tries to use it
-	 * after this
-	 */
-	dd->ipath_pd = NULL;
-}
-
 static void __exit infinipath_cleanup(void)
 {
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	ipath_diagpkt_remove();
-
 	ipath_exit_ipathfs();
 
 	ipath_driver_remove_group(&ipath_driver.driver);
 
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	/*
-	 * turn off rcv, send, and interrupts for all ports, all drivers
-	 * should also hard reset the chip here?
-	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-	 * for all versions of the driver, if they were allocated
-	 */
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-		if (dd->ipath_kregbase)
-			cleanup_device(dd);
-
-		if (dd->pcidev) {
-			if (dd->pcidev->irq) {
-				ipath_cdbg(VERBOSE,
-					   "unit %u free_irq of irq %x\n",
-					   dd->ipath_unit, dd->pcidev->irq);
-				free_irq(dd->pcidev->irq, dd);
-			} else
-				ipath_dbg("irq is 0, not doing free_irq "
-					  "for unit %u\n", dd->ipath_unit);
-
-			/*
-			 * we check for NULL here, because it's outside
-			 * the kregbase check, and we need to call it
-			 * after the free_irq.  Thus it's possible that
-			 * the function pointers were never initialized.
-			 */
-			if (dd->ipath_f_cleanup)
-				/* clean up chip-specific stuff */
-				dd->ipath_f_cleanup(dd);
-
-			dd->pcidev = NULL;
-		}
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
 	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
 	pci_unregister_driver(&ipath_driver);
 
@@ -2158,9 +2179,9 @@ int ipath_set_rx_pol_inv(struct ipath_de
 		dd->ipath_rx_pol_inv = new_pol_inv;
 		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
 		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-                         INFINIPATH_XGXS_RX_POL_SHIFT);
-                val |= ((u64)dd->ipath_rx_pol_inv) <<
-                        INFINIPATH_XGXS_RX_POL_SHIFT;
+			 INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= ((u64)dd->ipath_rx_pol_inv) <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 	}
 	return 0;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_dev
 	gpioval = &dd->ipath_gpio_out;
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	if (line == i2c_line_scl)
-		mask = ipath_gpio_scl;
+		mask = dd->ipath_gpio_scl;
 	else
-		mask = ipath_gpio_sda;
+		mask = dd->ipath_gpio_sda;
 
 	if (new_line_state == i2c_line_high)
 		/* tri-state the output rather than force high */
@@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_dev
 		write_val = 0x0UL;
 
 	if (line == i2c_line_scl) {
-		write_val <<= ipath_gpio_scl_num;
-		*gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
+		write_val <<= dd->ipath_gpio_scl_num;
+		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
 		*gpioval |= write_val;
 	} else {
-		write_val <<= ipath_gpio_sda_num;
-		*gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
+		write_val <<= dd->ipath_gpio_sda_num;
+		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
 		*gpioval |= write_val;
 	}
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
@@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_dev
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	/* config line to be an input */
 	if (line == i2c_line_scl)
-		mask = ipath_gpio_scl;
+		mask = dd->ipath_gpio_scl;
 	else
-		mask = ipath_gpio_sda;
+		mask = dd->ipath_gpio_sda;
 	write_val = read_val & ~mask;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
@@ -187,6 +187,7 @@ bail:
 static void i2c_wait_for_writes(struct ipath_devdata *dd)
 {
 	(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	rmb();
 }
 
 static void scl_out(struct ipath_devdata *dd, u8 bit)
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -57,18 +57,53 @@ static struct file_operations ipath_file
 	.mmap = ipath_mmap
 };
 
-static int ipath_get_base_info(struct ipath_portdata *pd,
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+	struct page *page;
+	u64 paddr = 0;
+
+	page = vmalloc_to_page(p);
+	if (page)
+		paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+static int ipath_get_base_info(struct file *fp,
 			       void __user *ubase, size_t ubase_size)
 {
+	struct ipath_portdata *pd = port_fp(fp);
 	int ret = 0;
 	struct ipath_base_info *kinfo = NULL;
 	struct ipath_devdata *dd = pd->port_dd;
+	unsigned subport_cnt;
+	int shared, master;
+	size_t sz;
+
+	subport_cnt = pd->port_subport_cnt;
+	if (!subport_cnt) {
+		shared = 0;
+		master = 0;
+		subport_cnt = 1;
+	} else {
+		shared = 1;
+		master = !subport_fp(fp);
+	}
 
-	if (ubase_size < sizeof(*kinfo)) {
+	sz = sizeof(*kinfo);
+	/* If port sharing is not requested, allow the old size structure */
+	if (!shared)
+		sz -= 7 * sizeof(u64);
+	if (ubase_size < sz) {
 		ipath_cdbg(PROC,
-			   "Base size %lu, need %lu (version mismatch?)\n",
-			   (unsigned long) ubase_size,
-			   (unsigned long) sizeof(*kinfo));
+			   "Base size %zu, need %zu (version mismatch?)\n",
+			   ubase_size, sz);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -95,7 +130,9 @@ static int ipath_get_base_info(struct ip
 	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
 	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
 		pd->port_rcvegrbuf_chunks;
-	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+	if (master)
+		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
 	/*
 	 * for this use, may be ipath_cfgports summed over all chips that
 	 * are configured and present
@@ -118,32 +155,83 @@ static int ipath_get_base_info(struct ip
 	 * page_address() macro worked, but in 2.6.11, even that returns the
 	 * full 64 bit address (upper bits all 1's).  So far, using the
 	 * physical addresses (or chip offsets, for chip mapping) works, but
-	 * no doubt some future kernel release will chang that, and we'll be
-	 * on to yet another method of dealing with this
+	 * no doubt some future kernel release will change that, and we'll be
+	 * on to yet another method of dealing with this.
 	 */
 	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-	kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
+	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
 	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
 	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
 	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
 		(void *) dd->ipath_statusp -
 		(void *) dd->ipath_pioavailregs_dma;
-	kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-	kinfo->__spi_uregbase =
-		dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+	if (!shared) {
+		kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+			dd->ipath_palign * pd->port_port;
+	} else if (master) {
+		kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+				    (dd->ipath_pbufsport % subport_cnt);
+		/* Master's PIO buffers are after all the slave's */
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+			dd->ipath_palign *
+			(dd->ipath_pbufsport - kinfo->spi_piocnt);
+	} else {
+		unsigned slave = subport_fp(fp) - 1;
 
-	kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
-	kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+			dd->ipath_palign * kinfo->spi_piocnt * slave;
+	}
+	if (shared) {
+		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+			dd->ipath_palign * pd->port_port;
+		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
+
+		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+			PAGE_SIZE * subport_fp(fp));
+
+		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+			pd->port_rcvhdrq_size * subport_fp(fp));
+		kinfo->spi_rcvhdr_tailaddr = 0;
+		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+			subport_fp(fp));
+
+		kinfo->spi_subport_uregbase =
+			cvt_kvaddr(pd->subport_uregbase);
+		kinfo->spi_subport_rcvegrbuf =
+			cvt_kvaddr(pd->subport_rcvegrbuf);
+		kinfo->spi_subport_rcvhdr_base =
+			cvt_kvaddr(pd->subport_rcvhdr_base);
+		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+			kinfo->spi_port, kinfo->spi_runtime_flags,
+			(unsigned long long) kinfo->spi_subport_uregbase,
+			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
+			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
+	}
+
+	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+		dd->ipath_palign;
 	kinfo->spi_pioalign = dd->ipath_palign;
 
 	kinfo->spi_qpair = IPATH_KD_QP;
 	kinfo->spi_piosize = dd->ipath_ibmaxlen;
 	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
 	kinfo->spi_port = pd->port_port;
+	kinfo->spi_subport = subport_fp(fp);
 	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
 	kinfo->spi_hw_version = dd->ipath_revision;
 
-	if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
+	if (master)
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+
+	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+	if (copy_to_user(ubase, kinfo, sz))
 		ret = -EFAULT;
 
 bail:
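
The shared-port arithmetic above splits a port's PIO buffers (and, elsewhere in this patch, its expected TIDs) among subports: every slave gets the quotient, and the master also absorbs the remainder, so nothing is lost. A plain-C sketch of that division (not driver code):

#include <stdio.h>

static unsigned int piocnt_for(unsigned int total, unsigned int subports,
			       unsigned int subport /* 0 == master */)
{
	if (subports <= 1)
		return total;			/* port not shared */
	if (subport == 0)
		return total / subports + total % subports;
	return total / subports;
}

int main(void)
{
	unsigned int total = 22, subports = 4, s, sum = 0;

	for (s = 0; s < subports; s++) {
		unsigned int n = piocnt_for(total, subports, s);

		printf("subport %u gets %u buffers\n", s, n);
		sum += n;
	}
	printf("total handed out: %u of %u\n", sum, total);
	return 0;
}
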
@@ -154,6 +242,7 @@ bail:
 /**
  * ipath_tid_update - update a port TID
  * @pd: the port
+ * @fp: the ipath device file
  * @ti: the TID information
  *
  * The new implementation as of Oct 2004 is that the driver assigns
@@ -176,11 +265,11 @@ bail:
  * virtually contiguous pages, that should change to improve
  * performance.
  */
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
 			    const struct ipath_tid_info *ti)
 {
 	int ret = 0, ntids;
-	u32 tid, porttid, cnt, i, tidcnt;
+	u32 tid, porttid, cnt, i, tidcnt, tidoff;
 	u16 *tidlist;
 	struct ipath_devdata *dd = pd->port_dd;
 	u64 physaddr;
@@ -188,6 +277,7 @@ static int ipath_tid_update(struct ipath
 	u64 __iomem *tidbase;
 	unsigned long tidmap[8];
 	struct page **pagep = NULL;
+	unsigned subport = subport_fp(fp);
 
 	if (!dd->ipath_pageshadow) {
 		ret = -ENOMEM;
@@ -204,20 +294,34 @@ static int ipath_tid_update(struct ipath
 		ret = -EFAULT;
 		goto done;
 	}
-	tidcnt = dd->ipath_rcvtidcnt;
-	if (cnt >= tidcnt) {
+	porttid = pd->port_port * dd->ipath_rcvtidcnt;
+	if (!pd->port_subport_cnt) {
+		tidcnt = dd->ipath_rcvtidcnt;
+		tid = pd->port_tidcursor;
+		tidoff = 0;
+	} else if (!subport) {
+		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+		tidoff = dd->ipath_rcvtidcnt - tidcnt;
+		porttid += tidoff;
+		tid = tidcursor_fp(fp);
+	} else {
+		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+		tidoff = tidcnt * (subport - 1);
+		porttid += tidoff;
+		tid = tidcursor_fp(fp);
+	}
+	if (cnt > tidcnt) {
 		/* make sure it all fits in port_tid_pg_list */
 		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
 			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
 		cnt = tidcnt;
 	}
-	pagep = (struct page **)pd->port_tid_pg_list;
-	tidlist = (u16 *) (&pagep[cnt]);
+	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
 
 	memset(tidmap, 0, sizeof(tidmap));
-	tid = pd->port_tidcursor;
 	/* before decrement; chip actual # */
-	porttid = pd->port_port * tidcnt;
 	ntids = tidcnt;
 	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
 				   dd->ipath_rcvtidbase +
@@ -274,16 +378,19 @@ static int ipath_tid_update(struct ipath
 			ret = -ENOMEM;
 			break;
 		}
-		tidlist[i] = tid;
+		tidlist[i] = tid + tidoff;
 		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-			   "vaddr %lx\n", i, tid, vaddr);
+			   "vaddr %lx\n", i, tid + tidoff, vaddr);
 		/* we "know" system pages and TID pages are same size */
 		dd->ipath_pageshadow[porttid + tid] = pagep[i];
+		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+			dd->pcidev, pagep[i], 0, PAGE_SIZE,
+			PCI_DMA_FROMDEVICE);
 		/*
 		 * don't need atomic or it's overhead
 		 */
 		__set_bit(tid, tidmap);
-		physaddr = page_to_phys(pagep[i]);
+		physaddr = dd->ipath_physshadow[porttid + tid];
 		ipath_stats.sps_pagelocks++;
 		ipath_cdbg(VERBOSE,
 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
@@ -317,6 +424,9 @@ static int ipath_tid_update(struct ipath
 					   tid);
 				dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 						    dd->ipath_tidinvalid);
+				pci_unmap_page(dd->pcidev,
+					dd->ipath_physshadow[porttid + tid],
+					PAGE_SIZE, PCI_DMA_FROMDEVICE);
 				dd->ipath_pageshadow[porttid + tid] = NULL;
 				ipath_stats.sps_pageunlocks++;
 			}
@@ -341,7 +451,10 @@ static int ipath_tid_update(struct ipath
 		}
 		if (tid == tidcnt)
 			tid = 0;
-		pd->port_tidcursor = tid;
+		if (!pd->port_subport_cnt)
+			pd->port_tidcursor = tid;
+		else
+			tidcursor_fp(fp) = tid;
 	}
 
 done:
@@ -354,6 +467,7 @@ done:
 /**
  * ipath_tid_free - free a port TID
  * @pd: the port
+ * @subport: the subport
  * @ti: the TID info
  *
  * right now we are unlocking one page at a time, but since
@@ -367,7 +481,7 @@ done:
  * they pass in to us.
  */
 
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
 			  const struct ipath_tid_info *ti)
 {
 	int ret = 0;
@@ -388,11 +502,20 @@ static int ipath_tid_free(struct ipath_p
 	}
 
 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
+	if (!pd->port_subport_cnt)
+		tidcnt = dd->ipath_rcvtidcnt;
+	else if (!subport) {
+		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+		porttid += dd->ipath_rcvtidcnt - tidcnt;
+	} else {
+		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+		porttid += tidcnt * (subport - 1);
+	}
 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
 				   dd->ipath_rcvtidbase +
 				   porttid * sizeof(*tidbase));
 
-	tidcnt = dd->ipath_rcvtidcnt;
 	limit = sizeof(tidmap) * BITS_PER_BYTE;
 	if (limit > tidcnt)
 		/* just in case size changes in future */
@@ -417,6 +540,9 @@ static int ipath_tid_free(struct ipath_p
 				   pd->port_pid, tid);
 			dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 					    dd->ipath_tidinvalid);
+			pci_unmap_page(dd->pcidev,
+				dd->ipath_physshadow[porttid + tid],
+				PAGE_SIZE, PCI_DMA_FROMDEVICE);
 			ipath_release_user_pages(
 				&dd->ipath_pageshadow[porttid + tid], 1);
 			dd->ipath_pageshadow[porttid + tid] = NULL;
@@ -581,20 +707,23 @@ bail:
 /**
  * ipath_manage_rcvq - manage a port's receive queue
  * @pd: the port
+ * @subport: the subport
  * @start_stop: action to carry out
  *
  * start_stop == 0 disables receive on the port, for use in queue
  * overflow conditions.  start_stop==1 re-enables, to be used to
  * re-init the software copy of the head register
  */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+			     int start_stop)
 {
 	struct ipath_devdata *dd = pd->port_dd;
-	u64 tval;
 
-	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
 		   start_stop ? "en" : "dis", dd->ipath_unit,
-		   pd->port_port);
+		   pd->port_port, subport);
+	if (subport)
+		goto bail;
 	/* atomically clear receive enable port. */
 	if (start_stop) {
 		/*
@@ -609,7 +738,7 @@ static int ipath_manage_rcvq(struct ipat
 		 * updated and correct itself, even in the face of software
 		 * bugs.
 		 */
-		*pd->port_rcvhdrtail_kvaddr = 0;
+		*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
 		set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
 			&dd->ipath_rcvctrl);
 	} else
@@ -618,7 +747,7 @@ static int ipath_manage_rcvq(struct ipat
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 	/* now be sure chip saw it before we return */
-	tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	if (start_stop) {
 		/*
 		 * And try to be sure that tail reg update has happened too.
@@ -627,9 +756,10 @@ static int ipath_manage_rcvq(struct ipat
 		 * in memory copy, since we could overwrite an update by the
 		 * chip if we did.
 		 */
-		tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
 	}
 	/* always; new head should be equal to new tail; see above */
+bail:
 	return 0;
 }
 
@@ -687,6 +817,36 @@ static void ipath_clean_part_key(struct 
 	}
 }
 
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+	struct ipath_devdata *dd = pd->port_dd;
+	unsigned egrperchunk, egrcnt, size;
+
+	/*
+	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
+	 * physically contiguous memory, advance through it until used up
+	 * and then allocate more.  Of course, we need memory to store those
+	 * extra pointers, now.  Started out with 256KB, but under heavy
+	 * memory pressure (creating large files and then copying them over
+	 * NFS while doing lots of MPI jobs), we hit some allocation
+	 * failures, even though we can sleep...  (2.6.10) Still get
+	 * failures at 64K.  32K is the lowest we can go without wasting
+	 * additional memory.
+	 */
+	size = 0x8000;
+	egrperchunk = size / dd->ipath_rcvegrbufsize;
+	egrcnt = dd->ipath_rcvegrcnt;
+	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+	pd->port_rcvegrbufs_perchunk = egrperchunk;
+	pd->port_rcvegrbuf_size = size;
+}
+
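For reference, the sizing logic in init_user_egr_sizes() above is just a ceiling division: how many eager buffers fit in one 32 KB chunk, and how many chunks are then needed to cover all the eager TIDs. A minimal userspace sketch of that arithmetic, using made-up values (2048-byte eager buffers and 64 eager TIDs are illustrative only, not taken from any particular chip):

#include <stdio.h>

int main(void)
{
	unsigned size = 0x8000;      /* 32 KB chunk, same constant as the driver */
	unsigned egrbufsize = 2048;  /* hypothetical ipath_rcvegrbufsize */
	unsigned egrcnt = 64;        /* hypothetical ipath_rcvegrcnt */

	unsigned egrperchunk = size / egrbufsize;                    /* 16 buffers per chunk */
	unsigned chunks = (egrcnt + egrperchunk - 1) / egrperchunk;  /* ceil(64 / 16) = 4 */

	printf("%u buffers per chunk, %u chunks\n", egrperchunk, chunks);
	return 0;
}
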
 /**
  * ipath_create_user_egr - allocate eager TID buffers
  * @pd: the port to allocate TID buffers for
@@ -702,7 +862,7 @@ static void ipath_clean_part_key(struct 
 static int ipath_create_user_egr(struct ipath_portdata *pd)
 {
 	struct ipath_devdata *dd = pd->port_dd;
-	unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
 	size_t size;
 	int ret;
 	gfp_t gfp_flags;
@@ -722,31 +882,18 @@ static int ipath_create_user_egr(struct 
 	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
 		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
 
-	/*
-	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
-	 * physically contiguous memory, advance through it until used up
-	 * and then allocate more.  Of course, we need memory to store those
-	 * extra pointers, now.  Started out with 256KB, but under heavy
-	 * memory pressure (creating large files and then copying them over
-	 * NFS while doing lots of MPI jobs), we hit some allocation
-	 * failures, even though we can sleep...  (2.6.10) Still get
-	 * failures at 64K.  32K is the lowest we can go without wasting
-	 * additional memory.
-	 */
-	size = 0x8000;
-	alloced = ALIGN(egrsize * egrcnt, size);
-	egrperchunk = size / egrsize;
-	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
-	pd->port_rcvegrbuf_chunks = chunk;
-	pd->port_rcvegrbufs_perchunk = egrperchunk;
-	pd->port_rcvegrbuf_size = size;
-	pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+	chunk = pd->port_rcvegrbuf_chunks;
+	egrperchunk = pd->port_rcvegrbufs_perchunk;
+	size = pd->port_rcvegrbuf_size;
+	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+				     GFP_KERNEL);
 	if (!pd->port_rcvegrbuf) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 	pd->port_rcvegrbuf_phys =
-		vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+			GFP_KERNEL);
 	if (!pd->port_rcvegrbuf_phys) {
 		ret = -ENOMEM;
 		goto bail_rcvegrbuf;
@@ -791,105 +938,23 @@ bail_rcvegrbuf_phys:
 				  pd->port_rcvegrbuf_phys[e]);
 
 	}
-	vfree(pd->port_rcvegrbuf_phys);
+	kfree(pd->port_rcvegrbuf_phys);
 	pd->port_rcvegrbuf_phys = NULL;
 bail_rcvegrbuf:
-	vfree(pd->port_rcvegrbuf);
+	kfree(pd->port_rcvegrbuf);
 	pd->port_rcvegrbuf = NULL;
 bail:
 	return ret;
 }
 
-static int ipath_do_user_init(struct ipath_portdata *pd,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret = 0;
-	struct ipath_devdata *dd = pd->port_dd;
-	u32 head32;
-
-	/* for now, if major version is different, bail */
-	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
-		dev_info(&dd->pcidev->dev,
-			 "User major version %d not same as driver "
-			 "major %d\n", uinfo->spu_userversion >> 16,
-			 IPATH_USER_SWMAJOR);
-		ret = -ENODEV;
-		goto done;
-	}
-
-	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
-		ipath_dbg("User minor version %d not same as driver "
-			  "minor %d\n", uinfo->spu_userversion & 0xffff,
-			  IPATH_USER_SWMINOR);
-
-	if (uinfo->spu_rcvhdrsize) {
-		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-		if (ret)
-			goto done;
-	}
-
-	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
-	pd->port_piobufs = dd->ipath_piobufbase +
-		dd->ipath_pbufsport * (pd->port_port -
-				       1) * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
-		   pd->port_port, pd->port_piobufs);
-
-	/*
-	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-	 * array for time being.  If pd->port_port > chip-supported,
-	 * we need to do extra stuff here to handle by handling overflow
-	 * through port 0, someday
-	 */
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = ipath_create_user_egr(pd);
-	if (ret)
-		goto done;
-
-	/*
-	 * set the eager head register for this port to the current values
-	 * of the tail pointers, since we don't know if they were
-	 * updated on last use of the port.
-	 */
-	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-	dd->ipath_lastegrheads[pd->port_port] = -1;
-	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
-	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-		pd->port_port, head32);
-	pd->port_tidcursor = 0;	/* start at beginning after open */
-	/*
-	 * now enable the port; the tail registers will be written to memory
-	 * by the chip as soon as it sees the write to
-	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
-	 * transition from 0 to 1, so clear it first, then set it as part of
-	 * enabling the port.  This will (very briefly) affect any other
-	 * open ports, but it shouldn't be long enough to be an issue.
-	 * We explictly set the in-memory copy to 0 beforehand, so we don't
-	 * have to wait to be sure the DMA update has happened.
-	 */
-	*pd->port_rcvhdrtail_kvaddr = 0ULL;
-	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
-		&dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-done:
-	return ret;
-}
-
 
 /* common code for the mappings on dma_alloc_coherent mem */
 static int ipath_mmap_mem(struct vm_area_struct *vma,
-			     struct ipath_portdata *pd, unsigned len,
-			     int write_ok, dma_addr_t addr, char *what)
+	struct ipath_portdata *pd, unsigned len, int write_ok,
+	void *kvaddr, char *what)
 {
 	struct ipath_devdata *dd = pd->port_dd;
-	unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
+	unsigned long pfn;
 	int ret;
 
 	if ((vma->vm_end - vma->vm_start) > len) {
@@ -912,17 +977,17 @@ static int ipath_mmap_mem(struct vm_area
 		vma->vm_flags &= ~VM_MAYWRITE;
 	}
 
+	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
 	ret = remap_pfn_range(vma, vma->vm_start, pfn,
 			      len, vma->vm_page_prot);
 	if (ret)
-		dev_info(&dd->pcidev->dev,
-			 "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
-			 what, pd->port_port, (unsigned long)addr, len,
-			 write_ok?'w':'o', ret);
+		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+			 "bytes r%c failed: %d\n", what, pd->port_port,
+			 pfn, len, write_ok?'w':'o', ret);
 	else
-		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
-			what, pd->port_port, (unsigned long)addr, len,
-			 write_ok?'w':'o');
+		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+			   "r%c\n", what, pd->port_port, pfn, len,
+			   write_ok?'w':'o');
 bail:
 	return ret;
 }
@@ -957,7 +1022,8 @@ static int mmap_ureg(struct vm_area_stru
 
 static int mmap_piobufs(struct vm_area_struct *vma,
 			struct ipath_devdata *dd,
-			struct ipath_portdata *pd)
+			struct ipath_portdata *pd,
+			unsigned piobufs, unsigned piocnt)
 {
 	unsigned long phys;
 	int ret;
@@ -968,16 +1034,15 @@ static int mmap_piobufs(struct vm_area_s
 	 * process data, and catches users who might try to read the i/o
 	 * space due to a bug.
 	 */
-	if ((vma->vm_end - vma->vm_start) >
-	    (dd->ipath_pbufsport * dd->ipath_palign)) {
+	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
 		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
 			 "reqlen %lx > PAGE\n",
 			 vma->vm_end - vma->vm_start);
-		ret = -EFAULT;
+		ret = -EINVAL;
 		goto bail;
 	}
 
-	phys = dd->ipath_physaddr + pd->port_piobufs;
+	phys = dd->ipath_physaddr + piobufs;
 
 	/*
 	 * Don't mark this as non-cached, or we don't get the
@@ -1011,7 +1076,7 @@ static int mmap_rcvegrbufs(struct vm_are
 	struct ipath_devdata *dd = pd->port_dd;
 	unsigned long start, size;
 	size_t total_size, i;
-	dma_addr_t *phys;
+	unsigned long pfn;
 	int ret;
 
 	size = pd->port_rcvegrbuf_size;
@@ -1021,7 +1086,7 @@ static int mmap_rcvegrbufs(struct vm_are
 			 "reqlen %lx > actual %lx\n",
 			 vma->vm_end - vma->vm_start,
 			 (unsigned long) total_size);
-		ret = -EFAULT;
+		ret = -EINVAL;
 		goto bail;
 	}
 
@@ -1035,11 +1100,11 @@ static int mmap_rcvegrbufs(struct vm_are
 	vma->vm_flags &= ~VM_MAYWRITE;
 
 	start = vma->vm_start;
-	phys = pd->port_rcvegrbuf_phys;
 
 	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-		ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
-				      size, vma->vm_page_prot);
+		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+		ret = remap_pfn_range(vma, start, pfn, size,
+				      vma->vm_page_prot);
 		if (ret < 0)
 			goto bail;
 	}
@@ -1049,6 +1114,110 @@ bail:
 	return ret;
 }
 
+/*
+ * ipath_file_vma_nopage - handle a VMA page fault.
+ */
+static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
+					  unsigned long address, int *type)
+{
+	unsigned long offset = address - vma->vm_start;
+	struct page *page = NOPAGE_SIGBUS;
+	void *pageptr;
+
+	/*
+	 * Convert the vmalloc address into a struct page.
+	 */
+	pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
+	page = vmalloc_to_page(pageptr);
+	if (!page)
+		goto out;
+
+	/* Increment the reference count. */
+	get_page(page);
+	if (type)
+		*type = VM_FAULT_MINOR;
+out:
+	return page;
+}
+
+static struct vm_operations_struct ipath_file_vm_ops = {
+	.nopage = ipath_file_vma_nopage,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+		       struct ipath_portdata *pd, unsigned subport)
+{
+	unsigned long len;
+	struct ipath_devdata *dd;
+	void *addr;
+	size_t size;
+	int ret = 0;
+
+	/* If the port is not shared, all addresses should be physical */
+	if (!pd->port_subport_cnt)
+		goto bail;
+
+	dd = pd->port_dd;
+	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
+	/*
+	 * Each process has all the subport uregbase, rcvhdrq, and
+	 * rcvegrbufs mmapped - as an array for all the processes,
+	 * and also separately for this process.
+	 */
+	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+		addr = pd->subport_uregbase;
+		size = PAGE_SIZE * pd->port_subport_cnt;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+		addr = pd->subport_rcvhdr_base;
+		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+		addr = pd->subport_rcvegrbuf;
+		size *= pd->port_subport_cnt;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+					PAGE_SIZE * subport)) {
+		addr = pd->subport_uregbase + PAGE_SIZE * subport;
+		size = PAGE_SIZE;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+					pd->port_rcvhdrq_size * subport)) {
+		addr = pd->subport_rcvhdr_base +
+			pd->port_rcvhdrq_size * subport;
+		size = pd->port_rcvhdrq_size;
+	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+					size * subport)) {
+		addr = pd->subport_rcvegrbuf + size * subport;
+		/* rcvegrbufs are read-only on the slave */
+		if (vma->vm_flags & VM_WRITE) {
+			dev_info(&dd->pcidev->dev,
+				 "Can't map eager buffers as "
+				 "writable (flags=%lx)\n", vma->vm_flags);
+			ret = -EPERM;
+			goto bail;
+		}
+		/*
+		 * Don't allow permission to later change to writeable
+		 * with mprotect.
+		 */
+		vma->vm_flags &= ~VM_MAYWRITE;
+	} else {
+		goto bail;
+	}
+	len = vma->vm_end - vma->vm_start;
+	if (len > size) {
+		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+	vma->vm_ops = &ipath_file_vm_ops;
+	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	ret = 1;
+
+bail:
+	return ret;
+}
+
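The shared-port mmap path above round-trips a kernel virtual address through vm_pgoff: mmap_kvaddr() stores the page number of the vmalloc'ed region, and ipath_file_vma_nopage() rebuilds the faulting kernel address from that page number plus the fault offset before calling vmalloc_to_page(). A userspace sketch of just that arithmetic (the page shift of 12 and the vmalloc address are assumptions for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12UL
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	/* hypothetical, page-aligned vmalloc address of a shared subport area */
	unsigned long kvaddr = 0xffffc20000040000UL;
	unsigned long pgoff = kvaddr >> PAGE_SHIFT;    /* what mmap_kvaddr() stores */

	unsigned long offset = 3 * PAGE_SIZE;          /* fault 3 pages into the VMA */
	unsigned long fault = offset + (pgoff << PAGE_SHIFT);  /* rebuilt in nopage */

	printf("pgoff 0x%lx -> faulting kvaddr 0x%lx\n", pgoff, fault);
	return 0;
}
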
 /**
  * ipath_mmap - mmap various structures into user space
  * @fp: the file pointer
@@ -1064,73 +1233,96 @@ static int ipath_mmap(struct file *fp, s
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
 	u64 pgaddr, ureg;
+	unsigned piobufs, piocnt;
 	int ret;
 
 	pd = port_fp(fp);
+	if (!pd) {
+		ret = -EINVAL;
+		goto bail;
+	}
 	dd = pd->port_dd;
 
 	/*
 	 * This is the ipath_do_user_init() code, mapping the shared buffers
 	 * into the user process. The address referred to by vm_pgoff is the
-	 * virtual, not physical, address; we only do one mmap for each
-	 * space mapped.
+	 * file offset passed via mmap().  For shared ports, this is the
+	 * kernel vmalloc() address of the pages to share with the master.
+	 * For non-shared or master ports, this is a physical address.
+	 * We only do one mmap for each space mapped.
 	 */
 	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
 
 	/*
-	 * Must fit in 40 bits for our hardware; some checked elsewhere,
-	 * but we'll be paranoid.  Check for 0 is mostly in case one of the
-	 * allocations failed, but user called mmap anyway.   We want to catch
-	 * that before it can match.
-	 */
-	if (!pgaddr || pgaddr >= (1ULL<<40))  {
-		ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
-			(unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
-		return -EINVAL;
+	 * Check for 0 in case one of the allocations failed, but user
+	 * called mmap anyway.
+	 */
+	if (!pgaddr)  {
+		ret = -EINVAL;
+		goto bail;
 	}
 
-	/* just the offset of the port user registers, not physical addr */
-	ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
-	ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
+	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
 		   (unsigned long long) pgaddr, vma->vm_start,
-		   vma->vm_end - vma->vm_start);
+		   vma->vm_end - vma->vm_start, dd->ipath_unit,
+		   pd->port_port, subport_fp(fp));
 
-	if (vma->vm_start & (PAGE_SIZE-1)) {
-		ipath_dev_err(dd,
-			"vm_start not aligned: %lx, end=%lx phys %lx\n",
-			vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
-		ret = -EINVAL;
+	/*
+	 * Physical addresses must fit in 40 bits for our hardware.
+	 * Check for kernel virtual addresses first, anything else must
+	 * match a HW or memory address.
+	 */
+	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+	if (ret) {
+		if (ret > 0)
+			ret = 0;
+		goto bail;
+	}
+
+	ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+	if (!pd->port_subport_cnt) {
+		/* port is not shared */
+		piocnt = dd->ipath_pbufsport;
+		piobufs = pd->port_piobufs;
+	} else if (!subport_fp(fp)) {
+		/* caller is the master */
+		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+			 (dd->ipath_pbufsport % pd->port_subport_cnt);
+		piobufs = pd->port_piobufs +
+			dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+	} else {
+		unsigned slave = subport_fp(fp) - 1;
+
+		/* caller is a slave */
+		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
 	}
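The split above hands every slave an equal share of the port's PIO buffers (the integer quotient) and gives the master the quotient plus any remainder, located after all the slave regions. A quick userspace check of that arithmetic, with 18 buffers per port, 4 subports and a 2048-byte alignment chosen purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned pbufsport = 18;  /* hypothetical dd->ipath_pbufsport */
	unsigned subports = 4;    /* hypothetical pd->port_subport_cnt */
	unsigned palign = 2048;   /* hypothetical dd->ipath_palign */
	unsigned slave_cnt = pbufsport / subports;               /* 4 buffers each */
	unsigned master_cnt = slave_cnt + pbufsport % subports;  /* 4 + 2 = 6 */
	unsigned slave;

	for (slave = 0; slave < subports - 1; slave++)
		printf("slave %u: %u bufs at byte offset %u\n",
		       slave, slave_cnt, palign * slave_cnt * slave);
	printf("master: %u bufs at byte offset %u\n",
	       master_cnt, palign * (pbufsport - master_cnt));
	return 0;
}
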
-	else if (pgaddr == ureg)
+
+	if (pgaddr == ureg)
 		ret = mmap_ureg(vma, dd, ureg);
-	else if (pgaddr == pd->port_piobufs)
-		ret = mmap_piobufs(vma, dd, pd);
-	else if (pgaddr == (u64) pd->port_rcvegr_phys)
+	else if (pgaddr == piobufs)
+		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+	else if (pgaddr == dd->ipath_pioavailregs_phys)
+		/* in-memory copy of pioavail registers */
+		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+			      	     (void *) dd->ipath_pioavailregs_dma,
+				     "pioavail registers");
+	else if (pgaddr == pd->port_rcvegr_phys)
 		ret = mmap_rcvegrbufs(vma, pd);
-	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
+	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
 		/*
 		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
-		unsigned total_size =
-			ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
-			   * sizeof(u32), PAGE_SIZE);
-		ret = ipath_mmap_mem(vma, pd, total_size, 1,
-				     pd->port_rcvhdrq_phys,
+		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+				     pd->port_rcvhdrq,
 				     "rcvhdrq");
-	}
-	else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
+	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
 		/* in-memory copy of rcvhdrq tail register */
 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     pd->port_rcvhdrqtailaddr_phys,
+				     pd->port_rcvhdrtail_kvaddr,
 				     "rcvhdrq tail");
-	else if (pgaddr == dd->ipath_pioavailregs_phys)
-		/* in-memory copy of pioavail registers */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     dd->ipath_pioavailregs_phys,
-				     "pioavail registers");
 	else
 		ret = -EINVAL;
 
@@ -1138,9 +1330,10 @@ static int ipath_mmap(struct file *fp, s
 
 	if (ret < 0)
 		dev_info(&dd->pcidev->dev,
-			 "Failure %d on addr %lx, off %lx\n",
-			 -ret, vma->vm_start, vma->vm_pgoff);
-
+			 "Failure %d on off %llx len %lx\n",
+			 -ret, (unsigned long long)pgaddr,
+			 vma->vm_end - vma->vm_start);
+bail:
 	return ret;
 }
 
@@ -1154,6 +1347,8 @@ static unsigned int ipath_poll(struct fi
 	struct ipath_devdata *dd;
 
 	pd = port_fp(fp);
+	if (!pd)
+		goto bail;
 	dd = pd->port_dd;
 
 	bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
@@ -1176,7 +1371,7 @@ static unsigned int ipath_poll(struct fi
 
 	if (tail == head) {
 		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-		if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
 			(void)ipath_write_ureg(dd, ur_rcvhdrhead,
 					       dd->ipath_rhdrhead_intr_off
 					       | head, pd->port_port);
@@ -1200,18 +1395,95 @@ static unsigned int ipath_poll(struct fi
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
+bail:
 	return pollflag;
 }
 
+static int init_subports(struct ipath_devdata *dd,
+			 struct ipath_portdata *pd,
+			 const struct ipath_user_info *uinfo)
+{
+	int ret = 0;
+	unsigned num_subports;
+	size_t size;
+
+	/*
+	 * If the user is requesting zero or one port,
+	 * skip the subport allocation.
+	 */
+	if (uinfo->spu_subport_cnt <= 1)
+		goto bail;
+
+	/* Old user binaries don't know about new subport implementation */
+	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+		dev_info(&dd->pcidev->dev,
+			 "Mismatched user minor version (%d) and driver "
+			 "minor version (%d) while port sharing. Ensure "
+			 "that driver and library are from the same "
+			 "release.\n",
+			 (int) (uinfo->spu_userversion & 0xffff),
+			 IPATH_USER_SWMINOR);
+		goto bail;
+	}
+	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	num_subports = uinfo->spu_subport_cnt;
+	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
+	if (!pd->subport_uregbase) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+		     sizeof(u32), PAGE_SIZE) * num_subports;
+	pd->subport_rcvhdr_base = vmalloc(size);
+	if (!pd->subport_rcvhdr_base) {
+		ret = -ENOMEM;
+		goto bail_ureg;
+	}
+
+	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+					pd->port_rcvegrbuf_size *
+					num_subports);
+	if (!pd->subport_rcvegrbuf) {
+		ret = -ENOMEM;
+		goto bail_rhdr;
+	}
+
+	pd->port_subport_cnt = uinfo->spu_subport_cnt;
+	pd->port_subport_id = uinfo->spu_subport_id;
+	pd->active_slaves = 1;
+	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+	memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
+	memset(pd->subport_rcvhdr_base, 0, size);
+	memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
+				         pd->port_rcvegrbuf_size *
+				         num_subports);
+	goto bail;
+
+bail_rhdr:
+	vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+	vfree(pd->subport_uregbase);
+	pd->subport_uregbase = NULL;
+bail:
+	return ret;
+}
+
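Because pd->port_rcvhdrq_size is not set yet at this point (as the comment above notes), init_subports() sizes the shared receive-header area itself, using the same page-aligned rcvhdrcnt * rcvhdrentsize * sizeof(u32) formula seen elsewhere in this file, multiplied by the number of subports. A sketch of that sizing with invented header-queue parameters:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned rcvhdrcnt = 128;     /* hypothetical dd->ipath_rcvhdrcnt */
	unsigned rcvhdrentsize = 16;  /* hypothetical dd->ipath_rcvhdrentsize, in u32s */
	unsigned num_subports = 4;

	size_t per_port = ALIGN((size_t)rcvhdrcnt * rcvhdrentsize *
				sizeof(uint32_t), PAGE_SIZE);
	size_t shared = per_port * num_subports;

	printf("per-port rcvhdrq %zu bytes, shared area %zu bytes\n",
	       per_port, shared);
	return 0;
}
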
 static int try_alloc_port(struct ipath_devdata *dd, int port,
-			  struct file *fp)
+			  struct file *fp,
+			  const struct ipath_user_info *uinfo)
 {
+	struct ipath_portdata *pd;
 	int ret;
 
-	if (!dd->ipath_pd[port]) {
-		void *p, *ptmp;
+	if (!(pd = dd->ipath_pd[port])) {
+		void *ptmp;
 
-		p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
 
 		/*
 		 * Allocate memory for use in ipath_tid_update() just once
@@ -1221,34 +1493,36 @@ static int try_alloc_port(struct ipath_d
 		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
 			       dd->ipath_rcvtidcnt * sizeof(struct page **),
 			       GFP_KERNEL);
-		if (!p || !ptmp) {
+		if (!pd || !ptmp) {
 			ipath_dev_err(dd, "Unable to allocate portdata "
 				      "memory, failing open\n");
 			ret = -ENOMEM;
-			kfree(p);
+			kfree(pd);
 			kfree(ptmp);
 			goto bail;
 		}
-		dd->ipath_pd[port] = p;
+		dd->ipath_pd[port] = pd;
 		dd->ipath_pd[port]->port_port = port;
 		dd->ipath_pd[port]->port_dd = dd;
 		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
 		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
 	}
-	if (!dd->ipath_pd[port]->port_cnt) {
-		dd->ipath_pd[port]->port_cnt = 1;
-		fp->private_data = (void *) dd->ipath_pd[port];
+	if (!pd->port_cnt) {
+		pd->userversion = uinfo->spu_userversion;
+		init_user_egr_sizes(pd);
+		if ((ret = init_subports(dd, pd, uinfo)) != 0)
+			goto bail;
 		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
 			   current->comm, current->pid, dd->ipath_unit,
 			   port);
-		dd->ipath_pd[port]->port_pid = current->pid;
-		strncpy(dd->ipath_pd[port]->port_comm, current->comm,
-			sizeof(dd->ipath_pd[port]->port_comm));
+		pd->port_cnt = 1;
+		port_fp(fp) = pd;
+		pd->port_pid = current->pid;
+		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
 		ipath_stats.sps_ports++;
 		ret = 0;
-		goto bail;
-	}
-	ret = -EBUSY;
+	} else
+		ret = -EBUSY;
 
 bail:
 	return ret;
@@ -1264,7 +1538,8 @@ static inline int usable(struct ipath_de
 				     | IPATH_LINKUNK));
 }
 
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+			  const struct ipath_user_info *uinfo)
 {
 	struct ipath_devdata *dd = ipath_lookup(unit);
 	int ret, i;
@@ -1279,8 +1554,8 @@ static int find_free_port(int unit, stru
 		goto bail;
 	}
 
-	for (i = 0; i < dd->ipath_cfgports; i++) {
-		ret = try_alloc_port(dd, i, fp);
+	for (i = 1; i < dd->ipath_cfgports; i++) {
+		ret = try_alloc_port(dd, i, fp, uinfo);
 		if (ret != -EBUSY)
 			goto bail;
 	}
@@ -1290,13 +1565,14 @@ bail:
 	return ret;
 }
 
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+			  const struct ipath_user_info *uinfo)
 {
 	int ret = 0, i, prefunit = -1, devmax;
 	int maxofallports, npresent, nup;
 	int ndev;
 
-	(void) ipath_count_units(&npresent, &nup, &maxofallports);
+	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
 
 	/*
 	 * This code is present to allow a knowledgeable person to
@@ -1327,7 +1603,7 @@ static int find_best_unit(struct file *f
 		if (curcpu != -1) {
 			if (npresent) {
 				prefunit = curcpu / (ncpus / npresent);
-				ipath_dbg("%s[%u] %d chips, %d cpus, "
+				ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
 					  "%d cpus/chip, select unit %d\n",
 					  current->comm, current->pid,
 					  npresent, ncpus, ncpus / npresent,
@@ -1343,8 +1619,6 @@ static int find_best_unit(struct file *f
 
 	if (prefunit != -1)
 		devmax = prefunit + 1;
-	else
-		devmax = ipath_count_units(NULL, NULL, NULL);
 recheck:
 	for (i = 1; i < maxofallports; i++) {
 		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1359,7 +1633,7 @@ recheck:
 				 * next.
 				 */
 				continue;
-			ret = try_alloc_port(dd, i, fp);
+			ret = try_alloc_port(dd, i, fp, uinfo);
 			if (!ret)
 				goto done;
 		}
@@ -1395,22 +1669,194 @@ done:
 	return ret;
 }
 
+static int find_shared_port(struct file *fp,
+			    const struct ipath_user_info *uinfo)
+{
+	int devmax, ndev, i;
+	int ret = 0;
+
+	devmax = ipath_count_units(NULL, NULL, NULL);
+
+	for (ndev = 0; ndev < devmax; ndev++) {
+		struct ipath_devdata *dd = ipath_lookup(ndev);
+
+		if (!dd)
+			continue;
+		for (i = 1; i < dd->ipath_cfgports; i++) {
+			struct ipath_portdata *pd = dd->ipath_pd[i];
+
+			/* Skip ports which are not yet open */
+			if (!pd || !pd->port_cnt)
+				continue;
+			/* Skip port if it doesn't match the requested one */
+			if (pd->port_subport_id != uinfo->spu_subport_id)
+				continue;
+			/* Verify the sharing process matches the master */
+			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+			    pd->userversion != uinfo->spu_userversion ||
+			    pd->port_cnt >= pd->port_subport_cnt) {
+				ret = -EINVAL;
+				goto done;
+			}
+			port_fp(fp) = pd;
+			subport_fp(fp) = pd->port_cnt++;
+			tidcursor_fp(fp) = 0;
+			pd->active_slaves |= 1 << subport_fp(fp);
+			ipath_cdbg(PROC,
+				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+				   current->comm, current->pid,
+				   subport_fp(fp),
+				   pd->port_comm, pd->port_pid,
+				   dd->ipath_unit, pd->port_port);
+			ret = 1;
+			goto done;
+		}
+	}
+
+done:
+	return ret;
+}
+
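The checks in find_shared_port() reduce to a small matching predicate plus a bitmap of live openers: the joiner must name the same subport_id, subport count and user version as the master, the port must still have room, and the new opener takes the next subport index and sets its bit in active_slaves. A userspace sketch of those two pieces on a stand-in struct (field values are invented):

#include <stdio.h>

struct fake_port {
	unsigned subport_id;
	unsigned subport_cnt;
	unsigned userversion;
	unsigned port_cnt;       /* opens so far: master + slaves */
	unsigned active_slaves;  /* bit 0 = master, bit N = subport N */
};

static int can_share(const struct fake_port *pd, unsigned id,
		     unsigned cnt, unsigned version)
{
	return pd->port_cnt &&
	       pd->subport_id == id &&
	       pd->subport_cnt == cnt &&
	       pd->userversion == version &&
	       pd->port_cnt < pd->subport_cnt;
}

int main(void)
{
	struct fake_port pd = { 0x1234, 4, 0x00040007, 1, 1 };

	if (can_share(&pd, 0x1234, 4, 0x00040007)) {
		unsigned subport = pd.port_cnt++;   /* this opener becomes subport 1 */
		pd.active_slaves |= 1u << subport;
		printf("joined as subport %u, active_slaves=0x%x\n",
		       subport, pd.active_slaves);
	}
	return 0;
}
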
 static int ipath_open(struct inode *in, struct file *fp)
 {
-	int ret, user_minor;
+	/* The real work is performed later in ipath_assign_port() */
+	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+	return fp->private_data ? 0 : -ENOMEM;
+}
+
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+			      const struct ipath_user_info *uinfo)
+{
+	int ret;
+	int i_minor;
+	unsigned swminor;
+
+	/* Check to be sure we haven't already initialized this file */
+	if (port_fp(fp)) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* for now, if major version is different, bail */
+	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+		ipath_dbg("User major version %d not same as driver "
+			  "major %d\n", uinfo->spu_userversion >> 16,
+			  IPATH_USER_SWMAJOR);
+		ret = -ENODEV;
+		goto done;
+	}
+
+	swminor = uinfo->spu_userversion & 0xffff;
+	if (swminor != IPATH_USER_SWMINOR)
+		ipath_dbg("User minor version %d not same as driver "
+			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
 
 	mutex_lock(&ipath_mutex);
 
-	user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
+	if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+	    (ret = find_shared_port(fp, uinfo))) {
+		mutex_unlock(&ipath_mutex);
+		if (ret > 0)
+			ret = 0;
+		goto done;
+	}
+
+	i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
 	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-		   (long)in->i_rdev, user_minor);
+		   (long)fp->f_dentry->d_inode->i_rdev, i_minor);
 
-	if (user_minor)
-		ret = find_free_port(user_minor - 1, fp);
+	if (i_minor)
+		ret = find_free_port(i_minor - 1, fp, uinfo);
 	else
-		ret = find_best_unit(fp);
+		ret = find_best_unit(fp, uinfo);
 
 	mutex_unlock(&ipath_mutex);
+
+done:
+	return ret;
+}
+
+
+static int ipath_do_user_init(struct file *fp,
+			      const struct ipath_user_info *uinfo)
+{
+	int ret;
+	struct ipath_portdata *pd = port_fp(fp);
+	struct ipath_devdata *dd;
+	u32 head32;
+
+	/* Subports don't need to initialize anything since master did it. */
+	if (subport_fp(fp)) {
+		ret = wait_event_interruptible(pd->port_wait,
+			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+		goto done;
+	}
+
+	dd = pd->port_dd;
+
+	if (uinfo->spu_rcvhdrsize) {
+		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+		if (ret)
+			goto done;
+	}
+
+	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+	pd->port_piobufs = dd->ipath_piobufbase +
+		dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+		   pd->port_port, pd->port_piobufs);
+
+	/*
+	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+	 * array for time being.  If pd->port_port > chip-supported,
+	 * we need to do extra stuff here to handle overflow
+	 * through port 0, someday
+	 */
+	ret = ipath_create_rcvhdrq(dd, pd);
+	if (!ret)
+		ret = ipath_create_user_egr(pd);
+	if (ret)
+		goto done;
+
+	/*
+	 * set the eager head register for this port to the current values
+	 * of the tail pointers, since we don't know if they were
+	 * updated on last use of the port.
+	 */
+	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+	dd->ipath_lastegrheads[pd->port_port] = -1;
+	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
+	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+		pd->port_port, head32);
+	pd->port_tidcursor = 0;	/* start at beginning after open */
+	/*
+	 * now enable the port; the tail registers will be written to memory
+	 * by the chip as soon as it sees the write to
+	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
+	 * transition from 0 to 1, so clear it first, then set it as part of
+	 * enabling the port.  This will (very briefly) affect any other
+	 * open ports, but it shouldn't be long enough to be an issue.
+	 * We explicitly set the in-memory copy to 0 beforehand, so we don't
+	 * have to wait to be sure the DMA update has happened.
+	 */
+	*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
+	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+		&dd->ipath_rcvctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+			 dd->ipath_rcvctrl);
+	/* Notify any waiting slaves */
+	if (pd->port_subport_cnt) {
+		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+		wake_up(&pd->port_wait);
+	}
+done:
 	return ret;
 }
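The master/slave handshake used by ipath_do_user_init() above (slaves sleep on port_wait until the IPATH_PORT_MASTER_UNINIT bit set in init_subports() is cleared, and the master clears it and wakes them once the port is fully set up) is an ordinary flag-plus-waitqueue pattern. A rough userspace analogue with pthreads, purely to illustrate the ordering, not the kernel API:

#include <pthread.h>
#include <stdio.h>

/* userspace stand-ins for the MASTER_UNINIT bit and port_wait */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  ready = PTHREAD_COND_INITIALIZER;
static int master_uninit = 1;

static void *slave(void *arg)
{
	pthread_mutex_lock(&lock);
	while (master_uninit)              /* wait_event(..., !MASTER_UNINIT) */
		pthread_cond_wait(&ready, &lock);
	pthread_mutex_unlock(&lock);
	printf("slave %ld: master finished init, proceeding\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, slave, (void *)i);

	/* ... master allocates rcvhdrq, eager buffers, enables the port ... */

	pthread_mutex_lock(&lock);
	master_uninit = 0;                 /* clear_bit(IPATH_PORT_MASTER_UNINIT) */
	pthread_cond_broadcast(&ready);    /* wake_up(&pd->port_wait) */
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}
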
 
@@ -1433,6 +1879,8 @@ static void unlock_expected_tids(struct 
 		if (!dd->ipath_pageshadow[i])
 			continue;
 
+		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+			PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
 						  1);
 		dd->ipath_pageshadow[i] = NULL;
@@ -1453,6 +1901,7 @@ static void unlock_expected_tids(struct 
 static int ipath_close(struct inode *in, struct file *fp)
 {
 	int ret = 0;
+	struct ipath_filedata *fd;
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
 	unsigned port;
@@ -1462,9 +1911,24 @@ static int ipath_close(struct inode *in,
 
 	mutex_lock(&ipath_mutex);
 
-	pd = port_fp(fp);
-	port = pd->port_port;
+	fd = (struct ipath_filedata *) fp->private_data;
 	fp->private_data = NULL;
+	pd = fd->pd;
+	if (!pd) {
+		mutex_unlock(&ipath_mutex);
+		goto bail;
+	}
+	if (--pd->port_cnt) {
+		/*
+		 * XXX If the master closes the port before the slave(s),
+		 * revoke the mmap for the eager receive queue so
+		 * the slave(s) don't wait for receive data forever.
+		 */
+		pd->active_slaves &= ~(1 << fd->subport);
+		mutex_unlock(&ipath_mutex);
+		goto bail;
+	}
+	port = pd->port_port;
 	dd = pd->port_dd;
 
 	if (pd->port_hdrqfull) {
@@ -1503,8 +1967,6 @@ static int ipath_close(struct inode *in,
 
 		/* clean up the pkeys for this port user */
 		ipath_clean_part_key(pd, dd);
-
-
 		/*
 		 * be paranoid, and never write 0's to these, just use an
 		 * unused part of the port 0 tail page.  Of course,
@@ -1523,39 +1985,49 @@ static int ipath_close(struct inode *in,
 		i = dd->ipath_pbufsport * (port - 1);
 		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
 
+		dd->ipath_f_clear_tids(dd, pd->port_port);
+
 		if (dd->ipath_pageshadow)
 			unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
 			   pd->port_comm, pd->port_pid,
 			   dd->ipath_unit, port);
-
-		dd->ipath_f_clear_tids(dd, pd->port_port);
 	}
 
-	pd->port_cnt = 0;
 	pd->port_pid = 0;
-
 	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
 
+bail:
+	kfree(fd);
 	return ret;
 }
 
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
 			   struct ipath_port_info __user *uinfo)
 {
 	struct ipath_port_info info;
 	int nup;
 	int ret;
+	size_t sz;
 
 	(void) ipath_count_units(NULL, &nup, NULL);
 	info.num_active = nup;
 	info.unit = pd->port_dd->ipath_unit;
 	info.port = pd->port_port;
+	info.subport = subport;
+	/* Don't return new fields if old library opened the port. */
+	if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+		/* Number of user ports available for this device. */
+		info.num_ports = pd->port_dd->ipath_cfgports - 1;
+		info.num_subports = pd->port_subport_cnt;
+		sz = sizeof(info);
+	} else
+		sz = sizeof(info) - 2 * sizeof(u16);
 
-	if (copy_to_user(uinfo, &info, sizeof(info))) {
+	if (copy_to_user(uinfo, &info, sz)) {
 		ret = -EFAULT;
 		goto bail;
 	}
@@ -1565,6 +2037,16 @@ bail:
 	return ret;
 }
 
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+				void __user *slave_mask_addr)
+{
+	int ret = 0;
+
+	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+		ret = -EFAULT;
+	return ret;
+}
+
 static ssize_t ipath_write(struct file *fp, const char __user *data,
 			   size_t count, loff_t *off)
 {
@@ -1591,6 +2073,8 @@ static ssize_t ipath_write(struct file *
 	consumed = sizeof(cmd.type);
 
 	switch (cmd.type) {
+	case IPATH_CMD_ASSIGN_PORT:
+	case __IPATH_CMD_USER_INIT:
 	case IPATH_CMD_USER_INIT:
 		copy = sizeof(cmd.cmd.user_info);
 		dest = &cmd.cmd.user_info;
@@ -1617,6 +2101,11 @@ static ssize_t ipath_write(struct file *
 		dest = &cmd.cmd.part_key;
 		src = &ucmd->cmd.part_key;
 		break;
+	case __IPATH_CMD_SLAVE_INFO:
+		copy = sizeof(cmd.cmd.slave_mask_addr);
+		dest = &cmd.cmd.slave_mask_addr;
+		src = &ucmd->cmd.slave_mask_addr;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
@@ -1634,34 +2123,55 @@ static ssize_t ipath_write(struct file *
 
 	consumed += copy;
 	pd = port_fp(fp);
+	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+		cmd.type != IPATH_CMD_ASSIGN_PORT) {
+		ret = -EINVAL;
+		goto bail;
+	}
 
 	switch (cmd.type) {
+	case IPATH_CMD_ASSIGN_PORT:
+		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+		if (ret)
+			goto bail;
+		break;
+	case __IPATH_CMD_USER_INIT:
+		/* backwards compatibility, get port first */
+		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+		if (ret)
+			goto bail;
+		/* and fall through to current version. */
 	case IPATH_CMD_USER_INIT:
-		ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
-		if (ret < 0)
+		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+		if (ret)
 			goto bail;
 		ret = ipath_get_base_info(
-			pd, (void __user *) (unsigned long)
+			fp, (void __user *) (unsigned long)
 			cmd.cmd.user_info.spu_base_info,
 			cmd.cmd.user_info.spu_base_info_size);
 		break;
 	case IPATH_CMD_RECV_CTRL:
-		ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
 		break;
 	case IPATH_CMD_PORT_INFO:
-		ret = ipath_port_info(pd,
+		ret = ipath_port_info(pd, subport_fp(fp),
 				      (struct ipath_port_info __user *)
 				      (unsigned long) cmd.cmd.port_info);
 		break;
 	case IPATH_CMD_TID_UPDATE:
-		ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
 		break;
 	case IPATH_CMD_TID_FREE:
-		ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
 		break;
 	case IPATH_CMD_SET_PART_KEY:
 		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
 		break;
+	case __IPATH_CMD_SLAVE_INFO:
+		ret = ipath_get_slave_info(pd,
+					   (void __user *) (unsigned long)
+					   cmd.cmd.slave_mask_addr);
+		break;
 	}
 
 	if (ret >= 0)
@@ -1858,4 +2368,3 @@ void ipath_user_remove(struct ipath_devd
 bail:
 	return;
 }
-
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_fs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -66,8 +66,8 @@ static int ipathfs_mknod(struct inode *d
 	inode->i_private = data;
 	if ((mode & S_IFMT) == S_IFDIR) {
 		inode->i_op = &simple_dir_inode_operations;
-		inode->i_nlink++;
-		dir->i_nlink++;
+		inc_nlink(inode);
+		inc_nlink(dir);
 	}
 
 	inode->i_fop = fops;
@@ -356,19 +356,16 @@ static ssize_t flash_write(struct file *
 
 	pos = *ppos;
 
-	if ( pos < 0) {
+	if (pos != 0) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	if (pos >= sizeof(struct ipath_flash)) {
-		ret = 0;
+	if (count != sizeof(struct ipath_flash)) {
+		ret = -EINVAL;
 		goto bail;
 	}
 
-	if (count > sizeof(struct ipath_flash) - pos)
-		count = sizeof(struct ipath_flash) - pos;
-
 	tmp = kmalloc(count, GFP_KERNEL);
 	if (!tmp) {
 		ret = -ENOMEM;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -207,8 +207,8 @@ static const struct ipath_kregs ipath_ht
 	.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
 	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
 	/*
-	 * These should not be used directly via ipath_read_kreg64(),
-	 * use them with ipath_read_kreg64_port(),
+	 * These should not be used directly via ipath_write_kreg64(),
+	 * use them with ipath_write_kreg64_port(),
 	 */
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -252,8 +252,8 @@ static const struct ipath_cregs ipath_ht
 };
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -338,7 +338,7 @@ static void hwerr_crcbits(struct ipath_d
 	if (crcbits) {
 		u16 ctrl0, ctrl1;
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[HT%s lane %s CRC (%llx); ignore till reload]",
+			 "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
 			 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
 			 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
 				    ? "1 (B)" : "0+1 (A+B)"),
@@ -389,17 +389,28 @@ static void hwerr_crcbits(struct ipath_d
 				     _IPATH_HTLINK1_CRCBITS)));
 }
 
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+	INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+	INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+	INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+	INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+	INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
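The ipath_6110_hwerror_msgs[] table above replaces the long chain of per-bit strlcat() calls removed further down in this hunk: each entry pairs an error bit with its text, and a shared helper (ipath_format_hwerrors(), called below but defined elsewhere) walks the table and appends the message for every bit that is set. A minimal userspace sketch of the same table-driven formatting, with invented bit positions:

#include <stdio.h>
#include <string.h>

struct hwerror_msg {
	unsigned long long mask;
	const char *msg;
};

static const struct hwerror_msg msgs[] = {
	{ 1ULL << 0, "HTC Ireq Parity" },     /* bit positions are illustrative */
	{ 1ULL << 1, "HTC Treq Parity" },
	{ 1ULL << 2, "Rx Dsync" },
	{ 1ULL << 3, "SerDes PLL" },
};

static void format_hwerrors(unsigned long long hwerrs, char *buf, size_t len)
{
	size_t i;

	for (i = 0; i < sizeof(msgs) / sizeof(msgs[0]); i++)
		if (hwerrs & msgs[i].mask) {
			strncat(buf, "[", len - strlen(buf) - 1);
			strncat(buf, msgs[i].msg, len - strlen(buf) - 1);
			strncat(buf, "]", len - strlen(buf) - 1);
		}
}

int main(void)
{
	char msg[128] = "";

	format_hwerrors((1ULL << 1) | (1ULL << 3), msg, sizeof(msg));
	printf("%s\n", msg);               /* [HTC Treq Parity][SerDes PLL] */
	return 0;
}
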
 /**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
  * @msg: the output buffer
  * @msgl: the size of the output buffer
  *
- * Use same msg buffer as regular errors to avoid
- * excessive stack use.  Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use.  Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
  */
 static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 				     size_t msgl)
@@ -440,19 +451,49 @@ static void ipath_ht_handle_hwerrors(str
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~dd->ipath_lasthwerror) ||
+	if ((hwerrs & ~(dd->ipath_lasthwerror |
+			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
 	    (ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
 	dd->ipath_lasthwerror |= hwerrs;
 
-	if (hwerrs & ~infinipath_hwe_bitsextant)
+	if (hwerrs & ~dd->ipath_hwe_bitsextant)
 		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~infinipath_hwe_bitsextant));
+			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+		/*
+		 * parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue. They can
+		 * occur if a processor speculative read is done to the PIO
+		 * buffer while we are sending a packet, for example.
+		 */
+		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+			ipath_stats.sps_txeparity++;
+			ipath_dbg("Recovering from TXE parity error (%llu), "
+			    	  "hwerrstatus=%llx\n",
+				  (unsigned long long) ipath_stats.sps_txeparity,
+				  (unsigned long long) hwerrs);
+			ipath_disarm_senderrbufs(dd);
+			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+			if (!hwerrs) { /* else leave in freeze mode */
+				ipath_write_kreg(dd,
+						 dd->ipath_kregs->kr_control,
+						 dd->ipath_control);
+				return;
+			}
+		}
 		if (hwerrs) {
 			/*
 			 * if any set that we aren't ignoring; only
@@ -499,44 +540,16 @@ static void ipath_ht_handle_hwerrors(str
 			 bits);
 		strlcat(msg, bitsmsg, msgl);
 	}
-	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-		strlcat(msg, "[IB2IPATH Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-		strlcat(msg, "[IPATH2IB Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
-		strlcat(msg, "[HTC Ireq Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
-		strlcat(msg, "[HTC Treq Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
-		strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+	ipath_format_hwerrors(hwerrs,
+			      ipath_6110_hwerror_msgs,
+			      sizeof(ipath_6110_hwerror_msgs) /
+			      sizeof(ipath_6110_hwerror_msgs[0]),
+			      msg, msgl);
 
 	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
 		hwerr_crcbits(dd, hwerrs, msg, msgl);
 
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
-		strlcat(msg, "[HT core Misc5]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
-		strlcat(msg, "[HT core Misc6]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
-		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
 		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
@@ -573,11 +586,6 @@ static void ipath_ht_handle_hwerrors(str
 				 dd->ipath_hwerrmask);
 	}
 
-	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-		strlcat(msg, "[Rx Dsync]", msgl);
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-		strlcat(msg, "[SerDes PLL]", msgl);
-
 	ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
 		/*
@@ -649,9 +657,9 @@ static int ipath_ht_boardname(struct ipa
 	if (n)
 		snprintf(name, namelen, "%s", n);
 
-	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 4)) {
 		/*
-		 * This version of the driver only supports Rev 3.2 and 3.3
+		 * This version of the driver only supports Rev 3.2 - 3.4
 		 */
 		ipath_dev_err(dd,
 			      "Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -742,7 +750,6 @@ static int ipath_setup_ht_reset(struct i
 	return 0;
 }
 
-#define HT_CAPABILITY_ID   0x08	/* HT capabilities not defined in kernel */
 #define HT_INTR_DISC_CONFIG  0x80	/* HT interrupt and discovery cap */
 #define HT_INTR_REG_INDEX    2	/* intconfig requires indirect accesses */
 
@@ -973,7 +980,7 @@ static int ipath_setup_ht_config(struct 
 	 * do this early, before we ever enable errors or hardware errors,
 	 * mostly to avoid causing the chip to enter freeze mode.
 	 */
-	pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
+	pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
 	if (!pos) {
 		ipath_dev_err(dd, "Couldn't find HyperTransport "
 			      "capability; no interrupts\n");
@@ -996,7 +1003,7 @@ static int ipath_setup_ht_config(struct 
 		else if (cap_type == HT_INTR_DISC_CONFIG)
 			ihandler = set_int_handler(dd, pdev, pos);
 	} while ((pos = pci_find_next_capability(pdev, pos,
-						 HT_CAPABILITY_ID)));
+						 PCI_CAP_ID_HT)));
 
 	if (!ihandler) {
 		ipath_dev_err(dd, "Couldn't find interrupt handler in "
@@ -1081,21 +1088,21 @@ static void ipath_setup_ht_setextled(str
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
 }
 
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
 {
-	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	ipath_gpio_sda = IPATH_GPIO_SDA;
-	ipath_gpio_scl = IPATH_GPIO_SCL;
+	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
 
-	infinipath_i_bitsextant =
+	dd->ipath_i_bitsextant =
 		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
 		(INFINIPATH_I_RCVAVAIL_MASK <<
 		 INFINIPATH_I_RCVAVAIL_SHIFT) |
 		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
 		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
 
-	infinipath_e_bitsextant =
+	dd->ipath_e_bitsextant =
 		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
 		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
 		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1113,7 +1120,7 @@ static void ipath_init_ht_variables(void
 		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
 		INFINIPATH_E_HARDWARE;
 
-	infinipath_hwe_bitsextant =
+	dd->ipath_hwe_bitsextant =
 		(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
 		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1142,8 +1149,8 @@ static void ipath_init_ht_variables(void
 		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
 		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
 
-	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
 }
 
 /**
@@ -1468,7 +1475,7 @@ static void ipath_ht_tidtemplate(struct 
 static int ipath_ht_early_init(struct ipath_devdata *dd)
 {
 	u32 __iomem *piobuf;
-	u32 pioincr, val32, egrsize;
+	u32 pioincr, val32;
 	int i;
 
 	/*
@@ -1488,7 +1495,6 @@ static int ipath_ht_early_init(struct ip
 	 * errors interrupts if we ever see one).
 	 */
 	dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-	egrsize = dd->ipath_rcvegrbufsize;
 
 	/*
 	 * the min() check here is currently a nop, but it may not
@@ -1608,5 +1614,5 @@ void ipath_init_iba6110_funcs(struct ipa
 	 * do very early init that is needed before ipath_f_bus is
 	 * called
 	 */
-	ipath_init_ht_variables();
+	ipath_init_ht_variables(dd);
 }
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -207,8 +207,8 @@ static const struct ipath_kregs ipath_pe
 	.kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
 
 	/*
-	 * These should not be used directly via ipath_read_kreg64(),
-	 * use them with ipath_read_kreg64_port()
+	 * These should not be used directly via ipath_write_kreg64(),
+	 * use them with ipath_write_kreg64_port(),
 	 */
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -263,8 +263,8 @@ static const struct ipath_cregs ipath_pe
 };
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x000000000000003fULL
@@ -294,6 +294,33 @@ static const struct ipath_cregs ipath_pe
 #define IPATH_GPIO_SCL (1ULL << \
 	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
+/*
+ * Rev2 silicon allows suppressing check for ArmLaunch errors.
+ * This can speed up short packet sends on systems that do
+ * not guarantee write-order.
+ */
+#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
+
+/* 6120 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+	INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+	/*
+	 * In practice, it's unlikely that we'll see PCIe PLL, or bus
+	 * parity or memory parity error failures, because most likely we
+	 * won't be able to talk to the core of the chip.  Nonetheless, we
+	 * might see them, if they are in parts of the PCIe core that aren't
+	 * essential.
+	 */
+	INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+	INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
 /**
  * ipath_pe_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
@@ -343,19 +370,49 @@ static void ipath_pe_handle_hwerrors(str
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~dd->ipath_lasthwerror) ||
+	if ((hwerrs & ~(dd->ipath_lasthwerror |
+			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
 	    (ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
 	dd->ipath_lasthwerror |= hwerrs;
 
-	if (hwerrs & ~infinipath_hwe_bitsextant)
+	if (hwerrs & ~dd->ipath_hwe_bitsextant)
 		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~infinipath_hwe_bitsextant));
+			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+		/*
+		 * parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue. They can
+		 * occur if a processor speculative read is done to the PIO
+		 * buffer while we are sending a packet, for example.
+		 */
+		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+			ipath_stats.sps_txeparity++;
+			ipath_dbg("Recovering from TXE parity error (%llu), "
+			    	  "hwerrstatus=%llx\n",
+				  (unsigned long long) ipath_stats.sps_txeparity,
+				  (unsigned long long) hwerrs);
+			ipath_disarm_senderrbufs(dd);
+			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+			if (!hwerrs) { /* else leave in freeze mode */
+				ipath_write_kreg(dd,
+						 dd->ipath_kregs->kr_control,
+						 dd->ipath_control);
+				return;
+			}
+		}
 		if (hwerrs) {
 			/*
 			 * if any set that we aren't ignoring only make the
@@ -379,9 +436,8 @@ static void ipath_pe_handle_hwerrors(str
 		} else {
 			ipath_dbg("Clearing freezemode on ignored hardware "
 				  "error\n");
-			ctrl &= ~INFINIPATH_C_FREEZEMODE;
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-					 ctrl);
+			   		 dd->ipath_control);
 		}
 	}
 
@@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(str
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
 				 dd->ipath_hwerrmask);
 	}
-	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
+
+	ipath_format_hwerrors(hwerrs,
+			      ipath_6120_hwerror_msgs,
+			      sizeof(ipath_6120_hwerror_msgs)/
+			      sizeof(ipath_6120_hwerror_msgs[0]),
+			      msg, msgl);
+
 	if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
 		      << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
 		bits = (u32) ((hwerrs >>
@@ -423,10 +468,6 @@ static void ipath_pe_handle_hwerrors(str
 			 "[PCIe Mem Parity Errs %x] ", bits);
 		strlcat(msg, bitsmsg, msgl);
 	}
-	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-		strlcat(msg, "[IB2IPATH Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-		strlcat(msg, "[IPATH2IB Parity]", msgl);
 
 #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
 			 INFINIPATH_HWE_COREPLL_RFSLIP )
@@ -452,34 +493,6 @@ static void ipath_pe_handle_hwerrors(str
 				 dd->ipath_hwerrmask);
 	}
 
-	if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
-		strlcat(msg, "[PCIe Poisoned TLP]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
-		strlcat(msg, "[PCIe completion timeout]", msgl);
-
-	/*
-	 * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
-	 * parity or memory parity error failures, because most likely we
-	 * won't be able to talk to the core of the chip.  Nonetheless, we
-	 * might see them, if they are in parts of the PCIe core that aren't
-	 * essential.
-	 */
-	if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
-		strlcat(msg, "[PCIePLL1]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
-		strlcat(msg, "[PCIePLL0]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
-		strlcat(msg, "[PCIe XTLH core parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
-		strlcat(msg, "[PCIe ADM TX core parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
-		strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
-	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-		strlcat(msg, "[Rx Dsync]", msgl);
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-		strlcat(msg, "[SerDes PLL]", msgl);
-
 	ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
 		/*
@@ -525,6 +538,9 @@ static int ipath_pe_boardname(struct ipa
 	case 5:
 		n = "InfiniPath_QMH7140";
 		break;
+	case 6:
+		n = "InfiniPath_QLE7142";
+		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
@@ -571,9 +587,12 @@ static void ipath_pe_init_hwerrors(struc
 	if (!dd->ipath_boardrev)	// no PLL for Emulator
 		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
 
-	/* workaround bug 9460 in internal interface bus parity checking */
-	val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
+	if (dd->ipath_minrev < 2) {
+		/* workaround bug 9460 in internal interface bus parity
+		 * checking. Fixed (HW bug 9490) in Rev2.
+		 */
+		val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+	}
 	dd->ipath_hwerrmask = val;
 }
 
@@ -583,8 +602,8 @@ static void ipath_pe_init_hwerrors(struc
  */
 static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 {
-	u64 val, tmp, config1;
-	int ret = 0, change = 0;
+	u64 val, config1, prev_val;
+	int ret = 0;
 
 	ipath_dbg("Trying to bringup serdes\n");
 
@@ -614,7 +633,7 @@ static int ipath_pe_bringup_serdes(struc
 		| INFINIPATH_SERDC0_L1PWR_DN;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 	/* be sure chip saw it */
-	tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	udelay(5);		/* need pll reset set at least for a bit */
 	/*
 	 * after PLL is reset, set the per-lane Resets and TxIdle and
@@ -628,7 +647,7 @@ static int ipath_pe_bringup_serdes(struc
 		   "and txidle (%llx)\n", (unsigned long long) val);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 	/* be sure chip saw it */
-	tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	/* need PLL reset clear for at least 11 usec before lane
 	 * resets cleared; give it a few more to be sure */
 	udelay(15);
@@ -641,6 +660,7 @@ static int ipath_pe_bringup_serdes(struc
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	prev_val = val;
 	if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
 	     INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
 		val &=
@@ -648,11 +668,9 @@ static int ipath_pe_bringup_serdes(struc
 			  INFINIPATH_XGXS_MDIOADDR_SHIFT);
 		/* MDIO address 3 */
 		val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
-		change = 1;
 	}
 	if (val & INFINIPATH_XGXS_RESET) {
 		val &= ~INFINIPATH_XGXS_RESET;
-		change = 1;
 	}
 	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
 	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
@@ -661,9 +679,19 @@ static int ipath_pe_bringup_serdes(struc
 		         INFINIPATH_XGXS_RX_POL_SHIFT);
 		val |= dd->ipath_rx_pol_inv <<
 			INFINIPATH_XGXS_RX_POL_SHIFT;
-		change = 1;
 	}
-	if (change)
+	if (dd->ipath_minrev >= 2) {
+		/* Rev 2 can tolerate multiple writes to PBC, and
+		 * allowing them can provide lower latency on some
+		 * CPUs, but this feature is off by default, only
+		 * turned on by setting D63 of XGXSconfig reg.
+		 * May want to make this conditional more
+		 * fine-grained in future. This is not exactly
+		 * related to XGXS, but that is where the bit ended up.
+		 */
+		val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
+	}
+	if (val != prev_val)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
@@ -717,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }
 
-/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
+	u64 val;
+	u32 chiprev;
+
+	/*
+	 * If the chip supports added error indication via GPIO pins,
+	 * enable interrupts on those bits so the interrupt routine
+	 * can count the events. Also set flag so interrupt routine
+	 * can know they are expected.
+	 */
+	chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+	if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+		/* Rev2+ reports extra errors via internal GPIO pins */
+		dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+		val |= IPATH_GPIO_ERRINTR_MASK;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, val);
+	}
 	return 0;
 }
 
@@ -853,21 +897,23 @@ static int ipath_setup_pe_config(struct 
 	return 0;
 }
 
-static void ipath_init_pe_variables(void)
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
 {
 	/*
 	 * bits for selecting i2c direction and values,
 	 * used for I2C serial flash
 	 */
-	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	ipath_gpio_sda = IPATH_GPIO_SDA;
-	ipath_gpio_scl = IPATH_GPIO_SCL;
+	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
 
 	/* variables for sanity checking interrupt and errors */
-	infinipath_hwe_bitsextant =
+	dd->ipath_hwe_bitsextant =
 		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
 		(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
 		INFINIPATH_HWE_PCIE1PLLFAILED |
@@ -883,13 +929,13 @@ static void ipath_init_pe_variables(void
 		INFINIPATH_HWE_SERDESPLLFAILED |
 		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
 		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-	infinipath_i_bitsextant =
+	dd->ipath_i_bitsextant =
 		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
 		(INFINIPATH_I_RCVAVAIL_MASK <<
 		 INFINIPATH_I_RCVAVAIL_SHIFT) |
 		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
 		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-	infinipath_e_bitsextant =
+	dd->ipath_e_bitsextant =
 		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
 		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
 		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -907,8 +953,8 @@ static void ipath_init_pe_variables(void
 		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
 		INFINIPATH_E_HARDWARE;
 
-	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
 }
 
 /* setup the MSI stuff again after a reset.  I'd like to just call
@@ -1082,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipat
 	mmiowb();
 	spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
 }
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+			     u32 type, unsigned long pa)
+{
+	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+
+	if (pa != dd->ipath_tidinvalid) {
+		if (pa & ((1U << 11) - 1)) {
+			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+				 "not 2KB aligned!\n", pa);
+			return;
+		}
+		pa >>= 11;
+		/* paranoia check */
+		if (pa & (7<<29))
+			ipath_dev_err(dd,
+				      "BUG: Physical page address 0x%lx "
+				      "has bits set in 31-29\n", pa);
+
+		if (type == 0)
+			pa |= dd->ipath_tidtemplate;
+		else /* for now, always full 4KB page */
+			pa |= 2 << 29;
+	}
+	if (dd->ipath_kregbase)
+		writel(pa, tidp32);
+	mmiowb();
+}
+
 
 /**
  * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
@@ -1203,7 +1288,7 @@ int __attribute__((weak)) ipath_unordere
 
 /**
  * ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
+ * @pd: the infinipath port
  * @kbase: ipath_base_info pointer
  *
  * We set the PCIE flag because the lower bandwidth on PCIe vs
@@ -1212,6 +1297,7 @@ int __attribute__((weak)) ipath_unordere
 static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 {
 	struct ipath_base_info *kinfo = kbase;
+	struct ipath_devdata *dd;
 
 	if (ipath_unordered_wc()) {
 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
@@ -1220,8 +1306,20 @@ static int ipath_pe_get_base_info(struct
 	else
 		ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
 
-	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+	if (pd == NULL)
+		goto done;
 
+	dd = pd->port_dd;
+
+	if (dd != NULL && dd->ipath_minrev >= 2) {
+		ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
+		ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
+	}
+
+done:
+	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
 	return 0;
 }
 
@@ -1244,7 +1342,10 @@ void ipath_init_iba6120_funcs(struct ipa
 	dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
 	dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
 	dd->ipath_f_clear_tids = ipath_pe_clear_tids;
-	dd->ipath_f_put_tid = ipath_pe_put_tid;
+	if (dd->ipath_minrev >= 2)
+		dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+	else
+		dd->ipath_f_put_tid = ipath_pe_put_tid;
 	dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
 	dd->ipath_f_setextled = ipath_setup_pe_setextled;
 	dd->ipath_f_get_base_info = ipath_pe_get_base_info;
@@ -1259,6 +1360,6 @@ void ipath_init_iba6120_funcs(struct ipa
 	dd->ipath_kregs = &ipath_pe_kregs;
 	dd->ipath_cregs = &ipath_pe_cregs;
 
-	ipath_init_pe_variables();
+	ipath_init_pe_variables(dd);
 }
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number o
 static int create_port0_egr(struct ipath_devdata *dd)
 {
 	unsigned e, egrcnt;
-	struct sk_buff **skbs;
+	struct ipath_skbinfo *skbinfo;
 	int ret;
 
 	egrcnt = dd->ipath_rcvegrcnt;
 
-	skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
-	if (skbs == NULL) {
+	skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+	if (skbinfo == NULL) {
 		ipath_dev_err(dd, "allocation error for eager TID "
 			      "skb array\n");
 		ret = -ENOMEM;
@@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath
 		 * 4 bytes so that the data buffer stays word aligned.
 		 * See ipath_kreceive() for more details.
 		 */
-		skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
-		if (!skbs[e]) {
+		skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+		if (!skbinfo[e].skb) {
 			ipath_dev_err(dd, "SKB allocation error for "
 				      "eager TID %u\n", e);
 			while (e != 0)
-				dev_kfree_skb(skbs[--e]);
-			vfree(skbs);
+				dev_kfree_skb(skbinfo[--e].skb);
+			vfree(skbinfo);
 			ret = -ENOMEM;
 			goto bail;
 		}
@@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath
 	 * After loop above, so we can test non-NULL to see if ready
 	 * to use at receive, etc.
 	 */
-	dd->ipath_port0_skbs = skbs;
+	dd->ipath_port0_skbinfo = skbinfo;
 
 	for (e = 0; e < egrcnt; e++) {
-		unsigned long phys =
-			virt_to_phys(dd->ipath_port0_skbs[e]->data);
+		dd->ipath_port0_skbinfo[e].phys =
+		  ipath_map_single(dd->pcidev,
+				   dd->ipath_port0_skbinfo[e].skb->data,
+				   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
 		dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
 				    ((char __iomem *) dd->ipath_kregbase +
-				     dd->ipath_rcvegrbase), 0, phys);
+				     dd->ipath_rcvegrbase), 0,
+				    dd->ipath_port0_skbinfo[e].phys);
 	}
 
 	ret = 0;
@@ -344,10 +347,9 @@ done:
 static int init_chip_reset(struct ipath_devdata *dd,
 			   struct ipath_portdata **pdp)
 {
-	struct ipath_portdata *pd;
 	u32 rtmp;
 
-	*pdp = pd = dd->ipath_pd[0];
+	*pdp = dd->ipath_pd[0];
 	/* ensure chip does no sends or receives while we re-initialize */
 	dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0);
@@ -432,16 +434,33 @@ done:
  */
 static void init_shadow_tids(struct ipath_devdata *dd)
 {
-	dd->ipath_pageshadow = (struct page **)
-		vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+	struct page **pages;
+	dma_addr_t *addrs;
+
+	pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
 			sizeof(struct page *));
-	if (!dd->ipath_pageshadow)
+	if (!pages) {
 		ipath_dev_err(dd, "failed to allocate shadow page * "
 			      "array, no expected sends!\n");
-	else
-		memset(dd->ipath_pageshadow, 0,
-		       dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-		       sizeof(struct page *));
+		dd->ipath_pageshadow = NULL;
+		return;
+	}
+
+	addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+			sizeof(dma_addr_t));
+	if (!addrs) {
+		ipath_dev_err(dd, "failed to allocate shadow dma handle "
+			      "array, no expected sends!\n");
+		vfree(dd->ipath_pageshadow);
+		dd->ipath_pageshadow = NULL;
+		return;
+	}
+
+	memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+	       sizeof(struct page *));
+
+	dd->ipath_pageshadow = pages;
+	dd->ipath_physshadow = addrs;
 }
 
 static void enable_chip(struct ipath_devdata *dd,
@@ -649,6 +668,7 @@ int ipath_init_chip(struct ipath_devdata
 {
 	int ret = 0, i;
 	u32 val32, kpiobufs;
+	u32 piobufs, uports;
 	u64 val;
 	struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
 	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -683,16 +703,17 @@ int ipath_init_chip(struct ipath_devdata
 	 * the in memory DMA'ed copies of the registers.  This has to
 	 * be done early, before we calculate lastport, etc.
 	 */
-	val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
 	/*
 	 * calc number of pioavail registers, and save it; we have 2
 	 * bits per buffer.
 	 */
-	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
+	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
+	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
 	if (ipath_kpiobufs == 0) {
 		/* not set by user (this is default) */
-		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
+		if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
 			kpiobufs = 32;
 		else
 			kpiobufs = 16;
@@ -700,31 +721,25 @@ int ipath_init_chip(struct ipath_devdata
 	else
 		kpiobufs = ipath_kpiobufs;
 
-	if (kpiobufs >
-	    (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-	     (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
-		i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-			(dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
+	if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+		i = (int) piobufs -
+			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
 		if (i < 0)
 			i = 0;
-		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
-			 "kernel leaves too few for %d user ports "
+		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+			 "%d for kernel leaves too few for %d user ports "
 			 "(%d each); using %u\n", kpiobufs,
-			 dd->ipath_cfgports - 1,
-			 IPATH_MIN_USER_PORT_BUFCNT, i);
+			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
 		/*
 		 * shouldn't change ipath_kpiobufs, because could be
 		 * different for different devices...
 		 */
 		kpiobufs = i;
 	}
-	dd->ipath_lastport_piobuf =
-		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
-	dd->ipath_pbufsport = dd->ipath_cfgports > 1
-		? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
-		: 0;
-	val32 = dd->ipath_lastport_piobuf -
-		(dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
+	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+	dd->ipath_pbufsport =
+		uports ? dd->ipath_lastport_piobuf / uports : 0;
+	val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
 	if (val32 > 0) {
 		ipath_dbg("allocating %u pbufs/port leaves %u unused, "
 			  "add to kernel\n", dd->ipath_pbufsport, val32);
@@ -735,8 +750,7 @@ int ipath_init_chip(struct ipath_devdata
 	dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
 	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
 		   "each for %u user ports\n", kpiobufs,
-		   dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
-		   dd->ipath_pbufsport, dd->ipath_cfgports - 1);
+		   piobufs, dd->ipath_pbufsport, uports);
 
 	dd->ipath_f_early_init(dd);
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_intr.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,6 +37,50 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+	u32 piobcnt;
+	unsigned long sbuf[4];
+	/*
+	 * it's possible that sendbuffererror could have bits set; might
+	 * have already done this as a result of hardware error handling
+	 */
+	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	/* read these before writing errorclear */
+	sbuf[0] = ipath_read_kreg64(
+		dd, dd->ipath_kregs->kr_sendbuffererror);
+	sbuf[1] = ipath_read_kreg64(
+		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
+	if (piobcnt > 128) {
+		sbuf[2] = ipath_read_kreg64(
+			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+		sbuf[3] = ipath_read_kreg64(
+			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+	}
+
+	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+		int i;
+		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+					  "SendbufErrs %lx %lx", sbuf[0],
+					  sbuf[1]);
+			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+				printk(" %lx %lx ", sbuf[2], sbuf[3]);
+			printk("\n");
+		}
+
+		for (i = 0; i < piobcnt; i++)
+			if (test_bit(i, sbuf))
+				ipath_disarm_piobufs(dd, i, 1);
+		dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+	}
+}
+
+
 /* These are all rcv-related errors which we want to count for stats */
 #define E_SUM_PKTERRS \
 	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -68,53 +112,9 @@
 
 static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
 {
-	unsigned long sbuf[4];
 	u64 ignore_this_time = 0;
-	u32 piobcnt;
 
-	/* if possible that sendbuffererror could be valid */
-	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/* read these before writing errorclear */
-	sbuf[0] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror);
-	sbuf[1] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-	if (piobcnt > 128) {
-		sbuf[2] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-		sbuf[3] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-	}
-
-	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-		int i;
-
-		ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
-		if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-			printk("%lx %lx ", sbuf[2], sbuf[3]);
-		for (i = 0; i < piobcnt; i++) {
-			if (test_bit(i, sbuf)) {
-				u32 __iomem *piobuf;
-				if (i < dd->ipath_piobcnt2k)
-					piobuf = (u32 __iomem *)
-						(dd->ipath_pio2kbase +
-						 i * dd->ipath_palign);
-				else
-					piobuf = (u32 __iomem *)
-						(dd->ipath_pio4kbase +
-						 (i - dd->ipath_piobcnt2k) *
-						 dd->ipath_4kalign);
-
-				ipath_cdbg(PKT,
-					   "PIObuf[%u] @%p pbc is %x; ",
-					   i, piobuf, readl(piobuf));
-
-				ipath_disarm_piobufs(dd, i, 1);
-			}
-		}
-		if (ipath_debug & __IPATH_PKTDBG)
-			printk("\n");
-	}
+	ipath_disarm_senderrbufs(dd);
 	if ((errs & E_SUM_LINK_PKTERRS) &&
 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		/*
@@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipat
 	return ignore_this_time;
 }
 
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+	{ \
+		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
+			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
+		.msg = "TXE " #a " Memory Parity"	     \
+	}
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+	{ \
+		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
+			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
+		.msg = "RXE " #a " Memory Parity"	     \
+	}
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+	strlcat(msg, "[", msgl);
+	strlcat(msg, hwmsg, msgl);
+	strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+			   const struct ipath_hwerror_msgs *hwerrmsgs,
+			   size_t nhwerrmsgs,
+			   char *msg, size_t msgl)
+{
+	int i;
+	const int glen =
+	    sizeof(ipath_generic_hwerror_msgs) /
+	    sizeof(ipath_generic_hwerror_msgs[0]);
+
+	for (i = 0; i < glen; i++) {
+		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+			ipath_format_hwmsg(msg, msgl,
+					   ipath_generic_hwerror_msgs[i].msg);
+		}
+	}
+
+	for (i = 0; i < nhwerrmsgs; i++) {
+		if (hwerrs & hwerrmsgs[i].mask) {
+			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+		}
+	}
+}
+
 /* return the strings for the most common link states */
 static char *ib_linkstate(u32 linkstate)
 {
@@ -327,10 +403,13 @@ static void handle_supp_msgs(struct ipat
 	 * happens so often we never want to count it.
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
-				 ~INFINIPATH_E_IBSTATUSCHANGED);
+		int iserr;
+		iserr = ipath_decode_err(msg, sizeof msg,
+				dd->ipath_lasterror &
+				~INFINIPATH_E_IBSTATUSCHANGED);
 		if (dd->ipath_lasterror &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+		    	~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -344,8 +423,13 @@ static void handle_supp_msgs(struct ipat
 			 * them. So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Suppressed %u messages for %s\n",
-				  supp_msgs, msg);
+			if (iserr)
+				ipath_dbg("Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
+			else
+				ipath_cdbg(ERRPKT,
+					"Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
 		}
 	}
 }
@@ -386,7 +470,7 @@ static int handle_errors(struct ipath_de
 {
 	char msg[512];
 	u64 ignore_this_time = 0;
-	int i;
+	int i, iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 
@@ -404,10 +488,10 @@ static int handle_errors(struct ipath_de
 		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
 	}
 
-	if (!noprint && (errs & ~infinipath_e_bitsextant))
+	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
 		ipath_dev_err(dd, "error interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (errs & ~infinipath_e_bitsextant));
+			      (errs & ~dd->ipath_e_bitsextant));
 
 	if (errs & E_SUM_ERRS)
 		ignore_this_time = handle_e_sum_errs(dd, errs);
@@ -426,6 +510,7 @@ static int handle_errors(struct ipath_de
 	}
 
 	if (supp_msgs == 250000) {
+		int s_iserr;
 		/*
 		 * It's not entirely reasonable assuming that the errors set
 		 * in the last clear period are all responsible for the
@@ -435,17 +520,17 @@ static int handle_errors(struct ipath_de
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 				 ~dd->ipath_maskederrs);
-		ipath_decode_err(msg, sizeof msg,
+		s_iserr = ipath_decode_err(msg, sizeof msg,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
-			ipath_dev_err(dd, "Disabling error(s) %llx because "
-				      "occurring too frequently (%s)\n",
-				      (unsigned long long)
-				      (dd->ipath_maskederrs &
-				       ~dd->ipath_ignorederrs), msg);
+			~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+			ipath_dev_err(dd, "Temporarily disabling "
+			    "error(s) %llx reporting; too frequent (%s)\n",
+				(unsigned long long) (dd->ipath_maskederrs &
+				~dd->ipath_ignorederrs), msg);
 		else {
 			/*
 			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -454,8 +539,15 @@ static int handle_errors(struct ipath_de
 			 * processing them.  So only complain about
 			 * these at debug level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", msg);
+			if (s_iserr)
+				ipath_dbg("Temporarily disabling reporting "
+				    "too frequent queue full errors (%s)\n",
+				    msg);
+			else
+				ipath_cdbg(ERRPKT,
+				    "Temporarily disabling reporting too"
+				    " frequent packet errors (%s)\n",
+				    msg);
 		}
 
 		/*
@@ -478,6 +570,14 @@ static int handle_errors(struct ipath_de
 			~(INFINIPATH_E_HARDWARE |
 			  INFINIPATH_E_IBSTATUSCHANGED);
 	}
+
+	/* likely due to cancel, so suppress */
+	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+		dd->ipath_lastcancel > jiffies) {
+		ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+	}
+
 	if (!errs)
 		return 0;
 
@@ -505,6 +605,8 @@ static int handle_errors(struct ipath_de
 		ipath_stats.sps_crcerrs++;
 		chkerrpkts = 1;
 	}
+	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
 
 	/*
 	 * We don't want to print these two as they happen, or we can make
@@ -514,10 +616,9 @@ static int handle_errors(struct ipath_de
 	 * on close
 	 */
 	if (errs & INFINIPATH_E_RRCVHDRFULL) {
-		int any;
 		u32 hd, tl;
 		ipath_stats.sps_hdrqfull++;
-		for (any = i = 0; i < dd->ipath_cfgports; i++) {
+		for (i = 0; i < dd->ipath_cfgports; i++) {
 			struct ipath_portdata *pd = dd->ipath_pd[i];
 			if (i == 0) {
 				hd = dd->ipath_port0head;
@@ -529,7 +630,7 @@ static int handle_errors(struct ipath_de
 				 * don't report same point multiple times,
 				 * except kernel
 				 */
-				tl = (u32) * pd->port_rcvhdrtail_kvaddr;
+				tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
 				if (tl == dd->ipath_lastrcvhdrqtails[i])
 					continue;
 				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
@@ -594,8 +695,13 @@ static int handle_errors(struct ipath_de
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
 	}
 
-	if (!noprint && *msg)
-		ipath_dev_err(dd, "%s error\n", msg);
+	if (!noprint && *msg) {
+		if (iserr)
+			ipath_dev_err(dd, "%s error\n", msg);
+		else
+			dev_info(&dd->pcidev->dev, "%s packet problems\n",
+				msg);
+	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
 			   "waking\n", dd->ipath_state_wanted,
@@ -729,18 +835,17 @@ static void handle_urcv(struct ipath_dev
 	int rcvdint = 0;
 
 	portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
-		 infinipath_i_rcvavail_mask)
+		 dd->ipath_i_rcvavail_mask)
 		| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
-		   infinipath_i_rcvurg_mask);
+		   dd->ipath_i_rcvurg_mask);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		struct ipath_portdata *pd = dd->ipath_pd[i];
 		if (portr & (1 << i) && pd && pd->port_cnt &&
 			test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-			int rcbit;
 			clear_bit(IPATH_PORT_WAITING_RCV,
 				  &pd->port_flag);
-			rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
-			clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+			clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+				  &dd->ipath_rcvctrl);
 			wake_up_interruptible(&pd->port_wait);
 			rcvdint = 1;
 		}
@@ -755,7 +860,7 @@ static void handle_urcv(struct ipath_dev
 	}
 }
 
-irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
+irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *ignored)
 {
 	struct ipath_devdata *dd = data;
 	u32 istat, chk0rcv = 0;
@@ -768,6 +873,9 @@ irqreturn_t ipath_intr(int irq, void *da
 
 	ipath_stats.sps_ints++;
 
+	if (dd->ipath_int_counter != (u32) -1)
+		dd->ipath_int_counter++;
+
 	if (!(dd->ipath_flags & IPATH_PRESENT)) {
 		/*
 		 * This return value is not great, but we do not want the
@@ -808,7 +916,7 @@ irqreturn_t ipath_intr(int irq, void *da
 	if (oldhead != curtail) {
 		if (dd->ipath_flags & IPATH_GPIO_INTR) {
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					 (u64) (1 << 2));
+					 (u64) (1 << IPATH_GPIO_PORT0_BIT));
 			istat = port0rbits | INFINIPATH_I_GPIO;
 		}
 		else
@@ -838,10 +946,10 @@ irqreturn_t ipath_intr(int irq, void *da
 	if (unexpected)
 		unexpected = 0;
 
-	if (unlikely(istat & ~infinipath_i_bitsextant))
+	if (unlikely(istat & ~dd->ipath_i_bitsextant))
 		ipath_dev_err(dd,
 			      "interrupt with unknown interrupts %x set\n",
-			      istat & (u32) ~ infinipath_i_bitsextant);
+			      istat & (u32) ~ dd->ipath_i_bitsextant);
 	else
 		ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
 
@@ -867,26 +975,80 @@ irqreturn_t ipath_intr(int irq, void *da
 
 	if (istat & INFINIPATH_I_GPIO) {
 		/*
-		 * Packets are available in the port 0 rcv queue.
-		 * Eventually this needs to be generalized to check
-		 * IPATH_GPIO_INTR, and the specific GPIO bit, if
-		 * GPIO interrupts are used for anything else.
-		 */
-		if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
-			u32 gpiostatus;
-			gpiostatus = ipath_read_kreg32(
-				dd, dd->ipath_kregs->kr_gpio_status);
-			ipath_dbg("Unexpected GPIO interrupt bits %x\n",
-				  gpiostatus);
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					 gpiostatus);
+		 * GPIO interrupts fall in two broad classes:
+		 * GPIO_2 indicates (on some HT4xx boards) that a packet
+		 *        has arrived for Port 0. Checking for this
+		 *        is controlled by flag IPATH_GPIO_INTR.
+		 * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
+		 *        that we need to count. Checking for this
+		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
+		 */
+		u32 gpiostatus;
+		u32 to_clear = 0;
+
+		gpiostatus = ipath_read_kreg32(
+			dd, dd->ipath_kregs->kr_gpio_status);
+		/* First the error-counter case.
+		 */
+		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+			/* want to clear the bits we see asserted. */
+			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+			/*
+			 * Count appropriately, clear bits out of our copy,
+			 * as they have been "handled".
+			 */
+			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+				ipath_dbg("FlowCtl on UnsupVL\n");
+				dd->ipath_rxfc_unsupvl_errs++;
+			}
+			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+				ipath_dbg("Overrun Threshold exceeded\n");
+				dd->ipath_overrun_thresh_errs++;
+			}
+			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+				ipath_dbg("Local Link Integrity error\n");
+				dd->ipath_lli_errs++;
+			}
+			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
 		}
-		else {
-			/* Clear GPIO status bit 2 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					(u64) (1 << 2));
+		/* Now the Port0 Receive case */
+		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
+			/*
+			 * GPIO status bit 2 is set, and we expected it.
+			 * clear it and indicate in p0bits.
+			 * This probably only happens if a Port0 pkt
+			 * arrives at _just_ the wrong time, and we
+			 * handle that by setting chk0rcv;
+			 */
+			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
 			chk0rcv = 1;
 		}
+		if (unlikely(gpiostatus)) {
+			/*
+			 * Some unexpected bits remain. If they could have
+			 * caused the interrupt, complain and clear.
+			 * MEA: this is almost certainly non-ideal.
+			 * we should look into auto-disable of unexpected
+			 * GPIO interrupts, possibly on a "three strikes"
+			 * basis.
+			 */
+			u32 mask;
+			mask = ipath_read_kreg32(
+				dd, dd->ipath_kregs->kr_gpio_mask);
+			if (mask & gpiostatus) {
+				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+				  gpiostatus & mask);
+				to_clear |= (gpiostatus & mask);
+			}
+		}
+		if (to_clear) {
+			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+					(u64) to_clear);
+		}
 	}
 	chk0rcv |= istat & port0rbits;
 
@@ -911,9 +1073,9 @@ irqreturn_t ipath_intr(int irq, void *da
 		istat &= ~port0rbits;
 	}
 
-	if (istat & ((infinipath_i_rcvavail_mask <<
+	if (istat & ((dd->ipath_i_rcvavail_mask <<
 		      INFINIPATH_I_RCVAVAIL_SHIFT)
-		     | (infinipath_i_rcvurg_mask <<
+		     | (dd->ipath_i_rcvurg_mask <<
 			INFINIPATH_I_RCVURG_SHIFT)))
 		handle_urcv(dd, istat);
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -39,6 +39,8 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 
 #include "ipath_common.h"
@@ -62,7 +64,7 @@ struct ipath_portdata {
 	/* rcvhdrq base, needs mmap before useful */
 	void *port_rcvhdrq;
 	/* kernel virtual address where hdrqtail is updated */
-	volatile __le64 *port_rcvhdrtail_kvaddr;
+	void *port_rcvhdrtail_kvaddr;
 	/*
 	 * temp buffer for expected send setup, allocated at open, instead
 	 * of each setup call
@@ -79,8 +81,8 @@ struct ipath_portdata {
 	dma_addr_t port_rcvhdrq_phys;
 	dma_addr_t port_rcvhdrqtailaddr_phys;
 	/*
-	 * number of opens on this instance (0 or 1; ignoring forks, dup,
-	 * etc. for now)
+	 * number of opens (including slave subports) on this instance
+	 * (ignoring forks, dup, etc. for now)
 	 */
 	int port_cnt;
 	/*
@@ -89,6 +91,10 @@ struct ipath_portdata {
 	 */
 	/* instead of calculating it */
 	unsigned port_port;
+	/* non-zero if port is being shared. */
+	u16 port_subport_cnt;
+	/* non-zero if port is being shared. */
+	u16 port_subport_id;
 	/* chip offset of PIO buffers for this port */
 	u32 port_piobufs;
 	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -121,6 +127,16 @@ struct ipath_portdata {
 	u16 port_pkeys[4];
 	/* so file ops can get at unit */
 	struct ipath_devdata *port_dd;
+	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+	void *subport_uregbase;
+	/* An array of pages for the eager receive buffers * N */
+	void *subport_rcvegrbuf;
+	/* An array of pages for the eager header queue entries * N */
+	void *subport_rcvhdr_base;
+	/* The version of the library which opened this port */
+	u32 userversion;
+	/* Bitmask of active slaves */
+	u32 active_slaves;
 };
 
 struct sk_buff;
@@ -132,6 +148,11 @@ struct _ipath_layer {
 	void *l_arg;
 };
 
+struct ipath_skbinfo {
+	struct sk_buff *skb;
+	dma_addr_t phys;
+};
+
 struct ipath_devdata {
 	struct list_head ipath_list;
 
@@ -154,7 +175,7 @@ struct ipath_devdata {
 	/* ipath_cfgports pointers */
 	struct ipath_portdata **ipath_pd;
 	/* sk_buffs used by port 0 eager receive queue */
-	struct sk_buff **ipath_port0_skbs;
+	struct ipath_skbinfo *ipath_port0_skbinfo;
 	/* kvirt address of 1st 2k pio buffer */
 	void __iomem *ipath_pio2kbase;
 	/* kvirt address of 1st 4k pio buffer */
@@ -252,6 +273,8 @@ struct ipath_devdata {
 	u32 ipath_lastport_piobuf;
 	/* is a stats timer active */
 	u32 ipath_stats_timer_active;
+	/* number of interrupts for this device -- saturates... */
+	u32 ipath_int_counter;
 	/* dwords sent read from counter */
 	u32 ipath_lastsword;
 	/* dwords received read from counter */
@@ -315,12 +338,16 @@ struct ipath_devdata {
 	u8 ipath_ht_slave_off;
 	/* for write combining settings */
 	unsigned long ipath_wc_cookie;
+	unsigned long ipath_wc_base;
+	unsigned long ipath_wc_len;
 	/* ref count for each pkey */
 	atomic_t ipath_pkeyrefs[4];
 	/* shadow copy of all exptids physaddr; used only by funcsim */
 	u64 *ipath_tidsimshadow;
 	/* shadow copy of struct page *'s for exp tid pages */
 	struct page **ipath_pageshadow;
+	/* shadow copy of dma handles for exp tid pages */
+	dma_addr_t *ipath_physshadow;
 	/* lock to workaround chip bug 9437 */
 	spinlock_t ipath_tid_lock;
 
@@ -402,6 +429,9 @@ struct ipath_devdata {
 	unsigned long ipath_rcvctrl;
 	/* shadow kr_sendctrl */
 	unsigned long ipath_sendctrl;
+	/* ports waiting for PIOavail intr */
+	unsigned long ipath_portpiowait;
+	unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
 
 	/* value we put in kr_rcvhdrcnt */
 	u32 ipath_rcvhdrcnt;
@@ -465,8 +495,6 @@ struct ipath_devdata {
 	u32 ipath_htwidth;
 	/* HT speed (200,400,800,1000) from HT config */
 	u32 ipath_htspeed;
-	/* ports waiting for PIOavail intr */
-	unsigned long ipath_portpiowait;
 	/*
 	 * number of sequential ibcstatus change for polling active/quiet
 	 * (i.e., link not coming up).
@@ -510,11 +538,50 @@ struct ipath_devdata {
 	u32 ipath_lli_counter;
 	/* local link integrity errors */
 	u32 ipath_lli_errors;
+	/*
+	 * Above counts only cases where _successive_ LocalLinkIntegrity
+	 * errors were seen in the receive headers of kern-packets.
+	 * Below are the three (monotonically increasing) counters
+	 * maintained via GPIO interrupts on iba6120-rev2.
+	 */
+	u32 ipath_rxfc_unsupvl_errs;
+	u32 ipath_overrun_thresh_errs;
+	u32 ipath_lli_errs;
 
 	/* Link status check work */
-	struct work_struct link_task;
+	struct work_struct link_work;
+
+	/*
+	 * Not all devices managed by a driver instance are the same
+	 * type, so these fields must be per-device.
+	 */
+	u64 ipath_i_bitsextant;
+	ipath_err_t ipath_e_bitsextant;
+	ipath_err_t ipath_hwe_bitsextant;
+
+	/*
+	 * Below should be computable from number of ports,
+	 * since they are never modified.
+	 */
+	u32 ipath_i_rcvavail_mask;
+	u32 ipath_i_rcvurg_mask;
+
+	/*
+	 * Register bits for selecting i2c direction and values, used for
+	 * I2C serial flash.
+	 */
+	u16 ipath_gpio_sda_num;
+	u16 ipath_gpio_scl_num;
+	u64 ipath_gpio_sda;
+	u64 ipath_gpio_scl;
 };
 
+/* Private data for file operations */
+struct ipath_filedata {
+	struct ipath_portdata *pd;
+	unsigned subport;
+	unsigned tidcursor;
+};
 extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
@@ -524,6 +591,7 @@ int ipath_enable_wc(struct ipath_devdata
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
@@ -543,8 +611,8 @@ struct sk_buff *ipath_alloc_skb(struct i
 
 extern int ipath_diag_inuse;
 
-irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
@@ -575,7 +643,11 @@ int ipath_set_lid(struct ipath_devdata *
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+	((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+	((struct ipath_filedata *)(fp)->private_data)->tidcursor
 
 /*
  * values for ipath_flags
@@ -615,12 +687,23 @@ int ipath_set_rx_pol_inv(struct ipath_de
 		/* can miss port0 rx interrupts */
 #define IPATH_POLL_RX_INTR  0x40000
 #define IPATH_DISABLED      0x80000 /* administratively disabled */
+		/* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
 
 /* portdata flag bit offsets */
 		/* waiting for a packet to arrive */
 #define IPATH_PORT_WAITING_RCV   2
 		/* waiting for a PIO buffer to be available */
 #define IPATH_PORT_WAITING_PIO   3
+		/* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
@@ -674,8 +757,6 @@ int ipath_eeprom_write(struct ipath_devd
 /* these are used for the registers that vary with port */
 void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
 			   unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
-			   unsigned);
 
 /*
  * We could have a single register get/put routine, that takes a group type,
@@ -793,15 +874,19 @@ int ipath_device_create_group(struct dev
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);
 
-void ipath_diagpkt_add(void);
-void ipath_diagpkt_remove(void);
-
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
 int ipathfs_remove_device(struct ipath_devdata *);
 
 /*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+			  size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+
+/*
  * Flush write combining store buffers (if present) and perform a write
  * barrier.
  */
@@ -858,4 +943,20 @@ extern struct mutex ipath_mutex;
 
 #endif /* _IPATH_DEBUGGING */
 
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+	u64 mask;
+	const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+			   const struct ipath_hwerror_msgs *hwerrmsgs,
+			   size_t nhwerrmsgs,
+			   char *msg, size_t lmsg);
+
 #endif				/* _IPATH_KERNEL_H */
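
The ipath_kernel.h hunks above add struct ipath_filedata so that file->private_data carries per-open state (port, subport index, TID cursor) rather than a bare ipath_portdata pointer, with port_fp()/subport_fp()/tidcursor_fp() as accessors. A small userspace sketch of that pattern follows; the struct file stand-in and the assumption that subport 0 denotes the master open are illustrative, not taken from the patch.

#include <stdio.h>

struct ipath_portdata { unsigned port_port; };	/* trimmed to one field */

struct file { void *private_data; };		/* stand-in for the VFS struct */

struct ipath_filedata {
	struct ipath_portdata *pd;	/* the port this open is attached to */
	unsigned subport;		/* per-open subport index (assumed: 0 is the master) */
	unsigned tidcursor;		/* per-open TID allocation cursor */
};

#define port_fp(fp)	(((struct ipath_filedata *)(fp)->private_data)->pd)
#define subport_fp(fp)	(((struct ipath_filedata *)(fp)->private_data)->subport)

int main(void)
{
	struct ipath_portdata pd = { .port_port = 1 };
	struct ipath_filedata fd = { .pd = &pd, .subport = 0, .tidcursor = 0 };
	struct file fp = { .private_data = &fd };

	printf("port %u, subport %u\n", port_fp(&fp)->port_port, subport_fp(&fp));
	return 0;
}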
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_keys.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_t
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			ipath_dbg(KERN_INFO "LKEY table full\n");
+			ipath_dbg("LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
@@ -118,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_t
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 		  struct ib_sge *sge, int acc)
 {
+	struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
 	struct ipath_mregion *mr;
 	unsigned n, m;
 	size_t off;
@@ -132,15 +133,22 @@ int ipath_lkey_ok(struct ipath_lkey_tabl
 	 * being reversible by calling bus_to_virt().
 	 */
 	if (sge->lkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		isge->mr = NULL;
-		isge->vaddr = bus_to_virt(sge->addr);
+		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
 		isge->sge_length = sge->length;
 		ret = 1;
 		goto bail;
 	}
 	mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+		     qp->ibqp.pd != mr->pd)) {
 		ret = 0;
 		goto bail;
 	}
@@ -188,9 +196,10 @@ bail:
  *
  * Return 1 if successful, otherwise 0.
  */
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 		  u32 len, u64 vaddr, u32 rkey, int acc)
 {
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_lkey_table *rkt = &dev->lk_table;
 	struct ipath_sge *sge = &ss->sge;
 	struct ipath_mregion *mr;
@@ -199,12 +208,18 @@ int ipath_rkey_ok(struct ipath_ibdev *de
 	int ret;
 
 	/*
-	 * We use RKEY == zero for physical addresses
-	 * (see ipath_get_dma_mr).
+	 * We use RKEY == zero for kernel virtual addresses
+	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (rkey == 0) {
+		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+		if (pd->user) {
+			ret = 0;
+			goto bail;
+		}
 		sge->mr = NULL;
-		sge->vaddr = phys_to_virt(vaddr);
+		sge->vaddr = (void *) vaddr;
 		sge->length = len;
 		sge->sge_length = len;
 		ss->sg_list = NULL;
@@ -214,7 +229,8 @@ int ipath_rkey_ok(struct ipath_ibdev *de
 	}
 
 	mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey)) {
+	if (unlikely(mr == NULL || mr->lkey != rkey ||
+		     qp->ibqp.pd != mr->pd)) {
 		ret = 0;
 		goto bail;
 	}
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_mad.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct
 	struct ipath_devdata *dd = to_idev(ibdev)->dd;
 	u32 vendor, majrev, minrev;
 
-	if (smp->attr_mod)
+	/* GUID 0 is illegal */
+	if (smp->attr_mod || (dd->ipath_guid == 0))
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	nip->base_version = 1;
@@ -131,10 +132,15 @@ static int recv_subn_get_guidinfo(struct
 	 * We only support one GUID for now.  If this changes, the
 	 * portinfo.guid_cap field needs to be updated too.
 	 */
-	if (startgx == 0)
-		/* The first is a copy of the read-only HW GUID. */
-		*p = to_idev(ibdev)->dd->ipath_guid;
-	else
+	if (startgx == 0) {
+		__be64 g = to_idev(ibdev)->dd->ipath_guid;
+		if (g == 0)
+			/* GUID 0 is illegal */
+			smp->status |= IB_SMP_INVALID_FIELD;
+		else
+			/* The first is a copy of the read-only HW GUID. */
+			*p = g;
+	} else
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return reply(smp);
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+	struct ipath_mmap_info *ip =
+		container_of(ref, struct ipath_mmap_info, ref);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+	ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	ip->mmap_cnt--;
+	kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+	.open =     ipath_vma_open,
+	.close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ipath_ibdev *dev = to_idev(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct ipath_mmap_info *ip, **pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&dev->pending_lock);
+	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (void *) offset != ip->obj)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		*pp = ip->next;
+		spin_unlock_irq(&dev->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &ipath_vm_ops;
+		vma->vm_private_data = ip;
+		ipath_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&dev->pending_lock);
+done:
+	return ret;
+}
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_mr.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -54,6 +54,8 @@ static inline struct ipath_fmr *to_ifmr(
  * @acc: access flags
  *
  * Returns the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see ipath_dma.c).
  */
 struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
 {
@@ -138,6 +140,7 @@ struct ib_mr *ipath_reg_phys_mr(struct i
 		goto bail;
 	}
 
+	mr->mr.pd = pd;
 	mr->mr.user_base = *iova_start;
 	mr->mr.iova = *iova_start;
 	mr->mr.length = 0;
@@ -148,8 +151,7 @@ struct ib_mr *ipath_reg_phys_mr(struct i
 	m = 0;
 	n = 0;
 	for (i = 0; i < num_phys_buf; i++) {
-		mr->mr.map[m]->segs[n].vaddr =
-			phys_to_virt(buffer_list[i].addr);
+		mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
 		mr->mr.map[m]->segs[n].length = buffer_list[i].size;
 		mr->mr.length += buffer_list[i].size;
 		n++;
@@ -197,6 +199,7 @@ struct ib_mr *ipath_reg_user_mr(struct i
 		goto bail;
 	}
 
+	mr->mr.pd = pd;
 	mr->mr.user_base = region->user_base;
 	mr->mr.iova = region->virt_base;
 	mr->mr.length = region->length;
@@ -207,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct i
 	m = 0;
 	n = 0;
 	list_for_each_entry(chunk, &region->chunk_list, list) {
-		for (i = 0; i < chunk->nmap; i++) {
-			mr->mr.map[m]->segs[n].vaddr =
-				page_address(chunk->page_list[i].page);
+		for (i = 0; i < chunk->nents; i++) {
+			void *vaddr;
+
+			vaddr = page_address(chunk->page_list[i].page);
+			if (!vaddr) {
+				ret = ERR_PTR(-EINVAL);
+				goto bail;
+			}
+			mr->mr.map[m]->segs[n].vaddr = vaddr;
 			mr->mr.map[m]->segs[n].length = region->page_size;
 			n++;
 			if (n == IPATH_SEGSZ) {
@@ -289,6 +298,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib
 	 * Resources are allocated but no valid mapping (RKEY can't be
 	 * used).
 	 */
+	fmr->mr.pd = pd;
 	fmr->mr.user_base = 0;
 	fmr->mr.iova = 0;
 	fmr->mr.length = 0;
@@ -344,7 +354,7 @@ int ipath_map_phys_fmr(struct ib_fmr *ib
 	n = 0;
 	ps = 1 << fmr->page_shift;
 	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]);
+		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
 		fmr->mr.map[m]->segs[n].length = ps;
 		if (++n == IPATH_SEGSZ) {
 			m++;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_qp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -36,7 +36,6 @@
 
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
-#include "ipath_common.h"
 
 #define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -45,19 +44,6 @@
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
 						      BITS_PER_PAGE, off)
 
-#define TRANS_INVALID	0
-#define TRANS_ANY2RST	1
-#define TRANS_RST2INIT	2
-#define TRANS_INIT2INIT	3
-#define TRANS_INIT2RTR	4
-#define TRANS_RTR2RTS	5
-#define TRANS_RTS2RTS	6
-#define TRANS_SQERR2RTS	7
-#define TRANS_ANY2ERR	8
-#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS	11 /* error if not drained */
-
 /*
  * Convert the AETH credit code into the number of credits.
  */
@@ -288,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				ipath_dbg(KERN_INFO "QP memory leak!\n");
+				ipath_dbg("QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@@ -334,7 +320,8 @@ static void ipath_reset_qp(struct ipath_
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	qp->s_busy = 0;
+	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
@@ -347,8 +334,8 @@ static void ipath_reset_qp(struct ipath_
 		qp->r_state = IB_OPCODE_UC_SEND_LAST;
 	}
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
+	qp->r_wrid_valid = 0;
 	qp->s_rnr_timeout = 0;
 	qp->s_head = 0;
 	qp->s_tail = 0;
@@ -356,25 +343,33 @@ static void ipath_reset_qp(struct ipath_
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->r_rq.head = 0;
-	qp->r_rq.tail = 0;
+	qp->s_wait_credit = 0;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
 	qp->r_reuse_sge = 0;
 }
 
 /**
  * ipath_error_qp - put a QP into an error state
  * @qp: the QP to put into an error state
+ * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
-void ipath_error_qp(struct ipath_qp *qp)
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+	ipath_dbg("QP%d/%d in error state\n",
 		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
@@ -385,11 +380,10 @@ void ipath_error_qp(struct ipath_qp *qp)
 		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
-	wc.status = IB_WC_WR_FLUSH_ERR;
 	wc.vendor_err = 0;
 	wc.byte_len = 0;
 	wc.imm_data = 0;
-	wc.qp_num = qp->ibqp.qp_num;
+	wc.qp = &qp->ibqp;
 	wc.src_qp = 0;
 	wc.wc_flags = 0;
 	wc.pkey_index = 0;
@@ -397,6 +391,14 @@ void ipath_error_qp(struct ipath_qp *qp)
 	wc.sl = 0;
 	wc.dlid_path_bits = 0;
 	wc.port_num = 0;
+	if (qp->r_wrid_valid) {
+		qp->r_wrid_valid = 0;
+		wc.wr_id = qp->r_wr_id;
+		wc.opcode = IB_WC_RECV;
+		wc.status = err;
+		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+	}
+	wc.status = IB_WC_WR_FLUSH_ERR;
 
 	while (qp->s_last != qp->s_head) {
 		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
@@ -411,15 +413,32 @@ void ipath_error_qp(struct ipath_qp *qp)
 	qp->s_hdrwords = 0;
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 
-	wc.opcode = IB_WC_RECV;
-	spin_lock(&qp->r_rq.lock);
-	while (qp->r_rq.tail != qp->r_rq.head) {
-		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-		if (++qp->r_rq.tail >= qp->r_rq.size)
-			qp->r_rq.tail = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	if (qp->r_rq.wq) {
+		struct ipath_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		wc.opcode = IB_WC_RECV;
+		while (tail != head) {
+			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
 	}
-	spin_unlock(&qp->r_rq.lock);
 }
 
 /**
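
The flush loop above walks the receive ring from the shared tail to the shared head after clamping both indices, since the receive queue now lives in memory that userspace can map (see the vmalloc_user/ipath_mmap changes later in this patch). A minimal userspace sketch of that pattern, with invented names, is:

/*
 * Sketch only: head/tail live in memory shared with an untrusted
 * consumer, so they are clamped into range before being walked.
 * struct rq and rq_flush() are illustrative, not driver API.
 */
#include <stdio.h>
#include <stdint.h>

struct rq {
	uint32_t size;          /* number of slots in the ring */
	volatile uint32_t head; /* producer index, shared memory */
	volatile uint32_t tail; /* consumer index, shared memory */
	uint64_t wr_id[8];      /* work-request ids, one per slot */
};

/* Flush every posted-but-unconsumed entry, returning how many were flushed. */
static unsigned rq_flush(struct rq *rq)
{
	uint32_t head = rq->head;
	uint32_t tail = rq->tail;
	unsigned n = 0;

	/* sanity check shared indices before trusting them */
	if (head >= rq->size)
		head = 0;
	if (tail >= rq->size)
		tail = 0;

	while (tail != head) {
		printf("flush wr_id %llu\n",
		       (unsigned long long)rq->wr_id[tail]);
		if (++tail >= rq->size)
			tail = 0;
		n++;
	}
	rq->tail = tail;
	return n;
}

int main(void)
{
	struct rq rq = { .size = 8, .head = 3, .tail = 0,
			 .wr_id = { 10, 11, 12 } };

	printf("flushed %u entries\n", rq_flush(&rq));
	return 0;
}
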
@@ -427,11 +446,12 @@ void ipath_error_qp(struct ipath_qp *qp)
  * @ibqp: the queue pair who's attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
  *
  * Returns 0 on success, otherwise returns an errno.
  */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask)
+		    int attr_mask, struct ib_udata *udata)
 {
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
@@ -480,14 +500,18 @@ int ipath_modify_qp(struct ib_qp *ibqp, 
 		if (attr->path_mig_state != IB_MIG_MIGRATED &&
 		    attr->path_mig_state != IB_MIG_REARM)
 			goto inval;
- 
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
 		break;
 
 	case IB_QPS_ERR:
-		ipath_error_qp(qp);
+		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 		break;
 
 	default:
@@ -537,11 +561,14 @@ int ipath_modify_qp(struct ib_qp *ibqp, 
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		qp->s_max_rd_atomic = attr->max_rd_atomic;
+
 	qp->state = new_state;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
 
 	ret = 0;
 	goto bail;
@@ -569,7 +596,7 @@ int ipath_query_qp(struct ib_qp *ibqp, s
 	attr->dest_qp_num = qp->remote_qpn;
 	attr->qp_access_flags = qp->qp_access_flags;
 	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->r_rq.size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
 	attr->cap.max_send_sge = qp->s_max_sge;
 	attr->cap.max_recv_sge = qp->r_rq.max_sge;
 	attr->cap.max_inline_data = 0;
@@ -579,8 +606,8 @@ int ipath_query_qp(struct ib_qp *ibqp, s
 	attr->alt_pkey_index = 0;
 	attr->en_sqd_async_notify = 0;
 	attr->sq_draining = 0;
-	attr->max_rd_atomic = 1;
-	attr->max_dest_rd_atomic = 1;
+	attr->max_rd_atomic = qp->s_max_rd_atomic;
+	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
 	attr->timeout = qp->timeout;
@@ -595,7 +622,7 @@ int ipath_query_qp(struct ib_qp *ibqp, s
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+	if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
 		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
 	else
 		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -623,13 +650,23 @@ __be32 ipath_compute_aeth(struct ipath_q
 	} else {
 		u32 min, max, x;
 		u32 credits;
-
+		struct ipath_rwq *wq = qp->r_rq.wq;
+		u32 head;
+		u32 tail;
+
+		/* sanity check pointers before trusting them */
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
 		/*
 		 * Compute the number of credits available (RWQEs).
 		 * XXX Not holding the r_rq.lock here so there is a small
 		 * chance that the pair of reads are not atomic.
 		 */
-		credits = qp->r_rq.head - qp->r_rq.tail;
+		credits = head - tail;
 		if ((int)credits < 0)
 			credits += qp->r_rq.size;
 		/*
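
The credit count above is simply the head/tail distance in the receive ring, corrected for wraparound when the unsigned subtraction goes negative. A small standalone sketch of the same arithmetic (helper name invented):

/*
 * Sketch of the credit computation: the number of available RWQEs is
 * head minus tail, fixed up for wraparound, after clamping indices
 * read from shared memory.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t rq_credits(uint32_t head, uint32_t tail, uint32_t size)
{
	uint32_t credits;

	if (head >= size)
		head = 0;
	if (tail >= size)
		tail = 0;

	credits = head - tail;
	if ((int32_t)credits < 0)
		credits += size;
	return credits;
}

int main(void)
{
	printf("%u\n", rq_credits(2, 6, 16));   /* wrapped case: 12 */
	printf("%u\n", rq_credits(10, 4, 16));  /* plain case:    6 */
	return 0;
}
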
@@ -706,27 +743,37 @@ struct ib_qp *ipath_create_qp(struct ib_
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+		sz = sizeof(*qp);
+		if (init_attr->srq) {
+			struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+			sz += sizeof(*qp->r_sg_list) *
+				srq->rq.max_sge;
+		} else
+			sz += sizeof(*qp->r_sg_list) *
+				init_attr->cap.max_recv_sge;
+		qp = kmalloc(sz, GFP_KERNEL);
 		if (!qp) {
-			vfree(swq);
 			ret = ERR_PTR(-ENOMEM);
-			goto bail;
+			goto bail_swq;
 		}
 		if (init_attr->srq) {
+			sz = 0;
 			qp->r_rq.size = 0;
 			qp->r_rq.max_sge = 0;
 			qp->r_rq.wq = NULL;
+			init_attr->cap.max_recv_wr = 0;
+			init_attr->cap.max_recv_sge = 0;
 		} else {
 			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
-				kfree(qp);
-				vfree(swq);
 				ret = ERR_PTR(-ENOMEM);
-				goto bail;
+				goto bail_qp;
 			}
 		}
 
@@ -747,19 +794,17 @@ struct ib_qp *ipath_create_qp(struct ib_
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
 		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+			qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
 		else
 			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
 		if (err) {
-			vfree(swq);
-			vfree(qp->r_rq.wq);
-			kfree(qp);
 			ret = ERR_PTR(err);
-			goto bail;
+			goto bail_rwq;
 		}
+		qp->ip = NULL;
 		ipath_reset_qp(qp);
 		break;
 
@@ -771,6 +816,42 @@ struct ib_qp *ipath_create_qp(struct ib_
 
 	init_attr->cap.max_inline_data = 0;
 
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) qp->r_rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_rwq;
+		}
+
+		if (qp->r_rq.wq) {
+			/* Allocate info for ipath_mmap(). */
+			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+			if (!ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_rwq;
+			}
+			qp->ip = ip;
+			ip->context = ibpd->uobject->context;
+			ip->obj = qp->r_rq.wq;
+			kref_init(&ip->ref);
+			ip->mmap_cnt = 0;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	}
+
 	spin_lock(&dev->n_qps_lock);
 	if (dev->n_qps_allocated == ib_ipath_max_qps) {
 		spin_unlock(&dev->n_qps_lock);
@@ -785,11 +866,13 @@ struct ib_qp *ipath_create_qp(struct ib_
 	goto bail;
 
 bail_ip:
-	ipath_free_qp(&dev->qp_table, qp);
-	vfree(swq);
+	kfree(qp->ip);
+bail_rwq:
 	vfree(qp->r_rq.wq);
+bail_qp:
 	kfree(qp);
-
+bail_swq:
+	vfree(swq);
 bail:
 	return ret;
 }
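
The reworked error path in ipath_create_qp() unwinds through a chain of labels (bail_ip, bail_rwq, bail_qp, bail_swq), each freeing only what was allocated before the point of failure. A stripped-down sketch of the same goto pattern, with made-up names and malloc() standing in for the driver's allocators:

/* Sketch of cascaded error unwinding; all names are invented. */
#include <stdio.h>
#include <stdlib.h>

static int create_object(size_t a_len, size_t b_len, size_t c_len)
{
	void *a, *b, *c;
	int ret;

	a = malloc(a_len);
	if (!a) {
		ret = -1;
		goto bail;
	}
	b = malloc(b_len);
	if (!b) {
		ret = -1;
		goto bail_a;
	}
	c = malloc(c_len);
	if (!c) {
		ret = -1;
		goto bail_b;
	}

	/* success: the driver keeps its allocations; this demo just
	 * reports success and falls through the frees to stay leak-free */
	free(c);
	ret = 0;

bail_b:
	free(b);
bail_a:
	free(a);
bail:
	return ret;
}

int main(void)
{
	printf("create_object -> %d\n", create_object(16, 32, 64));
	return 0;
}
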
@@ -809,11 +892,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	unsigned long flags;
 
-	spin_lock_irqsave(&qp->r_rq.lock, flags);
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
@@ -834,8 +918,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	if (atomic_read(&qp->refcount) != 0)
 		ipath_free_qp(&dev->qp_table, qp);
 
+	if (qp->ip)
+		kref_put(&qp->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
-	vfree(qp->r_rq.wq);
 	kfree(qp);
 	return 0;
 }
@@ -879,7 +966,7 @@ bail:
  * @wc: the WC responsible for putting the QP in this state
  *
  * Flushes the send work queue.
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
  */
 
 void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -887,7 +974,7 @@ void ipath_sqerror_qp(struct ipath_qp *q
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+	ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
 		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
@@ -905,12 +992,12 @@ void ipath_sqerror_qp(struct ipath_qp *q
 	wc->status = IB_WC_WR_FLUSH_ERR;
 
 	while (qp->s_last != qp->s_head) {
+		wqe = get_swqe_ptr(qp, qp->s_last);
 		wc->wr_id = wqe->wr.wr_id;
 		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
 		if (++qp->s_last >= qp->s_size)
 			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
 	}
 	qp->s_cur = qp->s_tail = qp->s_head;
 	qp->state = IB_QPS_SQE;
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_rc.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+		       u32 psn, u32 pmtu)
+{
+	u32 len;
+
+	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ipath_skip_sge(ss, len);
+	return wqe->length - len;
+}
+
 /**
  * ipath_init_restart- initialize the qp->s_sge after a restart
  * @qp: the QP who's SGE we're restarting
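
The new restart_sge() helper computes how far into a work request a retransmission should resume: the 24-bit PSN delta times the path MTU, then that many bytes are skipped in the SGE list. A userspace sketch of the arithmetic, assuming IPATH_PSN_MASK is the usual 24-bit mask:

/* Sketch of the resend-offset computation; names are illustrative. */
#include <stdio.h>
#include <stdint.h>

#define PSN_MASK 0xFFFFFFu   /* PSNs are 24-bit sequence numbers */

static uint32_t restart_offset(uint32_t restart_psn, uint32_t first_psn,
			       uint32_t pmtu)
{
	/* number of full packets already delivered, wrap-safe */
	return ((restart_psn - first_psn) & PSN_MASK) * pmtu;
}

int main(void)
{
	uint32_t wqe_len = 10000, pmtu = 2048;
	uint32_t off = restart_offset(0x000002, 0xFFFFFE, pmtu); /* wraps */

	printf("skip %u bytes, %u bytes left to resend\n",
	       off, wqe_len - off);
	return 0;
}
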
@@ -47,15 +60,9 @@
 static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
 {
 	struct ipath_ibdev *dev;
-	u32 len;
 
-	len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
-		ib_mtu_enum_to_int(qp->path_mtu);
-	qp->s_sge.sge = wqe->sg_list[0];
-	qp->s_sge.sg_list = wqe->sg_list + 1;
-	qp->s_sge.num_sge = wqe->wr.num_sge;
-	ipath_skip_sge(&qp->s_sge, len);
-	qp->s_len = wqe->length - len;
+	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+				ib_mtu_enum_to_int(qp->path_mtu));
 	dev = to_idev(qp->ibqp.device);
 	spin_lock(&dev->pending_lock);
 	if (list_empty(&qp->timerwait))
@@ -70,45 +77,81 @@ static void ipath_init_restart(struct ip
  * @ohdr: a pointer to the IB header being constructed
  * @pmtu: the path MTU
  *
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
-		      struct ipath_other_headers *ohdr,
-		      u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_qp *qp,
+			     struct ipath_other_headers *ohdr,
+			     u32 pmtu, u32 *bth0p, u32 *bth2p)
 {
+	struct ipath_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
+	u32 bth2;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 
-	/*
-	 * Send a response.  Note that we are in the responder's
-	 * side of the QP context.
-	 */
 	switch (qp->s_ack_state) {
-	case OP(RDMA_READ_REQUEST):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
-		if (len > pmtu) {
-			len = pmtu;
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-		} else
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-		qp->s_rdma_len -= len;
+	case OP(RDMA_READ_RESPONSE_LAST):
+	case OP(RDMA_READ_RESPONSE_ONLY):
+	case OP(ATOMIC_ACKNOWLEDGE):
+		/*
+		 * We can increment the tail pointer now that the last
+		 * response has been sent instead of only being
+		 * constructed.
+		 */
+		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+			qp->s_tail_ack_queue = 0;
+		/* FALLTHROUGH */
+	case OP(SEND_ONLY):
+	case OP(ACKNOWLEDGE):
+		/* Check for no next entry in the queue. */
+		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+			if (qp->s_flags & IPATH_S_ACK_PENDING)
+				goto normal;
+			qp->s_ack_state = OP(ACKNOWLEDGE);
+			goto bail;
+		}
+
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST)) {
+			/* Copy SGE state in case we need to resend */
+			qp->s_ack_rdma_sge = e->rdma_sge;
+			qp->s_cur_sge = &qp->s_ack_rdma_sge;
+			len = e->rdma_sge.sge.sge_length;
+			if (len > pmtu) {
+				len = pmtu;
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+			} else
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+			ohdr->u.aeth = ipath_compute_aeth(qp);
+			hwords++;
+			qp->s_ack_rdma_psn = e->psn;
+			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+		} else {
+			/* COMPARE_SWAP or FETCH_ADD */
+			qp->s_cur_sge = NULL;
+			len = 0;
+			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+			ohdr->u.at.aeth = ipath_compute_aeth(qp);
+			ohdr->u.at.atomic_ack_eth[0] =
+				cpu_to_be32(e->atomic_data >> 32);
+			ohdr->u.at.atomic_ack_eth[1] =
+				cpu_to_be32(e->atomic_data);
+			hwords += sizeof(ohdr->u.at) / sizeof(u32);
+			bth2 = e->psn;
+		}
 		bth0 = qp->s_ack_state << 24;
-		ohdr->u.aeth = ipath_compute_aeth(qp);
-		hwords++;
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		qp->s_cur_sge = &qp->s_rdma_sge;
-		len = qp->s_rdma_len;
+		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 			len = pmtu;
 		else {
@@ -116,61 +159,41 @@ u32 ipath_make_rc_ack(struct ipath_qp *q
 			hwords++;
 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
 		}
-		qp->s_rdma_len -= len;
 		bth0 = qp->s_ack_state << 24;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		/*
-		 * We have to prevent new requests from changing
-		 * the r_sge state while a ipath_verbs_send()
-		 * is in progress.
-		 */
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		bth0 = 0;
-		goto bail;
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		qp->s_cur_sge = NULL;
-		len = 0;
-		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
-		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.aeth = ipath_compute_aeth(qp);
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at) / 4;
+		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
 		break;
 
 	default:
-		/* Send a regular ACK. */
-		qp->s_cur_sge = NULL;
-		len = 0;
+	normal:
 		/*
-		 * Set the s_ack_state so the receive interrupt handler
-		 * won't try to send an ACK (out of order) until this one
-		 * is actually sent.
+		 * Send a regular ACK.
+		 * Set the s_ack_state so we wait until after sending
+		 * the ACK before setting s_ack_state to ACKNOWLEDGE
+		 * (see above).
 		 */
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-		bth0 = OP(ACKNOWLEDGE) << 24;
+		qp->s_ack_state = OP(SEND_ONLY);
+		qp->s_flags &= ~IPATH_S_ACK_PENDING;
+		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
-			ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-						    (qp->s_nak_state <<
-						     IPATH_AETH_CREDIT_SHIFT));
+			ohdr->u.aeth =
+				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+					    (qp->s_nak_state <<
+					     IPATH_AETH_CREDIT_SHIFT));
 		else
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 		hwords++;
+		len = 0;
+		bth0 = OP(ACKNOWLEDGE) << 24;
+		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
 	}
 	qp->s_hdrwords = hwords;
 	qp->s_cur_size = len;
+	*bth0p = bth0;
+	*bth2p = bth2;
+	return 1;
 
 bail:
-	return bth0;
+	return 0;
 }
 
 /**
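
The rewritten ipath_make_rc_ack() above pulls queued responses from a small ring (s_ack_queue) indexed modulo IPATH_MAX_RDMA_ATOMIC + 1 by r_head_ack_queue and s_tail_ack_queue, so head == tail means empty and the spare slot marks full; the real tail only advances once the final packet of a response has actually gone out. A hedged model of that ring discipline, with all names invented:

/* Sketch of the queued-response ring; not driver code. */
#include <stdio.h>
#include <stdint.h>

#define MAX_RD_ATOMIC 4                    /* stands in for IPATH_MAX_RDMA_ATOMIC */
#define QUEUE_SLOTS   (MAX_RD_ATOMIC + 1)  /* one spare slot marks "full" */

struct ack_ring {
	uint8_t head;   /* next slot to fill with an incoming request */
	uint8_t tail;   /* next queued response to transmit */
	uint32_t psn[QUEUE_SLOTS];
};

static int ack_ring_post(struct ack_ring *q, uint32_t psn)
{
	uint8_t next = q->head + 1;

	if (next > MAX_RD_ATOMIC)
		next = 0;
	if (next == q->tail)         /* would look empty: queue is full */
		return -1;
	q->psn[q->head] = psn;
	q->head = next;
	return 0;
}

static int ack_ring_send_one(struct ack_ring *q, uint32_t *psn)
{
	if (q->head == q->tail)      /* nothing queued */
		return -1;
	*psn = q->psn[q->tail];
	if (++q->tail > MAX_RD_ATOMIC)
		q->tail = 0;
	return 0;
}

int main(void)
{
	struct ack_ring q = { 0, 0, { 0 } };
	uint32_t psn;

	ack_ring_post(&q, 100);
	ack_ring_post(&q, 101);
	while (!ack_ring_send_one(&q, &psn))
		printf("respond to PSN %u\n", psn);
	return 0;
}
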
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *q
 	u32 bth2;
 	char newreq;
 
+	/* Sending responses has higher priority than sending requests. */
+	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
+	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
+	    ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+		goto done;
+
 	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
 	    qp->s_rnr_timeout)
-		goto done;
+		goto bail;
 
 	/* Limit the number of packets sent without an ACK. */
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *q
 			list_add_tail(&qp->timerwait,
 				      &dev->pending[dev->pending_index]);
 		spin_unlock(&dev->pending_lock);
-		goto done;
+		goto bail;
 	}
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *q
 		if (qp->s_cur == qp->s_tail) {
 			/* Check if send work queue is empty. */
 			if (qp->s_tail == qp->s_head)
-				goto done;
+				goto bail;
+			/*
+			 * If a fence is requested, wait for previous
+			 * RDMA read and atomic operations to finish.
+			 */
+			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+			    qp->s_num_rd_atomic) {
+				qp->s_flags |= IPATH_S_FENCE_PENDING;
+				goto bail;
+			}
 			wqe->psn = qp->s_next_psn;
 			newreq = 1;
 		}
@@ -241,10 +280,7 @@ int ipath_make_rc_req(struct ipath_qp *q
 		 * original work request since we may need to resend
 		 * it.
 		 */
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_len = len = wqe->length;
+		len = wqe->length;
 		ss = &qp->s_sge;
 		bth2 = 0;
 		switch (wqe->wr.opcode) {
@@ -253,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *q
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -284,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *q
 			/* If no credit, return. */
 			if (qp->s_lsn != (u32) -1 &&
 			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
-				goto done;
+				goto bail;
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
 			ohdr->u.rc.reth.rkey =
 				cpu_to_be32(wqe->wr.wr.rdma.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / 4;
+			hwords += sizeof(struct ib_reth) / sizeof(u32);
 			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 				wqe->lpsn += (len - 1) / pmtu;
@@ -315,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *q
 			break;
 
 		case IB_WR_RDMA_READ:
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			qp->s_state = OP(RDMA_READ_REQUEST);
-			hwords += sizeof(ohdr->u.rc.reth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				/*
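
The newreq checks above (and the matching ones in the atomic case below) gate new RDMA reads and atomics on s_num_rd_atomic versus s_max_rd_atomic, recording IPATH_S_RDMAR_PENDING when the limit is hit so the send tasklet can be rescheduled after a completion. A minimal model of that throttle, with invented names:

/* Sketch of the outstanding read/atomic throttle; not driver API. */
#include <stdio.h>
#include <stdbool.h>

struct rd_atomic_gate {
	unsigned outstanding;   /* like qp->s_num_rd_atomic */
	unsigned max;           /* like qp->s_max_rd_atomic */
	bool pending;           /* like IPATH_S_RDMAR_PENDING */
};

/* Try to start a read/atomic; returns false if the caller must wait. */
static bool gate_start(struct rd_atomic_gate *g)
{
	if (g->outstanding >= g->max) {
		g->pending = true;
		return false;
	}
	g->outstanding++;
	return true;
}

/* A response completed; returns true if deferred work should be rescheduled. */
static bool gate_complete(struct rd_atomic_gate *g)
{
	g->outstanding--;
	if (g->pending) {
		g->pending = false;
		return true;    /* the driver would reschedule its tasklet here */
	}
	return false;
}

int main(void)
{
	struct rd_atomic_gate g = { 0, 1, false };

	printf("first start:  %d\n", gate_start(&g));   /* 1: allowed   */
	printf("second start: %d\n", gate_start(&g));   /* 0: must wait */
	printf("reschedule:   %d\n", gate_complete(&g));/* 1: resume    */
	return 0;
}
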
@@ -333,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *q
 					qp->s_next_psn += (len - 1) / pmtu;
 				wqe->lpsn = qp->s_next_psn++;
 			}
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			qp->s_state = OP(RDMA_READ_REQUEST);
+			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
 			if (++qp->s_cur == qp->s_size)
@@ -341,41 +387,66 @@ int ipath_make_rc_req(struct ipath_qp *q
 
 		case IB_WR_ATOMIC_CMP_AND_SWP:
 		case IB_WR_ATOMIC_FETCH_AND_ADD:
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
-				qp->s_state = OP(COMPARE_SWAP);
-			else
-				qp->s_state = OP(FETCH_ADD);
-			ohdr->u.atomic_eth.vaddr = cpu_to_be64(
-				wqe->wr.wr.atomic.remote_addr);
-			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->wr.wr.atomic.rkey);
-			ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-				wqe->wr.wr.atomic.swap);
-			ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-				wqe->wr.wr.atomic.compare_add);
-			hwords += sizeof(struct ib_atomic_eth) / 4;
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
 			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= IPATH_S_RDMAR_PENDING;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
 				if (qp->s_lsn != (u32) -1)
 					qp->s_lsn++;
 				wqe->lpsn = wqe->psn;
 			}
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
+			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+				qp->s_state = OP(COMPARE_SWAP);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.swap);
+				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+			} else {
+				qp->s_state = OP(FETCH_ADD);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+				ohdr->u.atomic_eth.compare_data = 0;
+			}
+			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr >> 32);
+			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr);
+			ohdr->u.atomic_eth.rkey = cpu_to_be32(
+				wqe->wr.wr.atomic.rkey);
+			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
 			break;
 
 		default:
-			goto done;
+			goto bail;
 		}
+		qp->s_sge.sge = wqe->sg_list[0];
+		qp->s_sge.sg_list = wqe->sg_list + 1;
+		qp->s_sge.num_sge = wqe->wr.num_sge;
+		qp->s_len = wqe->length;
 		if (newreq) {
 			qp->s_tail++;
 			if (qp->s_tail >= qp->s_size)
 				qp->s_tail = 0;
 		}
-		bth2 |= qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
+		bth2 |= qp->s_psn & IPATH_PSN_MASK;
+		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			qp->s_psn = wqe->lpsn + 1;
+		else {
+			qp->s_psn++;
+			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+				qp->s_next_psn = qp->s_psn;
+		}
 		/*
 		 * Put the QP on the pending list so lost ACKs will cause
 		 * a retry.  More than one request can be pending so the
@@ -400,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *q
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
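
The raw signed subtractions on s_psn are replaced by ipath_cmp24() here and in the hunks below so the comparison stays correct across 24-bit PSN wraparound. The helper itself is not shown in this patch; the sketch below uses the common sign-extend-the-24-bit-difference trick, which is assumed to match its intent:

/* Sketch of a 24-bit circular sequence comparison; not the driver's code. */
#include <stdio.h>
#include <stdint.h>

/* < 0 if a precedes b, 0 if equal, > 0 if a follows b (mod 2^24) */
static int cmp24(uint32_t a, uint32_t b)
{
	return ((int32_t)((a - b) << 8)) >> 8;
}

int main(void)
{
	/* 0x000001 follows 0xFFFFFE even though it is numerically smaller */
	printf("%d\n", cmp24(0x000001, 0xFFFFFE) > 0);  /* prints 1 */
	printf("%d\n", cmp24(0x000010, 0x000020) > 0);  /* prints 0 */
	return 0;
}
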
@@ -436,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *q
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
@@ -473,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *q
 			cpu_to_be32(wqe->wr.wr.rdma.rkey);
 		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
 		qp->s_state = OP(RDMA_READ_REQUEST);
-		hwords += sizeof(ohdr->u.rc.reth) / 4;
+		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
 			qp->s_next_psn = qp->s_psn;
 		ss = NULL;
 		len = 0;
@@ -483,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *q
 		if (qp->s_cur == qp->s_size)
 			qp->s_cur = 0;
 		break;
-
-	case OP(RDMA_READ_REQUEST):
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
-		/*
-		 * We shouldn't start anything new until this request is
-		 * finished.  The ACK will handle rescheduling us.  XXX The
-		 * number of outstanding ones is negotiated at connection
-		 * setup time (see pg. 258,289)?  XXX Also, if we support
-		 * multiple outstanding requests, we need to check the WQE
-		 * IB_SEND_FENCE flag and not send a new request if a RDMA
-		 * read or atomic is pending.
-		 */
-		goto done;
 	}
 	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
 		bth2 |= 1 << 31;	/* Request ACK. */
@@ -506,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *q
 	qp->s_cur_size = len;
 	*bth0p = bth0 | (qp->s_state << 24);
 	*bth2p = bth2;
+done:
 	return 1;
 
-done:
+bail:
 	return 0;
 }
 
@@ -518,7 +576,8 @@ done:
  *
  * This is called from ipath_rc_rcv() and only uses the receive
  * side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
  */
 static void send_rc_ack(struct ipath_qp *qp)
 {
@@ -529,6 +588,12 @@ static void send_rc_ack(struct ipath_qp 
 	struct ipath_ib_header hdr;
 	struct ipath_other_headers *ohdr;
 
+	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
+	    qp->s_ack_state != OP(ACKNOWLEDGE))
+		goto queue_ack;
+
 	/* Construct the header. */
 	ohdr = &hdr.u.oth;
 	lrh0 = IPATH_LRH_BTH;
@@ -542,19 +607,14 @@ static void send_rc_ack(struct ipath_qp 
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+		OP(ACKNOWLEDGE) << 24;
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
 					     IPATH_AETH_CREDIT_SHIFT));
 	else
 		ohdr->u.aeth = ipath_compute_aeth(qp);
-	if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
-		bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
-		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
-		hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
-	} else
-		bth0 |= OP(ACKNOWLEDGE) << 24;
 	lrh0 |= qp->remote_ah_attr.sl << 4;
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -568,31 +628,31 @@ static void send_rc_ack(struct ipath_qp 
 	 * If we can send the ACK, clear the ACK state.
 	 */
 	if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
-		qp->r_ack_state = OP(ACKNOWLEDGE);
 		dev->n_unicast_xmit++;
-	} else {
-		/*
-		 * We are out of PIO buffers at the moment.
-		 * Pass responsibility for sending the ACK to the
-		 * send tasklet so that when a PIO buffer becomes
-		 * available, the ACK is sent ahead of other outgoing
-		 * packets.
-		 */
-		dev->n_rc_qacks++;
-		spin_lock_irq(&qp->s_lock);
-		/* Don't coalesce if a RDMA read or atomic is pending. */
-		if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
-		    qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
-			qp->s_ack_state = qp->r_ack_state;
-			qp->s_nak_state = qp->r_nak_state;
-			qp->s_ack_psn = qp->r_ack_psn;
-			qp->r_ack_state = OP(ACKNOWLEDGE);
-		}
-		spin_unlock_irq(&qp->s_lock);
-
-		/* Call ipath_do_rc_send() in another thread. */
-		tasklet_hi_schedule(&qp->s_task);
+		goto done;
 	}
+
+	/*
+	 * We are out of PIO buffers at the moment.
+	 * Pass responsibility for sending the ACK to the
+	 * send tasklet so that when a PIO buffer becomes
+	 * available, the ACK is sent ahead of other outgoing
+	 * packets.
+	 */
+	dev->n_rc_qacks++;
+
+queue_ack:
+	spin_lock_irq(&qp->s_lock);
+	qp->s_flags |= IPATH_S_ACK_PENDING;
+	qp->s_nak_state = qp->r_nak_state;
+	qp->s_ack_psn = qp->r_ack_psn;
+	spin_unlock_irq(&qp->s_lock);
+
+	/* Call ipath_do_rc_send() in another thread. */
+	tasklet_hi_schedule(&qp->s_task);
+
+done:
+	return;
 }
 
 /**
@@ -690,20 +750,13 @@ void ipath_restart_rc(struct ipath_qp *q
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 	struct ipath_ibdev *dev;
 
-	/*
-	 * If there are no requests pending, we are done.
-	 */
-	if (ipath_cmp24(psn, qp->s_next_psn) >= 0 ||
-	    qp->s_last == qp->s_tail)
-		goto done;
-
 	if (qp->s_retry == 0) {
 		wc->wr_id = wqe->wr.wr_id;
 		wc->status = IB_WC_RETRY_EXC_ERR;
 		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		wc->vendor_err = 0;
 		wc->byte_len = 0;
-		wc->qp_num = qp->ibqp.qp_num;
+		wc->qp = &qp->ibqp;
 		wc->src_qp = qp->remote_qpn;
 		wc->pkey_index = 0;
 		wc->slid = qp->remote_ah_attr.dlid;
@@ -728,11 +781,9 @@ void ipath_restart_rc(struct ipath_qp *q
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
 		dev->n_rc_resends++;
 	else
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
 
 	reset_psn(qp, psn);
-
-done:
 	tasklet_hi_schedule(&qp->s_task);
 
 bail:
@@ -765,6 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp
 	struct ib_wc wc;
 	struct ipath_swqe *wqe;
 	int ret = 0;
+	u32 ack_psn;
 
 	/*
 	 * Remove the QP from the timeout queue (or RNR timeout queue).
@@ -783,20 +835,16 @@ static int do_rc_ack(struct ipath_qp *qp
 	 * before the NAK'ed request.  The MSN won't include the NAK'ed
 	 * request but will include an ACK'ed request(s).
 	 */
+	ack_psn = psn;
+	if (aeth >> 29)
+		ack_psn--;
 	wqe = get_swqe_ptr(qp, qp->s_last);
 
-	/* Nothing is pending to ACK/NAK. */
-	if (qp->s_last == qp->s_tail)
-		goto bail;
-
 	/*
 	 * The MSN might be for a later WQE than the PSN indicates so
 	 * only complete WQEs that the PSN finishes.
 	 */
-	while (ipath_cmp24(psn, wqe->lpsn) >= 0) {
-		/* If we are ACKing a WQE, the MSN should be >= the SSN. */
-		if (ipath_cmp24(aeth, wqe->ssn) < 0)
-			break;
+	while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
 		/*
 		 * If this request is a RDMA read or atomic, and the ACK is
 		 * for a later operation, this ACK NAKs the RDMA read or
@@ -807,7 +855,8 @@ static int do_rc_ack(struct ipath_qp *qp
 		 * is sent but before the response is received.
 		 */
 		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-		     opcode != OP(RDMA_READ_RESPONSE_LAST)) ||
+		     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
+		      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
 		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -825,16 +874,33 @@ static int do_rc_ack(struct ipath_qp *qp
 			 */
 			goto bail;
 		}
+		if (qp->s_num_rd_atomic &&
+		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+			qp->s_num_rd_atomic--;
+			/* Restart sending task if fence is complete */
+			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+			    !qp->s_num_rd_atomic) {
+				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
+				tasklet_hi_schedule(&qp->s_task);
+			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
+				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
+				tasklet_hi_schedule(&qp->s_task);
+			}
+		}
 		/* Post a send completion queue entry if requested. */
-		if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			wc.wr_id = wqe->wr.wr_id;
 			wc.status = IB_WC_SUCCESS;
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 			wc.vendor_err = 0;
 			wc.byte_len = wqe->length;
-			wc.qp_num = qp->ibqp.qp_num;
+			wc.imm_data = 0;
+			wc.qp = &qp->ibqp;
 			wc.src_qp = qp->remote_qpn;
+			wc.wc_flags = 0;
 			wc.pkey_index = 0;
 			wc.slid = qp->remote_ah_attr.dlid;
 			wc.sl = qp->remote_ah_attr.sl;
@@ -851,15 +917,19 @@ static int do_rc_ack(struct ipath_qp *qp
 		if (qp->s_last == qp->s_cur) {
 			if (++qp->s_cur >= qp->s_size)
 				qp->s_cur = 0;
+			qp->s_last = qp->s_cur;
+			if (qp->s_last == qp->s_tail)
+				break;
 			wqe = get_swqe_ptr(qp, qp->s_cur);
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
+		} else {
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			if (qp->s_last == qp->s_tail)
+				break;
+			wqe = get_swqe_ptr(qp, qp->s_last);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		if (qp->s_last == qp->s_tail)
-			break;
 	}
 
 	switch (aeth >> 29) {
@@ -871,6 +941,18 @@ static int do_rc_ack(struct ipath_qp *qp
 			list_add_tail(&qp->timerwait,
 				      &dev->pending[dev->pending_index]);
 			spin_unlock(&dev->pending_lock);
+			/*
+			 * If we get a partial ACK for a resent operation,
+			 * we can stop resending the earlier packets and
+			 * continue with the next packet the receiver wants.
+			 */
+			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+				reset_psn(qp, psn + 1);
+				tasklet_hi_schedule(&qp->s_task);
+			}
+		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+			qp->s_state = OP(SEND_LAST);
+			qp->s_psn = psn + 1;
 		}
 		ipath_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -881,22 +963,23 @@ static int do_rc_ack(struct ipath_qp *qp
 
 	case 1:		/* RNR NAK */
 		dev->n_rnr_naks++;
+		if (qp->s_last == qp->s_tail)
+			goto bail;
 		if (qp->s_rnr_retry == 0) {
-			if (qp->s_last == qp->s_tail)
-				goto bail;
-
 			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
 		if (qp->s_rnr_retry_cnt < 7)
 			qp->s_rnr_retry--;
-		if (qp->s_last == qp->s_tail)
-			goto bail;
 
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			dev->n_rc_resends++;
+		else
+			dev->n_rc_resends +=
+				(qp->s_psn - psn) & IPATH_PSN_MASK;
 
 		reset_psn(qp, psn);
 
@@ -907,26 +990,20 @@ static int do_rc_ack(struct ipath_qp *qp
 		goto bail;
 
 	case 3:		/* NAK */
-		/* The last valid PSN seen is the previous request's. */
-		if (qp->s_last != qp->s_tail)
-			update_last_psn(qp, wqe->psn - 1);
+		if (qp->s_last == qp->s_tail)
+			goto bail;
+		/* The last valid PSN is the previous PSN. */
+		update_last_psn(qp, psn - 1);
 		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
 			IPATH_AETH_CREDIT_MASK) {
 		case 0:	/* PSN sequence error */
 			dev->n_seq_naks++;
 			/*
-			 * Back up to the responder's expected PSN.  XXX
+			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
 			 * RDMA READ response which terminates the RDMA
 			 * READ.
 			 */
-			if (qp->s_last == qp->s_tail)
-				break;
-
-			if (ipath_cmp24(psn, wqe->psn) < 0)
-				break;
-
-			/* Retry the request. */
 			ipath_restart_rc(qp, psn, &wc);
 			break;
 
@@ -948,7 +1025,7 @@ static int do_rc_ack(struct ipath_qp *qp
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 			wc.vendor_err = 0;
 			wc.byte_len = 0;
-			wc.qp_num = qp->ibqp.qp_num;
+			wc.qp = &qp->ibqp;
 			wc.src_qp = qp->remote_qpn;
 			wc.pkey_index = 0;
 			wc.slid = qp->remote_ah_attr.dlid;
@@ -1000,6 +1077,7 @@ static inline void ipath_rc_rcv_resp(str
 				     u32 psn, u32 hdrsize, u32 pmtu,
 				     int header_in_data)
 {
+	struct ipath_swqe *wqe;
 	unsigned long flags;
 	struct ib_wc wc;
 	int diff;
@@ -1029,6 +1107,10 @@ static inline void ipath_rc_rcv_resp(str
 		goto ack_done;
 	}
 
+	if (unlikely(qp->s_last == qp->s_tail))
+		goto ack_done;
+	wqe = get_swqe_ptr(qp, qp->s_last);
+
 	switch (opcode) {
 	case OP(ACKNOWLEDGE):
 	case OP(ATOMIC_ACKNOWLEDGE):
@@ -1039,17 +1121,32 @@ static inline void ipath_rc_rcv_resp(str
 			aeth = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
 		}
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
-			*(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
+		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+			u64 val;
+
+			if (!header_in_data) {
+				__be32 *p = ohdr->u.at.atomic_ack_eth;
+
+				val = ((u64) be32_to_cpu(p[0]) << 32) |
+					be32_to_cpu(p[1]);
+			} else
+				val = be64_to_cpu(((__be64 *) data)[0]);
+			*(u64 *) wqe->sg_list[0].vaddr = val;
+		}
 		if (!do_rc_ack(qp, aeth, psn, opcode) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
 		/*
-		 * do_rc_ack() has already checked the PSN so skip
-		 * the sequence check.
-		 */
-		goto rdma_read;
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
+		 */
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_middle;
 
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
 		/* no AETH, no ACK */
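
The atomic acknowledge payload is now carried as two big-endian 32-bit words and reassembled with shifts, matching the way the send side splits the 64-bit atomic ETH vaddr earlier in this patch. A userspace sketch of the same split and rejoin, using htonl()/ntohl() in place of the kernel's byte-order helpers:

/* Sketch of packing a 64-bit value as two big-endian 32-bit words. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static void split_be64(uint64_t vaddr, uint32_t out[2])
{
	out[0] = htonl((uint32_t)(vaddr >> 32));  /* high word, wire order */
	out[1] = htonl((uint32_t)vaddr);          /* low word, wire order  */
}

static uint64_t join_be64(const uint32_t in[2])
{
	return ((uint64_t)ntohl(in[0]) << 32) | ntohl(in[1]);
}

int main(void)
{
	uint32_t wire[2];
	uint64_t vaddr = 0x123456789abcdef0ULL;

	split_be64(vaddr, wire);
	printf("round trip ok: %d\n", join_be64(wire) == vaddr);
	return 0;
}
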
@@ -1058,18 +1155,15 @@ static inline void ipath_rc_rcv_resp(str
 			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
 			goto ack_done;
 		}
-	rdma_read:
-		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
-			goto ack_done;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+	read_middle:
 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto ack_done;
-		if (unlikely(pmtu >= qp->s_len))
-			goto ack_done;
+			goto ack_len_err;
+		if (unlikely(pmtu >= qp->s_rdma_read_len))
+			goto ack_len_err;
+
 		/* We got a response so update the timeout. */
-		if (unlikely(qp->s_last == qp->s_tail ||
-			     get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
-			     IB_WR_RDMA_READ))
-			goto ack_done;
 		spin_lock(&dev->pending_lock);
 		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
 			list_move_tail(&qp->timerwait,
@@ -1078,66 +1172,97 @@ static inline void ipath_rc_rcv_resp(str
 		/*
 		 * Update the RDMA receive state but do the copy w/o
 		 * holding the locks and blocking interrupts.
-		 * XXX Yet another place that affects relaxed RDMA order
-		 * since we don't want s_sge modified.
 		 */
-		qp->s_len -= pmtu;
+		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		ipath_copy_sge(&qp->s_sge, data, pmtu);
+		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
 		goto bail;
 
-	case OP(RDMA_READ_RESPONSE_LAST):
-		/* ACKs READ req. */
+	case OP(RDMA_READ_RESPONSE_ONLY):
 		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
 			dev->n_rdma_seq++;
 			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
 			goto ack_done;
 		}
-		/* FALLTHROUGH */
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
-			goto ack_done;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
 		/*
-		 * Get the number of bytes the message was padded by.
+		 * Check that the data size is >= 0 && <= pmtu.
+		 * Remember to account for the AETH header (4) and
+		 * ICRC (4).
 		 */
+		if (unlikely(tlen < (hdrsize + pad + 8)))
+			goto ack_len_err;
+		/*
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
+		 */
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_last;
+
+	case OP(RDMA_READ_RESPONSE_LAST):
+		/* ACKs READ req. */
+		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+			dev->n_rdma_seq++;
+			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+			goto ack_done;
+		}
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+		/* Get the number of bytes the message was padded by. */
 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
 		/*
 		 * Check that the data size is >= 1 && <= pmtu.
 		 * Remember to account for the AETH header (4) and
 		 * ICRC (4).
 		 */
-		if (unlikely(tlen <= (hdrsize + pad + 8))) {
-			/* XXX Need to generate an error CQ entry. */
-			goto ack_done;
-		}
+		if (unlikely(tlen <= (hdrsize + pad + 8)))
+			goto ack_len_err;
+	read_last:
 		tlen -= hdrsize + pad + 8;
-		if (unlikely(tlen != qp->s_len)) {
-			/* XXX Need to generate an error CQ entry. */
-			goto ack_done;
-		}
+		if (unlikely(tlen != qp->s_rdma_read_len))
+			goto ack_len_err;
 		if (!header_in_data)
 			aeth = be32_to_cpu(ohdr->u.aeth);
 		else {
 			aeth = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
 		}
-		ipath_copy_sge(&qp->s_sge, data, tlen);
-		if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
-			/*
-			 * Change the state so we contimue
-			 * processing new requests and wake up the
-			 * tasklet if there are posted sends.
-			 */
-			qp->s_state = OP(SEND_LAST);
-			if (qp->s_tail != qp->s_head)
-				tasklet_hi_schedule(&qp->s_task);
-		}
+		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+		(void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
 		goto ack_done;
 	}
 
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
+	goto bail;
+
+ack_op_err:
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto ack_err;
+
+ack_len_err:
+	wc.status = IB_WC_LOC_LEN_ERR;
+ack_err:
+	wc.wr_id = wqe->wr.wr_id;
+	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = qp->remote_qpn;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = qp->remote_ah_attr.dlid;
+	wc.sl = qp->remote_ah_attr.sl;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	ipath_sqerror_qp(qp, &wc);
 bail:
 	return;
 }
@@ -1157,7 +1282,7 @@ bail:
  * incoming RC packet for the given QP.
  * Called at interrupt level.
  * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent and the s_lock unlocked.
+ * schedule a response to be sent.
  */
 static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 				     struct ipath_other_headers *ohdr,
@@ -1168,25 +1293,23 @@ static inline int ipath_rc_rcv_error(str
 				     int diff,
 				     int header_in_data)
 {
-	struct ib_reth *reth;
+	struct ipath_ack_entry *e;
+	u8 i, prev;
+	int old_req;
 
 	if (diff > 0) {
 		/*
 		 * Packet sequence error.
 		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if a RDMA read, atomic, or
-		 * NAK is pending though.
+		 * Don't queue the NAK if we already sent one.
 		 */
-		if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
-		    qp->r_nak_state != 0)
-			goto done;
-		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-			qp->r_ack_state = OP(SEND_ONLY);
+		if (!qp->r_nak_state) {
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
+			goto send_ack;
 		}
-		goto send_ack;
+		goto done;
 	}
 
 	/*
@@ -1199,8 +1322,46 @@ static inline int ipath_rc_rcv_error(str
 	 * can coalesce an outstanding duplicate ACK.  We have to
 	 * send the earliest so that RDMA reads can be restarted at
 	 * the requester's expected PSN.
+	 *
+	 * First, find where this duplicate PSN falls within the
+	 * ACKs previously sent.
 	 */
-	if (opcode == OP(RDMA_READ_REQUEST)) {
+	psn &= IPATH_PSN_MASK;
+	e = NULL;
+	old_req = 1;
+	spin_lock_irq(&qp->s_lock);
+	for (i = qp->r_head_ack_queue; ; i = prev) {
+		if (i == qp->s_tail_ack_queue)
+			old_req = 0;
+		if (i)
+			prev = i - 1;
+		else
+			prev = IPATH_MAX_RDMA_ATOMIC;
+		if (prev == qp->r_head_ack_queue) {
+			e = NULL;
+			break;
+		}
+		e = &qp->s_ack_queue[prev];
+		if (!e->opcode) {
+			e = NULL;
+			break;
+		}
+		if (ipath_cmp24(psn, e->psn) >= 0)
+			break;
+	}
+	switch (opcode) {
+	case OP(RDMA_READ_REQUEST): {
+		struct ib_reth *reth;
+		u32 offset;
+		u32 len;
+
+		/*
+		 * If we didn't find the RDMA read request in the ack queue,
+		 * or the send tasklet is already backed up to send an
+		 * earlier entry, we can ignore this request.
+		 */
+		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+			goto unlock_done;
 		/* RETH comes after BTH */
 		if (!header_in_data)
 			reth = &ohdr->u.rc.reth;
@@ -1209,88 +1370,95 @@ static inline int ipath_rc_rcv_error(str
 			data += sizeof(*reth);
 		}
 		/*
-		 * If we receive a duplicate RDMA request, it means the
-		 * requester saw a sequence error and needs to restart
-		 * from an earlier point.  We can abort the current
-		 * RDMA read send in that case.
-		 */
-		spin_lock_irq(&qp->s_lock);
-		if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
-		    (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
-			/*
-			 * We are already sending earlier requested data.
-			 * Don't abort it to send later out of sequence data.
-			 */
-			spin_unlock_irq(&qp->s_lock);
-			goto done;
-		}
-		qp->s_rdma_len = be32_to_cpu(reth->length);
-		if (qp->s_rdma_len != 0) {
+		 * Address range must be a subset of the original
+		 * request and start on pmtu boundaries.
+		 * We reuse the old ack_queue slot since the requester
+		 * should not back up and request an earlier PSN for the
+		 * same request.
+		 */
+		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+			ib_mtu_enum_to_int(qp->path_mtu);
+		len = be32_to_cpu(reth->length);
+		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+			goto unlock_done;
+		if (len != 0) {
 			u32 rkey = be32_to_cpu(reth->rkey);
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			/*
-			 * Address range must be a subset of the original
-			 * request and start on pmtu boundaries.
-			 */
-			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
-					   qp->s_rdma_len, vaddr, rkey,
+			ok = ipath_rkey_ok(qp, &e->rdma_sge,
+					   len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok)) {
-				spin_unlock_irq(&qp->s_lock);
-				goto done;
-			}
+			if (unlikely(!ok))
+				goto unlock_done;
 		} else {
-			qp->s_rdma_sge.sg_list = NULL;
-			qp->s_rdma_sge.num_sge = 0;
-			qp->s_rdma_sge.sge.mr = NULL;
-			qp->s_rdma_sge.sge.vaddr = NULL;
-			qp->s_rdma_sge.sge.length = 0;
-			qp->s_rdma_sge.sge.sge_length = 0;
-		}
-		qp->s_ack_state = opcode;
-		qp->s_ack_psn = psn;
-		spin_unlock_irq(&qp->s_lock);
-		tasklet_hi_schedule(&qp->s_task);
-		goto send_ack;
+			e->rdma_sge.sg_list = NULL;
+			e->rdma_sge.num_sge = 0;
+			e->rdma_sge.sge.mr = NULL;
+			e->rdma_sge.sge.vaddr = NULL;
+			e->rdma_sge.sge.length = 0;
+			e->rdma_sge.sge.sge_length = 0;
+		}
+		e->psn = psn;
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = prev;
+		break;
 	}
 
-	/*
-	 * A pending RDMA read will ACK anything before it so
-	 * ignore earlier duplicate requests.
-	 */
-	if (qp->s_ack_state != OP(ACKNOWLEDGE))
-		goto done;
-
-	/*
-	 * If an ACK is pending, don't replace the pending ACK
-	 * with an earlier one since the later one will ACK the earlier.
-	 * Also, if we already have a pending atomic, send it.
-	 */
-	if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
-	    (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
-	     qp->r_ack_state >= OP(COMPARE_SWAP)))
-		goto send_ack;
-	switch (opcode) {
 	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD):
+	case OP(FETCH_ADD): {
 		/*
-		 * Check for the PSN of the last atomic operation
-		 * performed and resend the result if found.
+		 * If we didn't find the atomic request in the ack queue
+		 * or the send tasklet is already backed up to send an
+		 * earlier entry, we can ignore this request.
 		 */
-		if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
-			goto done;
+		if (!e || e->opcode != (u8) opcode || old_req)
+			goto unlock_done;
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = prev;
+		break;
+	}
+
+	default:
+		if (old_req)
+			goto unlock_done;
+		/*
+		 * Resend the most recent ACK if this request is
+		 * after all the previous RDMA reads and atomics.
+		 */
+		if (i == qp->r_head_ack_queue) {
+			spin_unlock_irq(&qp->s_lock);
+			qp->r_nak_state = 0;
+			qp->r_ack_psn = qp->r_psn - 1;
+			goto send_ack;
+		}
+		/*
+		 * Resend the RDMA read or atomic op which
+		 * ACKs this duplicate request.
+		 */
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+		qp->s_tail_ack_queue = i;
 		break;
 	}
-	qp->r_ack_state = opcode;
 	qp->r_nak_state = 0;
-	qp->r_ack_psn = psn;
-send_ack:
-	return 0;
+	spin_unlock_irq(&qp->s_lock);
+	tasklet_hi_schedule(&qp->s_task);
 
+unlock_done:
+	spin_unlock_irq(&qp->s_lock);
 done:
 	return 1;
+
+send_ack:
+	return 0;
+}
+
+static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+	spin_lock_irq(&qp->s_lock);
+	qp->state = IB_QPS_ERR;
+	ipath_error_qp(qp, err);
+	spin_unlock_irq(&qp->s_lock);
 }
 
 /**
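
The duplicate-request handling above locates a resent RDMA read or atomic by walking the queued responses backwards from the newest entry until it finds one whose starting PSN the duplicate falls at or after. A compact model of that search (the ring layout mirrors the driver's, names are invented, and the old_req bookkeeping is omitted):

/* Sketch of the backwards ack-queue search for a duplicate PSN. */
#include <stdio.h>
#include <stdint.h>

#define MAX_RD_ATOMIC 4
#define SLOTS (MAX_RD_ATOMIC + 1)

struct ack_entry { uint8_t valid; uint32_t psn; };

/* Returns the slot whose entry starts at or before psn, or -1. */
static int find_dup(const struct ack_entry *q, unsigned head, uint32_t psn)
{
	unsigned i, prev;

	/* the driver also notes whether the hit is at or behind the send
	 * tail (old_req); that bookkeeping is left out of this sketch */
	for (i = head; ; i = prev) {
		prev = i ? i - 1 : MAX_RD_ATOMIC;
		if (prev == head)       /* wrapped all the way round */
			return -1;
		if (!q[prev].valid)     /* ran past the oldest entry */
			return -1;
		if ((int32_t)((psn - q[prev].psn) << 8) >> 8 >= 0)
			return (int)prev;
	}
}

int main(void)
{
	struct ack_entry q[SLOTS] = { {1, 100}, {1, 110}, {0, 0}, {0, 0}, {0, 0} };

	printf("slot %d\n", find_dup(q, 2, 105));  /* expect slot 0 */
	return 0;
}
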
@@ -1320,6 +1488,10 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 	struct ib_reth *reth;
 	int header_in_data;
 
+	/* Validate the SLID. See Ch. 9.6.1.5 */
+	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+		goto done;
+
 	/* Check for GRH */
 	if (!has_grh) {
 		ohdr = &hdr->u.oth;
@@ -1374,16 +1546,7 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
 			break;
 	nack_inv:
-		/*
-		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if a RDMA read, atomic, or NAK
-		 * is pending though.
-		 */
-		if (qp->r_ack_state >= OP(COMPARE_SWAP))
-			goto send_ack;
-		/* XXX Flush WQEs */
-		qp->state = IB_QPS_ERR;
-		qp->r_ack_state = OP(SEND_ONLY);
+		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
 		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
 		qp->r_ack_psn = qp->r_psn;
 		goto send_ack;
@@ -1425,9 +1588,8 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 			 * Don't queue the NAK if a RDMA read or atomic
 			 * is pending though.
 			 */
-			if (qp->r_ack_state >= OP(COMPARE_SWAP))
-				goto send_ack;
-			qp->r_ack_state = OP(SEND_ONLY);
+			if (qp->r_nak_state)
+				goto done;
 			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
 			qp->r_ack_psn = qp->r_psn;
 			goto send_ack;
@@ -1488,14 +1650,14 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 			goto nack_inv;
 		ipath_copy_sge(&qp->r_sge, data, tlen);
 		qp->r_msn++;
-		if (opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_ONLY))
+		if (!qp->r_wrid_valid)
 			break;
+		qp->r_wrid_valid = 0;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = IB_WC_RECV;
 		wc.vendor_err = 0;
-		wc.qp_num = qp->ibqp.qp_num;
+		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
 		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
@@ -1528,7 +1690,7 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(dev, &qp->r_sge,
+			ok = ipath_rkey_ok(qp, &qp->r_sge,
 					   qp->r_len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
@@ -1551,7 +1713,19 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 			goto rnr_nak;
 		goto send_last_imm;
 
-	case OP(RDMA_READ_REQUEST):
+	case OP(RDMA_READ_REQUEST): {
+		struct ipath_ack_entry *e;
+		u32 len;
+		u8 next;
+
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto nack_acc;
+		next = qp->r_head_ack_queue + 1;
+		if (next > IPATH_MAX_RDMA_ATOMIC)
+			next = 0;
+		if (unlikely(next == qp->s_tail_ack_queue))
+			goto nack_inv;
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		/* RETH comes after BTH */
 		if (!header_in_data)
 			reth = &ohdr->u.rc.reth;
@@ -1559,138 +1733,127 @@ void ipath_rc_rcv(struct ipath_ibdev *de
 			reth = (struct ib_reth *)data;
 			data += sizeof(*reth);
 		}
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto nack_acc;
-		spin_lock_irq(&qp->s_lock);
-		qp->s_rdma_len = be32_to_cpu(reth->length);
-		if (qp->s_rdma_len != 0) {
+		len = be32_to_cpu(reth->length);
+		if (len) {
 			u32 rkey = be32_to_cpu(reth->rkey);
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
-					   qp->s_rdma_len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok)) {
-				spin_unlock_irq(&qp->s_lock);
+			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					   rkey, IB_ACCESS_REMOTE_READ);
+			if (unlikely(!ok))
 				goto nack_acc;
-			}
 			/*
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
 			 */
-			if (qp->s_rdma_len > pmtu)
-				qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
+			if (len > pmtu)
+				qp->r_psn += (len - 1) / pmtu;
 		} else {
-			qp->s_rdma_sge.sg_list = NULL;
-			qp->s_rdma_sge.num_sge = 0;
-			qp->s_rdma_sge.sge.mr = NULL;
-			qp->s_rdma_sge.sge.vaddr = NULL;
-			qp->s_rdma_sge.sge.length = 0;
-			qp->s_rdma_sge.sge.sge_length = 0;
+			e->rdma_sge.sg_list = NULL;
+			e->rdma_sge.num_sge = 0;
+			e->rdma_sge.sge.mr = NULL;
+			e->rdma_sge.sge.vaddr = NULL;
+			e->rdma_sge.sge.length = 0;
+			e->rdma_sge.sge.sge_length = 0;
 		}
+		e->opcode = opcode;
+		e->psn = psn;
 		/*
 		 * We need to increment the MSN here instead of when we
 		 * finish sending the result since a duplicate request would
 		 * increment it more than once.
 		 */
 		qp->r_msn++;
-
-		qp->s_ack_state = opcode;
-		qp->s_ack_psn = psn;
-		spin_unlock_irq(&qp->s_lock);
-
 		qp->r_psn++;
 		qp->r_state = opcode;
 		qp->r_nak_state = 0;
+		barrier();
+		qp->r_head_ack_queue = next;
 
 		/* Call ipath_do_rc_send() in another thread. */
 		tasklet_hi_schedule(&qp->s_task);
 
 		goto done;
+	}
 
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
+		struct ipath_ack_entry *e;
 		u64 vaddr;
+		atomic64_t *maddr;
 		u64 sdata;
 		u32 rkey;
+		u8 next;
 
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_ATOMIC)))
+			goto nack_acc;
+		next = qp->r_head_ack_queue + 1;
+		if (next > IPATH_MAX_RDMA_ATOMIC)
+			next = 0;
+		if (unlikely(next == qp->s_tail_ack_queue))
+			goto nack_inv;
 		if (!header_in_data)
 			ateth = &ohdr->u.atomic_eth;
-		else {
+		else
 			ateth = (struct ib_atomic_eth *)data;
-			data += sizeof(*ateth);
-		}
-		vaddr = be64_to_cpu(ateth->vaddr);
+		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
 					    sizeof(u64), vaddr, rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc;
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc;
 		/* Perform atomic OP and save result. */
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
-		spin_lock_irq(&dev->pending_lock);
-		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
-		if (opcode == OP(FETCH_ADD))
-			*(u64 *) qp->r_sge.sge.vaddr =
-				qp->r_atomic_data + sdata;
-		else if (qp->r_atomic_data ==
-			 be64_to_cpu(ateth->compare_data))
-			*(u64 *) qp->r_sge.sge.vaddr = sdata;
-		spin_unlock_irq(&dev->pending_lock);
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
+		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      be64_to_cpu(ateth->compare_data),
+				      sdata);
+		e->opcode = opcode;
+		e->psn = psn & IPATH_PSN_MASK;
 		qp->r_msn++;
-		qp->r_atomic_psn = psn & IPATH_PSN_MASK;
-		psn |= 1 << 31;
-		break;
+		qp->r_psn++;
+		qp->r_state = opcode;
+		qp->r_nak_state = 0;
+		barrier();
+		qp->r_head_ack_queue = next;
+
+		/* Call ipath_do_rc_send() in another thread. */
+		tasklet_hi_schedule(&qp->s_task);
+
+		goto done;
 	}
 
 	default:
-		/* Drop packet for unknown opcodes. */
-		goto done;
+		/* NAK unknown opcodes. */
+		goto nack_inv;
 	}
 	qp->r_psn++;
 	qp->r_state = opcode;
+	qp->r_ack_psn = psn;
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
-	if (psn & (1 << 31)) {
-		/*
-		 * Coalesce ACKs unless there is a RDMA READ or
-		 * ATOMIC pending.
-		 */
-		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-			qp->r_ack_state = opcode;
-			qp->r_ack_psn = psn;
-		}
+	if (psn & (1 << 31))
 		goto send_ack;
-	}
 	goto done;
 
 nack_acc:
-	/*
-	 * A NAK will ACK earlier sends and RDMA writes.
-	 * Don't queue the NAK if a RDMA read, atomic, or NAK
-	 * is pending though.
-	 */
-	if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-		/* XXX Flush WQEs */
-		qp->state = IB_QPS_ERR;
-		qp->r_ack_state = OP(RDMA_WRITE_ONLY);
-		qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-		qp->r_ack_psn = qp->r_psn;
-	}
+	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+	qp->r_ack_psn = qp->r_psn;
+
 send_ack:
-	/* Send ACK right away unless the send tasklet has a pending ACK. */
-	if (qp->s_ack_state == OP(ACKNOWLEDGE))
-		send_rc_ack(qp);
+	send_rc_ack(qp);
 
 done:
 	return;
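
Both atomic handlers above hand the requester the target's prior value: atomic64_add_return() minus the addend for FETCH_ADD, and the return of cmpxchg() for COMPARE_SWAP. A userspace equivalent using C11 atomics, offered only as a sketch of the semantics rather than the kernel API:

/* Sketch of fetch-add and compare-swap that return the old value. */
#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

static uint64_t fetch_add64(_Atomic uint64_t *target, uint64_t addend)
{
	return atomic_fetch_add(target, addend);          /* old value */
}

static uint64_t compare_swap64(_Atomic uint64_t *target,
			       uint64_t compare, uint64_t swap)
{
	uint64_t old = compare;

	/* on failure, old is overwritten with the current value */
	atomic_compare_exchange_strong(target, &old, swap);
	return old;                                       /* old value */
}

int main(void)
{
	_Atomic uint64_t mem = 40;

	printf("%llu\n", (unsigned long long)fetch_add64(&mem, 2));        /* 40 */
	printf("%llu\n", (unsigned long long)compare_swap64(&mem, 42, 7)); /* 42 */
	printf("%llu\n", (unsigned long long)atomic_load(&mem));           /* 7  */
	return 0;
}
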
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_registers.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,18 +126,41 @@
 #define INFINIPATH_E_RESET           0x0004000000000000ULL
 #define INFINIPATH_E_HARDWARE        0x0008000000000000ULL
 
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+		| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+		| INFINIPATH_E_REBP )
+
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
  * 		bit 4: flag buffer, 5: datainfo, 6: header info */
 #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
 #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
 #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED        0x0040000000000000ULL
 #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
 #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF	0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC	0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
+/* monty specific -- find the rest in ipath_6120.c */
+#define INFINIPATH_HWE_MEMBISTFAILED	0x0040000000000000ULL
 
 /* kr_hwdiagctrl bits */
 #define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -209,9 +232,9 @@
 
 /* combination link status states that we use with some frequency */
 #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
+		<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
 		(INFINIPATH_IBCS_LINKSTATE_MASK \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
+		<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
 #define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
 		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
 		(INFINIPATH_IBCS_LT_STATE_LINKUP \
@@ -302,6 +325,17 @@
 
 typedef u64 ipath_err_t;
 
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
 /* mask of defined bits for various registers */
 extern u64 infinipath_i_bitsextant;
 extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -310,13 +344,6 @@ extern ipath_err_t infinipath_e_bitsexta
 extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
 
 /*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
  * These are the infinipath general register numbers (not offsets).
  * The kernel registers are used directly, those beyond the kernel
  * registers are calculated from one of the base registers.  The use of
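
The IPATH_IBSTATE_MASK fix above is easy to skim past: the two field masks were previously shifted by each other's shift constants, so the combined mask covered the wrong bits of the IBC status register.  For reference, a hypothetical pair of accessors showing the corrected pairing of each mask with its own shift (assumes ipath_registers.h for the macros):

	#include <linux/types.h>

	static inline u64 ibcs_link_state(u64 ibcstatus)
	{
		return (ibcstatus >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
			INFINIPATH_IBCS_LINKSTATE_MASK;
	}

	static inline u64 ibcs_link_training_state(u64 ibcstatus)
	{
		return (ibcstatus >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
			INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
	}
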
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -33,7 +33,6 @@
 
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
-#include "ipath_common.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -107,6 +106,52 @@ void ipath_insert_rnr_queue(struct ipath
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
+				   IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	qp->r_sge.sge = qp->r_sg_list[0];
+	qp->r_sge.sg_list = qp->r_sg_list + 1;
+	qp->r_sge.num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
 /**
  * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
@@ -120,71 +165,72 @@ int ipath_get_rwqe(struct ipath_qp *qp, 
 {
 	unsigned long flags;
 	struct ipath_rq *rq;
+	struct ipath_rwq *wq;
 	struct ipath_srq *srq;
 	struct ipath_rwqe *wqe;
-	int ret = 1;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;
 
-	if (!qp->ibqp.srq) {
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
-		spin_lock_irqsave(&rq->lock, flags);
-
-		if (unlikely(rq->tail == rq->head)) {
-			ret = 0;
-			goto done;
-		}
-		wqe = get_rwqe_ptr(rq, rq->tail);
-		qp->r_wr_id = wqe->wr_id;
-		if (!wr_id_only) {
-			qp->r_sge.sge = wqe->sg_list[0];
-			qp->r_sge.sg_list = wqe->sg_list + 1;
-			qp->r_sge.num_sge = wqe->num_sge;
-			qp->r_len = wqe->length;
-		}
-		if (++rq->tail >= rq->size)
-			rq->tail = 0;
-		goto done;
 	}
 
-	srq = to_isrq(qp->ibqp.srq);
-	rq = &srq->rq;
 	spin_lock_irqsave(&rq->lock, flags);
-
-	if (unlikely(rq->tail == rq->head)) {
-		ret = 0;
-		goto done;
-	}
-	wqe = get_rwqe_ptr(rq, rq->tail);
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	do {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ret = 0;
+			goto bail;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+	} while (!wr_id_only && !init_sge(qp, wqe));
 	qp->r_wr_id = wqe->wr_id;
-	if (!wr_id_only) {
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->num_sge;
-		qp->r_len = wqe->length;
-	}
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq->ibsrq.event_handler) {
-		struct ib_event ev;
+	wq->tail = tail;
+
+	ret = 1;
+	qp->r_wrid_valid = 1;
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
+			struct ib_event ev;
+
 			srq->limit = 0;
 			spin_unlock_irqrestore(&rq->lock, flags);
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 			goto bail;
 		}
 	}
-
-done:
 	spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
 	return ret;
 }
@@ -209,6 +255,7 @@ static void ipath_ruc_loopback(struct ip
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
+	atomic64_t *maddr;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
 	if (!qp) {
@@ -219,7 +266,8 @@ static void ipath_ruc_loopback(struct ip
 again:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
+	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
+	    qp->s_rnr_timeout) {
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		goto done;
 	}
@@ -264,7 +312,7 @@ again:
 				sqp->s_rnr_retry--;
 			dev->n_rnr_naks++;
 			sqp->s_rnr_timeout =
-				ib_ipath_rnr_table[sqp->r_min_rnr_timer];
+				ib_ipath_rnr_table[qp->r_min_rnr_timer];
 			ipath_insert_rnr_queue(sqp);
 			goto done;
 		}
@@ -279,7 +327,7 @@ again:
 	case IB_WR_RDMA_WRITE:
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_WRITE))) {
@@ -290,27 +338,29 @@ again:
 			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 			wc.vendor_err = 0;
 			wc.byte_len = 0;
-			wc.qp_num = sqp->ibqp.qp_num;
+			wc.qp = &sqp->ibqp;
 			wc.src_qp = sqp->remote_qpn;
 			wc.pkey_index = 0;
 			wc.slid = sqp->remote_ah_attr.dlid;
 			wc.sl = sqp->remote_ah_attr.sl;
 			wc.dlid_path_bits = 0;
 			wc.port_num = 0;
+			spin_lock_irqsave(&sqp->s_lock, flags);
 			ipath_sqerror_qp(sqp, &wc);
+			spin_unlock_irqrestore(&sqp->s_lock, flags);
 			goto done;
 		}
 		break;
 
 	case IB_WR_RDMA_READ:
-		if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_READ)))
+			goto acc_err;
+		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
 		qp->r_sge.sge = wqe->sg_list[0];
 		qp->r_sge.sg_list = wqe->sg_list + 1;
 		qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -318,22 +368,22 @@ again:
 
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
-					    wqe->wr.wr.rdma.remote_addr,
-					    wqe->wr.wr.rdma.rkey,
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_ATOMIC)))
+			goto acc_err;
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
+					    wqe->wr.wr.atomic.remote_addr,
+					    wqe->wr.wr.atomic.rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
-		sdata = wqe->wr.wr.atomic.swap;
-		spin_lock_irqsave(&dev->pending_lock, flags);
-		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
-		if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-			*(u64 *) qp->r_sge.sge.vaddr =
-				qp->r_atomic_data + sdata;
-		else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
-			*(u64 *) qp->r_sge.sge.vaddr = sdata;
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
-		*(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		sdata = wqe->wr.wr.atomic.compare_add;
+		*(u64 *) sqp->s_sge.sge.vaddr =
+			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      sdata, wqe->wr.wr.atomic.swap);
 		goto send_comp;
 
 	default:
@@ -380,7 +430,7 @@ again:
 	wc.status = IB_WC_SUCCESS;
 	wc.vendor_err = 0;
 	wc.byte_len = wqe->length;
-	wc.qp_num = qp->ibqp.qp_num;
+	wc.qp = &qp->ibqp;
 	wc.src_qp = qp->remote_qpn;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc.pkey_index = 0;
@@ -394,14 +444,14 @@ again:
 send_comp:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
+	if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		wc.wr_id = wqe->wr.wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		wc.vendor_err = 0;
 		wc.byte_len = wqe->length;
-		wc.qp_num = sqp->ibqp.qp_num;
+		wc.qp = &sqp->ibqp;
 		wc.src_qp = 0;
 		wc.pkey_index = 0;
 		wc.slid = 0;
@@ -456,7 +506,7 @@ void ipath_no_bufs_available(struct ipat
 	 * We clear the tasklet flag now since we are committing to return
 	 * from the tasklet function.
 	 */
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	clear_bit(IPATH_S_BUSY, &qp->s_busy);
 	tasklet_unlock(&qp->s_task);
 	want_buffer(dev->dd);
 	dev->n_piowait++;
@@ -495,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp 
 		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
 		ret = -EINVAL;
 		goto bail;
+	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+		ret = -EINVAL;
+		goto bail;
 	}
 	/* IB spec says that num_sge == 0 is OK. */
 	if (wr->num_sge > qp->s_max_sge) {
@@ -528,8 +581,7 @@ int ipath_post_ruc_send(struct ipath_qp 
 		}
 		if (wr->sg_list[i].length == 0)
 			continue;
-		if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
-				   &wqe->sg_list[j], &wr->sg_list[i],
+		if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
 				   acc)) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ret = -EINVAL;
@@ -602,7 +654,7 @@ void ipath_do_ruc_send(unsigned long dat
 	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
 	struct ipath_other_headers *ohdr;
 
-	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
 		goto bail;
 
 	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -638,19 +690,15 @@ again:
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	/* Sending responses has higher priority over sending requests. */
-	if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
-	    (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
-		bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
-	else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
-		   ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
-		   ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
+	if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
+	       ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
+	       ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
 		/*
 		 * Clear the busy bit before unlocking to avoid races with
 		 * adding new work queue items and then failing to process
 		 * them.
 		 */
-		clear_bit(IPATH_S_BUSY, &qp->s_flags);
+		clear_bit(IPATH_S_BUSY, &qp->s_busy);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		goto bail;
 	}
@@ -683,7 +731,7 @@ again:
 	goto again;
 
 clear:
-	clear_bit(IPATH_S_BUSY, &qp->s_flags);
+	clear_bit(IPATH_S_BUSY, &qp->s_busy);
 bail:
 	return;
 }
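
One pattern worth calling out in the ipath_ruc.c changes: the receive queue indices now live in an ipath_rwq that is shared with user space (see the ipath_srq.c and ipath_verbs.h hunks), so ipath_get_rwqe() clamps head and tail before trusting them and recomputes the remaining-entry count from the clamped values.  A minimal sketch of that arithmetic, with a hypothetical helper name:

	#include <linux/types.h>

	static u32 demo_ring_entries(u32 head, u32 tail, u32 size)
	{
		/* never trust indices a process can scribble on */
		if (head >= size)
			head = 0;
		if (tail >= size)
			tail = 0;
		return (head >= tail) ? head - tail : size + head - tail;
	}
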
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_srq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq
 			   struct ib_recv_wr **bad_wr)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
+	struct ipath_rwq *wq;
 	unsigned long flags;
 	int ret;
 
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > srq->rq.max_sge) {
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&srq->rq.lock, flags);
-		next = srq->rq.head + 1;
+		wq = srq->rq.wq;
+		next = wq->head + 1;
 		if (next >= srq->rq.size)
 			next = 0;
-		if (next == srq->rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&srq->rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(srq->ibsrq.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(&dev->lk_table,
-					   &wqe->sg_list[j],
-					   &wr->sg_list[i],
-					   IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		srq->rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}
 	ret = 0;
@@ -152,25 +125,58 @@ struct ib_srq *ipath_create_srq(struct i
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
 		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc(srq->rq.size * sz);
+	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_srq;
 	}
 
 	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) srq->rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wq;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+		srq->ip = ip;
+		ip->context = ibpd->uobject->context;
+		ip->obj = srq->rq.wq;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+				      srq->rq.size * sz);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		srq->ip = NULL;
+
+	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
 	 */
 	spin_lock_init(&srq->rq.lock);
-	srq->rq.head = 0;
-	srq->rq.tail = 0;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
 	srq->limit = srq_init_attr->attr.srq_limit;
 
-	ret = &srq->ibsrq;
-
 	spin_lock(&dev->n_srqs_lock);
 	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
 		spin_unlock(&dev->n_srqs_lock);
@@ -178,9 +184,10 @@ struct ib_srq *ipath_create_srq(struct i
 		goto bail_wq;
 	}
 
-	dev->n_srqs_allocated++;
+	dev->n_srqs_allocated++;
 	spin_unlock(&dev->n_srqs_lock);
 
+	ret = &srq->ibsrq;
 	goto done;
 
 bail_wq:
@@ -198,83 +205,130 @@ done:
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
  */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	unsigned long flags;
-	int ret;
+	int ret = 0;
 
-	if (attr_mask & IB_SRQ_MAX_WR)
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    (attr->max_sge > srq->rq.max_sge)) {
-			ret = -EINVAL;
-			goto bail;
-		}
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		struct ipath_rwq *owq;
+		struct ipath_rwq *wq;
+		struct ipath_rwqe *p;
+		u32 sz, size, n, head, tail;
 
-	if (attr_mask & IB_SRQ_LIMIT)
-		if (attr->srq_limit >= srq->rq.size) {
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
 			ret = -EINVAL;
 			goto bail;
 		}
 
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwqe *wq, *p;
-		u32 sz, size, n;
-
 		sz = sizeof(struct ipath_rwqe) +
-			attr->max_sge * sizeof(struct ipath_sge);
+			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc(size * sz);
+		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
 		if (!wq) {
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		if (srq->rq.head < srq->rq.tail)
-			n = srq->rq.size + srq->rq.head - srq->rq.tail;
+		/*
+		 * Return the address of the RWQ as the offset to mmap.
+		 * See ipath_mmap() for details.
+		 */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = (__u64) wq;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+			udata->outbuf = (void __user *) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		if (head >= srq->rq.size)
+			head = 0;
+		tail = owq->tail;
+		if (tail >= srq->rq.size)
+			tail = 0;
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
 		else
-			n = srq->rq.head - srq->rq.tail;
-		if (size <= n || size <= srq->limit) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			n -= tail;
+		if (size <= n) {
+			spin_unlock_irq(&srq->rq.lock);
 			vfree(wq);
 			ret = -EINVAL;
 			goto bail;
 		}
 		n = 0;
-		p = wq;
-		while (srq->rq.tail != srq->rq.head) {
+		p = wq->wq;
+		while (tail != head) {
 			struct ipath_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+			wqe = get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
-			p->length = wqe->length;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
 			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++srq->rq.tail >= srq->rq.size)
-				srq->rq.tail = 0;
+			if (++tail >= srq->rq.size)
+				tail = 0;
 		}
-		vfree(srq->rq.wq);
 		srq->rq.wq = wq;
 		srq->rq.size = size;
-		srq->rq.head = n;
-		srq->rq.tail = 0;
-		srq->rq.max_sge = attr->max_sge;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-
-	if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		srq->limit = attr->srq_limit;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
+
+		vfree(owq);
+
+		if (srq->ip) {
+			struct ipath_mmap_info *ip = srq->ip;
+			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+			ip->obj = wq;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 	}
-	ret = 0;
 
 bail:
 	return ret;
@@ -302,7 +356,10 @@ int ipath_destroy_srq(struct ib_srq *ibs
 	spin_lock(&dev->n_srqs_lock);
 	dev->n_srqs_allocated--;
 	spin_unlock(&dev->n_srqs_lock);
-	vfree(srq->rq.wq);
+	if (srq->ip)
+		kref_put(&srq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(srq->rq.wq);
 	kfree(srq);
 
 	return 0;
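
The SRQ changes above switch to the shared-queue scheme: the head index, tail index, and the variable-size RWQE array are carved out of a single vmalloc_user() allocation, and its address is returned through udata so ipathverbs.so can mmap() it through the driver's new mmap entry point.  A stripped-down sketch of the allocation side (struct and function names hypothetical):

	#include <linux/types.h>
	#include <linux/vmalloc.h>

	struct demo_rwq {
		u32 head;	/* new work requests posted to the head */
		u32 tail;	/* receives pull requests from here */
		/* variable-size work queue entries follow */
	};

	static struct demo_rwq *demo_alloc_rwq(u32 nwqe, size_t wqe_sz)
	{
		/* vmalloc_user() zeroes the buffer and makes it safe to
		 * remap into a process from the mmap handler. */
		return vmalloc_user(sizeof(struct demo_rwq) + nwqe * wqe_sz);
	}
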
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_stats.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long o
 	if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
-		ipath_decode_err(ebuf, sizeof ebuf,
+		int iserr;
+		iserr = ipath_decode_err(ebuf, sizeof ebuf,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+				INFINIPATH_E_PKTERRS ))
 			ipath_dev_err(dd, "Re-enabling masked errors "
 				      "(%s)\n", ebuf);
 		else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long o
 			 * them.  So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", ebuf);
+			if (iserr)
+				ipath_dbg("Re-enabling queue full errors (%s)\n",
+					  ebuf);
+			else
+				ipath_cdbg(ERRPKT, "Re-enabling packet"
+					   " problem interrupt (%s)\n", ebuf);
 		}
 		dd->ipath_maskederrs = dd->ipath_ignorederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
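
The logging change above leans on the new INFINIPATH_E_PKTERRS group from ipath_registers.h: errors in that class (and the two queue-full bits) are routine under load, so only bits outside it rate a dev_err when the mask is restored.  Restated as a hypothetical helper:

	#include <linux/types.h>
	/* assumes ipath_registers.h for the INFINIPATH_E_* definitions */

	static int demo_noisy_errs(u64 masked_errs)
	{
		return (masked_errs & ~(INFINIPATH_E_RRCVEGRFULL |
					INFINIPATH_E_RRCVHDRFULL |
					INFINIPATH_E_PKTERRS)) != 0;
	}
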
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -215,7 +215,6 @@ static ssize_t store_mlid(struct device 
 			  size_t count)
 {
 	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int unit;
 	u16 mlid;
 	int ret;
 
@@ -223,8 +222,6 @@ static ssize_t store_mlid(struct device 
 	if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
 		goto invalid;
 
-	unit = dd->ipath_unit;
-
 	dd->ipath_mlid = mlid;
 
 	goto bail;
@@ -257,7 +254,7 @@ static ssize_t store_guid(struct device 
 	struct ipath_devdata *dd = dev_get_drvdata(dev);
 	ssize_t ret;
 	unsigned short guid[8];
-	__be64 nguid;
+	__be64 new_guid;
 	u8 *ng;
 	int i;
 
@@ -266,7 +263,7 @@ static ssize_t store_guid(struct device 
 		   &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
 		goto invalid;
 
-	ng = (u8 *) &nguid;
+	ng = (u8 *) &new_guid;
 
 	for (i = 0; i < 8; i++) {
 		if (guid[i] > 0xff)
@@ -274,7 +271,10 @@ static ssize_t store_guid(struct device 
 		ng[i] = guid[i];
 	}
 
-	dd->ipath_guid = nguid;
+	if (new_guid == 0)
+		goto invalid;
+
+	dd->ipath_guid = new_guid;
 	dd->ipath_nguid = 1;
 
 	ret = strlen(buf);
@@ -297,6 +297,16 @@ static ssize_t show_nguid(struct device 
 	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
 }
 
+static ssize_t show_nports(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+	/* Return the number of user ports available. */
+	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
 static ssize_t show_serial(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
@@ -608,6 +618,7 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRU
 static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
 static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
 static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
 static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
 static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
@@ -623,6 +634,7 @@ static struct attribute *dev_attributes[
 	&dev_attr_mlid.attr,
 	&dev_attr_mtu.attr,
 	&dev_attr_nguid.attr,
+	&dev_attr_nports.attr,
 	&dev_attr_serial.attr,
 	&dev_attr_status.attr,
 	&dev_attr_status_str.attr,
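
The nports attribute above follows the stock sysfs recipe: a show routine, a DEVICE_ATTR() definition, and an entry in dev_attributes[] so the file gets created with the rest.  For anyone adding further attributes, a minimal read-only example (the attribute name is hypothetical):

	#include <linux/device.h>
	#include <linux/kernel.h>

	static ssize_t show_example(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		/* sysfs hands us a full page; scnprintf keeps us inside it */
		return scnprintf(buf, PAGE_SIZE, "%u\n", 1u);
	}
	static DEVICE_ATTR(example, S_IRUGO, show_example, NULL);
	/* ...and add &dev_attr_example.attr to dev_attributes[] */
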
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_uc.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,14 +42,14 @@ static void complete_last_send(struct ip
 {
 	if (++qp->s_last == qp->s_size)
 		qp->s_last = 0;
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		wc->wr_id = wqe->wr.wr_id;
 		wc->status = IB_WC_SUCCESS;
 		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
 		wc->vendor_err = 0;
 		wc->byte_len = wqe->length;
-		wc->qp_num = qp->ibqp.qp_num;
+		wc->qp = &qp->ibqp;
 		wc->src_qp = qp->remote_qpn;
 		wc->pkey_index = 0;
 		wc->slid = qp->remote_ah_attr.dlid;
@@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *de
 	struct ib_reth *reth;
 	int header_in_data;
 
+	/* Validate the SLID. See Ch. 9.6.1.5 */
+	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+		goto done;
+
 	/* Check for GRH */
 	if (!has_grh) {
 		ohdr = &hdr->u.oth;
@@ -340,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *de
 	send_first:
 		if (qp->r_reuse_sge) {
 			qp->r_reuse_sge = 0;
-			qp->r_sge = qp->s_rdma_sge;
+			qp->r_sge = qp->s_rdma_read_sge;
 		} else if (!ipath_get_rwqe(qp, 0)) {
 			dev->n_pkt_drops++;
 			goto done;
 		}
 		/* Save the WQE so we can reuse it in case of an error. */
-		qp->s_rdma_sge = qp->r_sge;
+		qp->s_rdma_read_sge = qp->r_sge;
 		qp->r_rcv_len = 0;
 		if (opcode == OP(SEND_ONLY))
 			goto send_last;
@@ -407,7 +411,7 @@ void ipath_uc_rcv(struct ipath_ibdev *de
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = IB_WC_RECV;
 		wc.vendor_err = 0;
-		wc.qp_num = qp->ibqp.qp_num;
+		wc.qp = &qp->ibqp;
 		wc.src_qp = qp->remote_qpn;
 		wc.pkey_index = 0;
 		wc.slid = qp->remote_ah_attr.dlid;
@@ -440,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *de
 			int ok;
 
 			/* Check rkey */
-			ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
+			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
 					   vaddr, rkey,
 					   IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok)) {
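
Also new in ipath_uc_rcv() is the SLID check at the top: a connected UC QP only accepts packets whose source LID matches the LID it was set up against, per the chapter 9.6.1.5 reference in the hunk.  Restated as a hypothetical predicate (assumes the usual driver headers):

	static int demo_slid_ok(const struct ipath_qp *qp, __be16 lrh3)
	{
		/* lrh[3] carries the source LID in network byte order */
		return be16_to_cpu(lrh3) == qp->remote_ah_attr.dlid;
	}
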
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_ud.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -35,7 +35,51 @@
 
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
-#include "ipath_common.h"
+
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+		    u32 *lengthp, struct ipath_sge_state *ss)
+{
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	*lengthp = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		*lengthp += wqe->sg_list[i].length;
+		j++;
+	}
+	ss->num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
 
 /**
  * ipath_ud_loopback - handle send on loopback QPs
@@ -47,6 +91,8 @@
  *
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
  */
 static void ipath_ud_loopback(struct ipath_qp *sqp,
 			      struct ipath_sge_state *ss,
@@ -61,7 +107,11 @@ static void ipath_ud_loopback(struct ipa
 	struct ipath_srq *srq;
 	struct ipath_sge_state rsge;
 	struct ipath_sge *sge;
+	struct ipath_rwq *wq;
 	struct ipath_rwqe *wqe;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	u32 rlen;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
 	if (!qp)
@@ -95,6 +145,13 @@ static void ipath_ud_loopback(struct ipa
 		wc->imm_data = 0;
 	}
 
+	if (wr->num_sge > 1) {
+		rsge.sg_list = kmalloc((wr->num_sge - 1) *
+					sizeof(struct ipath_sge),
+				       GFP_ATOMIC);
+	} else
+		rsge.sg_list = NULL;
+
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Note that it is safe to drop the lock after changing rq->tail
@@ -102,37 +159,52 @@ static void ipath_ud_loopback(struct ipa
 	 */
 	if (qp->ibqp.srq) {
 		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
 		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
 	}
+
 	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto done;
+	wq = rq->wq;
+	tail = wq->tail;
+	while (1) {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			dev->n_pkt_drops++;
+			goto bail_sge;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+		if (init_sge(qp, wqe, &rlen, &rsge))
+			break;
+		wq->tail = tail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc->byte_len > wqe->length) {
+	if (wc->byte_len > rlen) {
 		spin_unlock_irqrestore(&rq->lock, flags);
 		dev->n_pkt_drops++;
-		goto done;
+		goto bail_sge;
 	}
+	wq->tail = tail;
 	wc->wr_id = wqe->wr_id;
-	rsge.sge = wqe->sg_list[0];
-	rsge.sg_list = wqe->sg_list + 1;
-	rsge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
 			struct ib_event ev;
 
@@ -141,12 +213,12 @@ static void ipath_ud_loopback(struct ipa
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 		} else
 			spin_unlock_irqrestore(&rq->lock, flags);
 	} else
 		spin_unlock_irqrestore(&rq->lock, flags);
+
 	ah_attr = &to_iah(wr->wr.ud.ah)->attr;
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -183,11 +255,11 @@ static void ipath_ud_loopback(struct ipa
 	wc->status = IB_WC_SUCCESS;
 	wc->opcode = IB_WC_RECV;
 	wc->vendor_err = 0;
-	wc->qp_num = qp->ibqp.qp_num;
+	wc->qp = &qp->ibqp;
 	wc->src_qp = sqp->ibqp.qp_num;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc->pkey_index = 0;
-	wc->slid = ipath_layer_get_lid(dev->dd) |
+	wc->slid = dev->dd->ipath_lid |
 		(ah_attr->src_path_bits &
 		 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
 	wc->sl = ah_attr->sl;
@@ -197,6 +269,8 @@ static void ipath_ud_loopback(struct ipa
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
 		       wr->send_flags & IB_SEND_SOLICITED);
 
+bail_sge:
+	kfree(rsge.sg_list);
 done:
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
@@ -234,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *
 		goto bail;
 	}
 
+	if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
+		ret = -EPERM;
+		goto bail;
+	}
+
 	/* IB spec says that num_sge == 0 is OK. */
 	if (wr->num_sge > qp->s_max_sge) {
 		ret = -EINVAL;
@@ -267,7 +346,7 @@ int ipath_post_ud_send(struct ipath_qp *
 
 		if (wr->sg_list[i].length == 0)
 			continue;
-		if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
+		if (!ipath_lkey_ok(qp, ss.num_sge ?
 				   sg_list + ss.num_sge - 1 : &ss.sge,
 				   &wr->sg_list[i], 0)) {
 			ret = -EINVAL;
@@ -393,14 +472,14 @@ int ipath_post_ud_send(struct ipath_qp *
 
 done:
 	/* Queue the completion status entry. */
-	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
 	    (wr->send_flags & IB_SEND_SIGNALED)) {
 		wc.wr_id = wr->wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.vendor_err = 0;
 		wc.opcode = IB_WC_SEND;
 		wc.byte_len = len;
-		wc.qp_num = qp->ibqp.qp_num;
+		wc.qp = &qp->ibqp;
 		wc.src_qp = 0;
 		wc.wc_flags = 0;
 		/* XXX initialize other fields? */
@@ -434,13 +513,9 @@ void ipath_ud_rcv(struct ipath_ibdev *de
 	int opcode;
 	u32 hdrsize;
 	u32 pad;
-	unsigned long flags;
 	struct ib_wc wc;
 	u32 qkey;
 	u32 src_qp;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
 	u16 dlid;
 	int header_in_data;
 
@@ -547,19 +622,10 @@ void ipath_ud_rcv(struct ipath_ibdev *de
 
 	/*
 	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
 	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		rq = &qp->r_rq;
-	}
-	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (qp->r_reuse_sge)
+		qp->r_reuse_sge = 0;
+	else if (!ipath_get_rwqe(qp, 0)) {
 		/*
 		 * Count VL15 packets dropped due to no receive buffer.
 		 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +639,11 @@ void ipath_ud_rcv(struct ipath_ibdev *de
 		goto bail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc.byte_len > wqe->length) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (wc.byte_len > qp->r_len) {
+		qp->r_reuse_sge = 1;
 		dev->n_pkt_drops++;
 		goto bail;
 	}
-	wc.wr_id = wqe->wr_id;
-	qp->r_sge.sge = wqe->sg_list[0];
-	qp->r_sge.sg_list = wqe->sg_list + 1;
-	qp->r_sge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
-		u32 n;
-
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
-		else
-			n = rq->head - rq->tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
 	if (has_grh) {
 		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
 			       sizeof(struct ib_grh));
@@ -614,10 +652,12 @@ void ipath_ud_rcv(struct ipath_ibdev *de
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
+	qp->r_wrid_valid = 0;
+	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
 	wc.vendor_err = 0;
-	wc.qp_num = qp->ibqp.qp_num;
+	wc.qp = &qp->ibqp;
 	wc.src_qp = src_qp;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc.pkey_index = 0;
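
Both init_sge() helpers added above (ipath_ruc.c and ipath_ud.c) handle an LKEY failure the same way: the receive WQE is consumed and completed with IB_WC_LOC_PROT_ERR instead of wedging the whole QP.  The core of that path, condensed into a hypothetical helper (assumes the usual driver headers for struct ipath_qp, ipath_cq_enter(), and memset()):

	static void demo_flush_bad_lkey(struct ipath_qp *qp, u64 wr_id)
	{
		struct ib_wc wc;

		memset(&wc, 0, sizeof(wc));
		wc.wr_id = wr_id;
		wc.status = IB_WC_LOC_PROT_ERR;
		wc.opcode = IB_WC_RECV;
		wc.qp = &qp->ibqp;
		/* signal a solicited completion, as the helpers above do */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
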
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -90,6 +90,62 @@ bail:
 }
 
 /**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to ensure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+	unsigned long offset, size_t size, int direction)
+{
+	dma_addr_t phys;
+
+	phys = pci_map_page(hwdev, page, offset, size, direction);
+
+	if (phys == 0) {
+		pci_unmap_page(hwdev, phys, size, direction);
+		phys = pci_map_page(hwdev, page, offset, size, direction);
+		/*
+		 * FIXME: If we get 0 again, we should keep this page,
+		 * map another, then free the 0 page.
+		 */
+	}
+
+	return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+	int direction)
+{
+	dma_addr_t phys;
+
+	phys = pci_map_single(hwdev, ptr, size, direction);
+
+	if (phys == 0) {
+		pci_unmap_single(hwdev, phys, size, direction);
+		phys = pci_map_single(hwdev, ptr, size, direction);
+		/*
+		 * FIXME: If we get 0 again, we should keep this page,
+		 * map another, then free the 0 page.
+		 */
+	}
+
+	return phys;
+}
+
+/**
  * ipath_get_user_pages - lock user pages into memory
  * @start_page: the start page
  * @num_pages: the number of pages
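
The two wrappers above exist because the chip treats a DMA address of 0 as "disabled", so a perfectly valid mapping at 0 would silently turn a buffer off.  Callers simply substitute the wrapper for the bare pci_map_*() call; a hypothetical caller-side sketch:

	#include <linux/pci.h>

	static dma_addr_t demo_map_rcv_page(struct pci_dev *pdev, struct page *page)
	{
		/* never hands the chip a dma_addr of 0 (one retry, see above) */
		return ipath_map_page(pdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
	}
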
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -173,7 +173,7 @@ void ipath_copy_sge(struct ipath_sge_sta
 		BUG_ON(len == 0);
 		if (len > length)
 			len = length;
-		memcpy_cachebypass(sge->vaddr, data, len);
+		memcpy(sge->vaddr, data, len);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -291,11 +291,12 @@ static int ipath_post_receive(struct ib_
 			      struct ib_recv_wr **bad_wr)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
+	struct ipath_rwq *wq = qp->r_rq.wq;
 	unsigned long flags;
 	int ret;
 
 	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 		*bad_wr = wr;
 		ret = -EINVAL;
 		goto bail;
@@ -304,59 +305,31 @@ static int ipath_post_receive(struct ib_
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > qp->r_rq.max_sge) {
+		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = qp->r_rq.head + 1;
+		next = wq->head + 1;
 		if (next >= qp->r_rq.size)
 			next = 0;
-		if (next == qp->r_rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(qp->ibqp.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(
-				    &to_idev(qp->ibqp.device)->lk_table,
-				    &wqe->sg_list[j], &wr->sg_list[i],
-				    IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		qp->r_rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 	}
 	ret = 0;
@@ -471,6 +444,10 @@ void ipath_ib_rcv(struct ipath_ibdev *de
 		struct ipath_mcast *mcast;
 		struct ipath_mcast_qp *p;
 
+		if (lnh != IPATH_LRH_GRH) {
+			dev->n_pkt_drops++;
+			goto bail;
+		}
 		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 		if (mcast == NULL) {
 			dev->n_pkt_drops++;
@@ -478,8 +455,7 @@ void ipath_ib_rcv(struct ipath_ibdev *de
 		}
 		dev->n_multicast_rcv++;
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
-			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-				     tlen, p->qp);
+			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
 		/*
 		 * Notify ipath_multicast_detach() if it is waiting for us
 		 * to finish.
@@ -806,7 +782,6 @@ int ipath_verbs_send(struct ipath_devdat
 	/* +1 is for the qword padding of pbc */
 	plen = hdrwords + ((len + 3) >> 2) + 1;
 	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-		ipath_dbg("packet len 0x%x too long, failing\n", plen);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -931,7 +906,8 @@ int ipath_get_counters(struct ipath_devd
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+		dd->ipath_rxfc_unsupvl_errs;
 	cntrs->port_rcv_remphys_errors =
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
 	cntrs->port_xmit_discards =
@@ -944,8 +920,10 @@ int ipath_get_counters(struct ipath_devd
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
 	cntrs->port_rcv_packets =
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+	cntrs->local_link_integrity_errors =
+		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+		dd->ipath_lli_errs : dd->ipath_lli_errors;
+	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
 
 	ret = 0;
 
@@ -1010,14 +988,14 @@ static int ipath_query_device(struct ib_
 	props->max_cqe = ib_ipath_max_cqes;
 	props->max_mr = dev->lk_table.max;
 	props->max_pd = ib_ipath_max_pds;
-	props->max_qp_rd_atom = 1;
-	props->max_qp_init_rd_atom = 1;
+	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+	props->max_qp_init_rd_atom = 255;
 	/* props->max_res_rd_atom */
 	props->max_srq = ib_ipath_max_srqs;
 	props->max_srq_wr = ib_ipath_max_srq_wrs;
 	props->max_srq_sge = ib_ipath_max_srq_sges;
 	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_HCA;
+	props->atomic_cap = IB_ATOMIC_GLOB;
 	props->max_pkeys = ipath_get_npkeys(dev->dd);
 	props->max_mcast_grp = ib_ipath_max_mcast_grps;
 	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1232,6 +1210,7 @@ static struct ib_ah *ipath_create_ah(str
 	struct ipath_ah *ah;
 	struct ib_ah *ret;
 	struct ipath_ibdev *dev = to_idev(pd->device);
+	unsigned long flags;
 
 	/* A multicast address requires a GRH (see ch. 8.4.1). */
 	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
@@ -1258,16 +1237,16 @@ static struct ib_ah *ipath_create_ah(str
 		goto bail;
 	}
 
-	spin_lock(&dev->n_ahs_lock);
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
 	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		spin_unlock(&dev->n_ahs_lock);
+		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 		kfree(ah);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
 	dev->n_ahs_allocated++;
-	spin_unlock(&dev->n_ahs_lock);
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
@@ -1288,10 +1267,11 @@ static int ipath_destroy_ah(struct ib_ah
 {
 	struct ipath_ibdev *dev = to_idev(ibah->device);
 	struct ipath_ah *ah = to_iah(ibah);
+	unsigned long flags;
 
-	spin_lock(&dev->n_ahs_lock);
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
 	dev->n_ahs_allocated--;
-	spin_unlock(&dev->n_ahs_lock);
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
 	kfree(ah);
 
@@ -1413,11 +1393,13 @@ static int enable_timer(struct ipath_dev
 	 * processing.
 	 */
 	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		u64 val;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
 				 0x2074076542310ULL);
 		/* Enable GPIO bit 2 interrupt */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 (u64) (1 << 2));
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+		val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, val);
 	}
 
 	init_timer(&dd->verbs_timer);
@@ -1432,8 +1414,17 @@ static int enable_timer(struct ipath_dev
 static int disable_timer(struct ipath_devdata *dd)
 {
 	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		u64 val;
+		/* clear only the port 0 GPIO interrupt bit; leave the rest */
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+		val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, val);
+		/*
+		 * We might want to undo changes to debugportselect,
+		 * but how?
+		 */
+	}
 
 	del_timer_sync(&dd->verbs_timer);
 
@@ -1504,7 +1495,7 @@ int ipath_register_ib_device(struct ipat
 	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
 	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
 	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
+	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 	idev->link_width_enabled = 3;	/* 1x or 4x */
 
 	/* Snapshot current HW counters to "clear" them. */
@@ -1571,7 +1562,7 @@ int ipath_register_ib_device(struct ipat
 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = IB_NODE_CA;
+	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
 	dev->dma_device = &dd->pcidev->dev;
 	dev->class_dev.dev = dev->dma_device;
@@ -1615,9 +1606,11 @@ int ipath_register_ib_device(struct ipat
 	dev->attach_mcast = ipath_multicast_attach;
 	dev->detach_mcast = ipath_multicast_detach;
 	dev->process_mad = ipath_process_mad;
+	dev->mmap = ipath_mmap;
+	dev->dma_ops = &ipath_dma_mapping_ops;
 
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s", system_utsname.nodename);
+		 IPATH_IDSTR " %s", init_utsname()->nodename);
 
 	ret = ib_register_device(dev);
 	if (ret)
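
One more locking note: the ipath_create_ah()/ipath_destroy_ah() hunks switch n_ahs_lock from spin_lock() to spin_lock_irqsave().  Once a lock can be taken from a path that may run with interrupts in play, every process-context user has to save and disable local interrupts as well, or the same CPU can deadlock re-acquiring the lock from the interrupt path.  The shape of it, as a hypothetical helper:

	#include <linux/spinlock.h>

	static void demo_counted_inc(spinlock_t *lock, unsigned int *counter)
	{
		unsigned long flags;

		spin_lock_irqsave(lock, flags);	/* safe against local IRQs */
		(*counter)++;
		spin_unlock_irqrestore(lock, flags);
	}
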
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,10 +38,14 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "ipath_layer.h"
 
+#define IPATH_MAX_RDMA_ATOMIC	4
+
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
 
@@ -49,7 +53,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION       1
+#define IPATH_UVERBS_ABI_VERSION       2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -88,7 +92,7 @@ struct ib_reth {
 } __attribute__ ((packed));
 
 struct ib_atomic_eth {
-	__be64 vaddr;
+	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
 	__be32 rkey;
 	__be64 swap_data;
 	__be64 compare_data;
@@ -107,7 +111,7 @@ struct ipath_other_headers {
 		} rc;
 		struct {
 			__be32 aeth;
-			__be64 atomic_ack_eth;
+			__be32 atomic_ack_eth[2];
 		} at;
 		__be32 imm_data;
 		__be32 aeth;
@@ -164,58 +168,41 @@ struct ipath_ah {
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;
+	struct ib_ucontext *context;
+	void *obj;
+	struct kref ref;
+	unsigned size;
+	unsigned mmap_cnt;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
  */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+};
 
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
 };
 
 /*
@@ -236,6 +223,7 @@ struct ipath_segarray {
 };
 
 struct ipath_mregion {
+	struct ib_pd *pd;	/* shares refcnt of ibmr.pd */
 	u64 user_base;		/* User's address for this region */
 	u64 iova;		/* IB start address of this region */
 	size_t length;
@@ -282,28 +270,40 @@ struct ipath_swqe {
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
 };
 
-struct ipath_rq {
-	spinlock_t lock;
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
 	u32 head;		/* new work requests posted to the head */
 	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+	struct ipath_rwq *wq;
+	spinlock_t lock;
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
@@ -315,6 +315,19 @@ struct ipath_sge_state {
 };
 
 /*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+	u8 opcode;
+	u32 psn;
+	union {
+		struct ipath_sge_state rdma_sge;
+		u64 atomic_data;
+	};
+};
+
+/*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
  * Variables prefixed with ack_ are for responder replies.
@@ -333,26 +346,27 @@ struct ipath_qp {
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
-	/* current RDMA read send data */
-	struct ipath_sge_state s_rdma_sge;
+	struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+	struct ipath_sge_state s_ack_rdma_sge;
+	struct ipath_sge_state s_rdma_read_sge;
 	struct ipath_sge_state r_sge;	/* current receive data */
 	spinlock_t s_lock;
-	unsigned long s_flags;
+	unsigned long s_busy;
 	u32 s_hdrwords;		/* size of s_hdr in 32 bit words */
 	u32 s_cur_size;		/* size of send packet in bytes */
 	u32 s_len;		/* total length of s_sge */
-	u32 s_rdma_len;		/* total length of s_rdma_sge */
+	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
 	u32 s_next_psn;		/* PSN for next request */
 	u32 s_last_psn;		/* last response PSN processed */
 	u32 s_psn;		/* current packet sequence number */
-	u32 s_ack_psn;		/* PSN for RDMA_READ */
+	u32 s_ack_rdma_psn;	/* PSN for sending RDMA read responses */
+	u32 s_ack_psn;		/* PSN for acking sends and RDMA writes */
 	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
 	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
 	u64 r_wr_id;		/* ID for current receive WQE */
-	u64 r_atomic_data;	/* data for last atomic op */
-	u32 r_atomic_psn;	/* PSN of last atomic op */
 	u32 r_len;		/* total length of r_sge */
 	u32 r_rcv_len;		/* receive data len processed */
 	u32 r_psn;		/* expected rcv packet sequence number */
@@ -362,11 +376,13 @@ struct ipath_qp {
 	u8 s_ack_state;		/* opcode of packet to ACK */
 	u8 s_nak_state;		/* non-zero if NAK is pending */
 	u8 r_state;		/* opcode of last packet received */
-	u8 r_ack_state;		/* opcode of packet to ACK */
 	u8 r_nak_state;		/* non-zero if NAK is pending */
 	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
 	u8 r_reuse_sge;		/* for UC receive errors */
 	u8 r_sge_inx;		/* current index into sg_list */
+	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */
+	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
+	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
 	u8 qp_access_flags;
 	u8 s_max_sge;		/* size of s_wq->sg_list */
 	u8 s_retry_cnt;		/* number of times to retry */
@@ -375,6 +391,10 @@ struct ipath_qp {
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_wait_credit;	/* limit number of unacked packets sent */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
+	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
+	u8 s_flags;
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
@@ -387,14 +407,20 @@ struct ipath_qp {
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_rq r_rq;	/* receive work queue */
+	struct ipath_rq r_rq;		/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
+/* Bit definition for s_busy. */
+#define IPATH_S_BUSY		0
+
 /*
  * Bit definitions for s_flags.
  */
-#define IPATH_S_BUSY		0
-#define IPATH_S_SIGNAL_REQ_WR	1
+#define IPATH_S_SIGNAL_REQ_WR	0x01
+#define IPATH_S_FENCE_PENDING	0x02
+#define IPATH_S_RDMAR_PENDING	0x04
+#define IPATH_S_ACK_PENDING	0x08
 
 #define IPATH_PSN_CREDIT	2048
 
@@ -413,15 +439,15 @@ static inline struct ipath_swqe *get_swq
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -461,6 +487,7 @@ struct ipath_ibdev {
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -639,8 +666,10 @@ struct ib_qp *ipath_create_qp(struct ib_
 
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask);
+		    int attr_mask, struct ib_udata *udata);
 
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -658,12 +687,6 @@ int ipath_verbs_send(struct ipath_devdat
 
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc);
-
 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
 
 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
@@ -688,10 +711,10 @@ int ipath_alloc_lkey(struct ipath_lkey_t
 
 void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
 
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 		  struct ib_sge *sge, int acc);
 
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 		  u32 len, u64 vaddr, u32 rkey, int acc);
 
 int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -702,14 +725,13 @@ struct ib_srq *ipath_create_srq(struct i
 				struct ib_udata *udata);
 
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata);
 
 int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int ipath_destroy_srq(struct ib_srq *ibsrq);
 
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
 
 struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -744,6 +766,10 @@ int ipath_unmap_fmr(struct list_head *fm
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -755,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *d
 
 void ipath_do_ruc_send(unsigned long data);
 
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
-		      u32 pmtu);
-
 int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
@@ -810,4 +833,6 @@ extern unsigned int ib_ipath_max_srq_wrs
 
 extern const u32 ib_ipath_rnr_table[];
 
+extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
+
 #endif				/* IPATH_VERBS_H */
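
The ipath_verbs.h changes above move the receive queue into a single ipath_rwq allocation (head, tail, then the variable-size RWQE array) so it can be mmap'ed into user space, and get_rwqe_ptr() does the stride arithmetic by hand. Below is a minimal user-space sketch of that arithmetic; the struct names and fields are simplified stand-ins for ipath_rwq/ipath_rwqe/ib_sge, not the driver's real definitions.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct sge {			/* stand-in for struct ib_sge */
	uint64_t addr;
	uint32_t length;
	uint32_t lkey;
};

struct rwqe {			/* stand-in for struct ipath_rwqe */
	uint64_t wr_id;
	uint8_t  num_sge;
	struct sge sg_list[];	/* max_sge entries follow each header */
};

struct rwq {			/* stand-in for struct ipath_rwq */
	uint32_t head;
	uint32_t tail;
	/* 'size' variable-size struct rwqe slots start right after this */
};

/* Each slot is sizeof(rwqe) + max_sge * sizeof(sge), so plain wq[n] fails. */
static struct rwqe *get_rwqe(struct rwq *rq, unsigned max_sge, unsigned n)
{
	size_t slot = sizeof(struct rwqe) + max_sge * sizeof(struct sge);

	return (struct rwqe *)((char *)(rq + 1) + slot * n);
}

int main(void)
{
	unsigned max_sge = 4, size = 8;
	size_t slot = sizeof(struct rwqe) + max_sge * sizeof(struct sge);
	struct rwq *rq = calloc(1, sizeof(*rq) + slot * size);

	if (!rq)
		return 1;
	get_rwqe(rq, max_sge, 3)->wr_id = 0xabcd;
	printf("slot size %zu, wr_id[3] = 0x%llx\n", slot,
	       (unsigned long long)get_rwqe(rq, max_sge, 3)->wr_id);
	free(rq);
	return 0;
}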
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -38,13 +38,23 @@
 #include "ipath_kernel.h"
 
 /**
- * ipath_unordered_wc - indicate whether write combining is ordered
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
  *
- * PowerPC systems (at least those in the 970 processor family)
- * write partially filled store buffers in address order, but will write
- * completely filled store buffers in "random" order, and therefore must
- * have serialization for correctness with current InfiniPath chips.
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+	return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
  *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
  */
 int ipath_unordered_wc(void)
 {
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata
 			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
 				   "cookie is %d\n", cookie);
 			dd->ipath_wc_cookie = cookie;
+			dd->ipath_wc_base = (unsigned long) pioaddr;
+			dd->ipath_wc_len = (unsigned long) piolen;
 		}
 	}
 
@@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata
 void ipath_disable_wc(struct ipath_devdata *dd)
 {
 	if (dd->ipath_wc_cookie) {
+		int r;
 		ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
-		mtrr_del(dd->ipath_wc_cookie, 0, 0);
-		dd->ipath_wc_cookie = 0;
+		r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+			     dd->ipath_wc_len);
+		if (r < 0)
+			dev_info(&dd->pcidev->dev,
+				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+				 dd->ipath_wc_cookie, dd->ipath_wc_base,
+				 dd->ipath_wc_len, r);
+		dd->ipath_wc_cookie = 0; /* even on failure */
 	}
 }
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_IPATH
 	tristate "QLogic InfiniPath Driver"
-	depends on PCI_MSI && X86_64 && INFINIBAND
+	depends on PCI_MSI && 64BIT && INFINIBAND && NET
 	---help---
 	This is a driver for QLogic InfiniPath host channel adapters,
 	including InfiniBand verbs support.  This driver allows these
--- linux-2.6.18.noarch/drivers/infiniband/hw/ipath/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/ipath/Makefile
@@ -6,17 +6,17 @@ obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipa
 ib_ipath-y := \
 	ipath_cq.o \
 	ipath_diag.o \
+	ipath_dma.o \
 	ipath_driver.o \
 	ipath_eeprom.o \
 	ipath_file_ops.o \
 	ipath_fs.o \
-	ipath_iba6110.o \
-	ipath_iba6120.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
 	ipath_keys.o \
 	ipath_layer.o \
 	ipath_mad.o \
+	ipath_mmap.o \
 	ipath_mr.o \
 	ipath_qp.o \
 	ipath_rc.o \
@@ -30,6 +30,9 @@ ib_ipath-y := \
 	ipath_verbs_mcast.o \
 	ipath_verbs.o
 
+ib_ipath-y += ipath_iba6110.o
+ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o
+
 ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
 ib_ipath-$(CONFIG_X86_64) += memcpy_cachebypass_x86_64.o
 ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_av.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_av.c
@@ -33,7 +33,6 @@
  * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -190,7 +189,7 @@ int mthca_create_ah(struct mthca_dev *de
 on_hca_fail:
 	if (ah->type == MTHCA_AH_PCI_POOL) {
 		ah->av = pci_pool_alloc(dev->av_table.pool,
-					SLAB_ATOMIC, &ah->avdma);
+					GFP_ATOMIC, &ah->avdma);
 		if (!ah->av)
 			return -ENOMEM;
 
@@ -323,7 +322,7 @@ int mthca_ah_query(struct ib_ah *ibah, s
 	return 0;
 }
 
-int __devinit mthca_init_av_table(struct mthca_dev *dev)
+int mthca_init_av_table(struct mthca_dev *dev)
 {
 	int err;
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_catas.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -49,35 +49,34 @@ enum {
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static LIST_HEAD(catas_list);
 static struct workqueue_struct *catas_wq;
-static struct list_head catas_list;
 static struct work_struct catas_work;
 
-static int catas_reset_disable = 0;
+static int catas_reset_disable;
 module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
-MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if > 0");
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
 
 static void catas_reset(void *work_ptr)
 {
 	struct mthca_dev *dev, *tmpdev;
-	LIST_HEAD(local_catas);
-	unsigned long flags;
-	int rc;
+	LIST_HEAD(tlist);
+	int ret;
 
 	mutex_lock(&mthca_device_mutex);
 
-	spin_lock_irqsave(&catas_lock, flags);
-	list_for_each_entry_safe(dev, tmpdev, &catas_list, catas_err.list)
-		list_move_tail(&dev->catas_err.list, &local_catas);
-	spin_unlock_irqrestore(&catas_lock, flags);
+	spin_lock_irq(&catas_lock);
+	list_splice_init(&catas_list, &tlist);
+	spin_unlock_irq(&catas_lock);
 
-	list_for_each_entry_safe(dev, tmpdev, &local_catas, catas_err.list) {
-		rc = mthca_restart_one(dev->pdev);
-		if (rc)
-			mthca_err(dev, "Reset failed (%d)\n", rc);
+	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+		ret = __mthca_restart_one(dev->pdev);
+		if (ret)
+			mthca_err(dev, "Reset failed (%d)\n", ret);
 		else
 			mthca_dbg(dev, "Reset succeeded\n");
 	}
+
 	mutex_unlock(&mthca_device_mutex);
 }
 
@@ -183,8 +182,6 @@ void mthca_start_catas_poll(struct mthca
 
 void mthca_stop_catas_poll(struct mthca_dev *dev)
 {
-	unsigned long flags;
-
 	spin_lock_irq(&catas_lock);
 	dev->catas_err.stop = 1;
 	spin_unlock_irq(&catas_lock);
@@ -199,18 +196,19 @@ void mthca_stop_catas_poll(struct mthca_
 				   dev->catas_err.size * 4);
 	}
 
-	spin_lock_irqsave(&catas_lock, flags);
+	spin_lock_irq(&catas_lock);
 	list_del(&dev->catas_err.list);
-	spin_unlock_irqrestore(&catas_lock, flags);
+	spin_unlock_irq(&catas_lock);
 }
 
 int __init mthca_catas_init(void)
 {
-	INIT_LIST_HEAD(&catas_list);
 	INIT_WORK(&catas_work, catas_reset, NULL);
+
 	catas_wq = create_singlethread_workqueue("mthcacatas");
 	if (!catas_wq)
 		return -ENOMEM;
+
 	return 0;
 }
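
The reworked catas_reset() above grabs everything on catas_list with list_splice_init() while holding the spinlock and then resets the devices with the lock dropped. The following user-space sketch shows the same "detach under the lock, process outside it" pattern, using a pthread mutex and a hand-rolled singly linked list as stand-ins for the kernel spinlock and list APIs.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	struct node *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;		/* global pending list, like catas_list */

static void queue_event(int id)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		return;
	n->id = id;
	pthread_mutex_lock(&list_lock);
	n->next = pending;
	pending = n;
	pthread_mutex_unlock(&list_lock);
}

static void process_events(void)
{
	struct node *local, *n;

	/* Detach the whole list while holding the lock... */
	pthread_mutex_lock(&list_lock);
	local = pending;
	pending = NULL;			/* like list_splice_init(): source emptied */
	pthread_mutex_unlock(&list_lock);

	/* ...then do the slow work (the device resets) with the lock dropped. */
	while ((n = local) != NULL) {
		local = n->next;
		printf("resetting device %d\n", n->id);
		free(n);
	}
}

int main(void)
{
	queue_event(1);
	queue_event(2);
	process_events();
	return 0;
}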
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -34,7 +34,7 @@
  * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/sched.h>
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <asm/io.h>
@@ -355,9 +355,6 @@ void mthca_cmd_event(struct mthca_dev *d
 	context->result    = 0;
 	context->status    = status;
 	context->out_param = out_param;
-
-	context->token += dev->cmd.token_mask + 1;
-
 	complete(&context->done);
 }
 
@@ -379,6 +376,7 @@ static int mthca_cmd_wait(struct mthca_d
 	spin_lock(&dev->cmd.context_lock);
 	BUG_ON(dev->cmd.free_head < 0);
 	context = &dev->cmd.context[dev->cmd.free_head];
+	context->token += dev->cmd.token_mask + 1;
 	dev->cmd.free_head = context->next;
 	spin_unlock(&dev->cmd.context_lock);
 
@@ -1051,7 +1049,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
 	dev_lim->max_eqs = 1 << (field & 0x7);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
-	dev_lim->reserved_mtts = 1 << (field >> 4);
+	if (mthca_is_memfree(dev))
+		dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
+					       MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
+	else
+		dev_lim->reserved_mtts = 1 << (field >> 4);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
 	dev_lim->max_mrw_sz = 1 << field;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
@@ -1820,11 +1822,11 @@ int mthca_MAD_IFC(struct mthca_dev *dev,
 
 #define MAD_IFC_BOX_SIZE      0x400
 #define MAD_IFC_MY_QPN_OFFSET 0x100
-#define MAD_IFC_RQPN_OFFSET   0x104
-#define MAD_IFC_SL_OFFSET     0x108
-#define MAD_IFC_G_PATH_OFFSET 0x109
-#define MAD_IFC_RLID_OFFSET   0x10a
-#define MAD_IFC_PKEY_OFFSET   0x10e
+#define MAD_IFC_RQPN_OFFSET   0x108
+#define MAD_IFC_SL_OFFSET     0x10c
+#define MAD_IFC_G_PATH_OFFSET 0x10d
+#define MAD_IFC_RLID_OFFSET   0x10e
+#define MAD_IFC_PKEY_OFFSET   0x112
 #define MAD_IFC_GRH_OFFSET    0x140
 
 	inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -1854,7 +1856,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev,
 
 		memset(inbox + 256, 0, 256);
 
-		MTHCA_PUT(inbox, in_wc->qp_num,     MAD_IFC_MY_QPN_OFFSET);
+		MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET);
 		MTHCA_PUT(inbox, in_wc->src_qp,     MAD_IFC_RQPN_OFFSET);
 
 		val = in_wc->sl << 4;
@@ -1862,7 +1864,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev,
 
 		val = in_wc->dlid_path_bits |
 			(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
-		MTHCA_PUT(inbox, val,               MAD_IFC_GRH_OFFSET);
+		MTHCA_PUT(inbox, val,               MAD_IFC_G_PATH_OFFSET);
 
 		MTHCA_PUT(inbox, in_wc->slid,       MAD_IFC_RLID_OFFSET);
 		MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
@@ -1870,7 +1872,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev,
 		if (in_grh)
 			memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
 
-		op_modifier |= 0x10;
+		op_modifier |= 0x4;
 
 		in_modifier |= in_wc->slid << 16;
 	}
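
One hunk above moves the token bump out of mthca_cmd_event() and into mthca_cmd_wait(), where the context is taken off the free list. The sketch below shows, in plain user-space C, the general idea behind such tokens: the low bits select a context slot while the upper bits act as a generation count, so a stale or repeated completion event no longer matches. The array size, field names, and printouts are made up for the example and are not the driver's layout.

#include <stdint.h>
#include <stdio.h>

#define NCTX		4
#define TOKEN_MASK	(NCTX - 1)

struct cmd_context {
	uint16_t token;
	int	 busy;
};

static struct cmd_context ctx[NCTX];

/* Take context i and advance its generation before issuing a command. */
static struct cmd_context *get_context(int i)
{
	ctx[i].token += TOKEN_MASK + 1;	/* same low bits, new generation */
	ctx[i].busy = 1;
	return &ctx[i];
}

/* Completion path: the event carries the token the command was issued with. */
static void cmd_event(uint16_t token)
{
	struct cmd_context *c = &ctx[token & TOKEN_MASK];

	if (!c->busy || c->token != token) {
		printf("stale/unknown token 0x%x ignored\n", token);
		return;
	}
	c->busy = 0;
	printf("command with token 0x%x completed\n", token);
}

int main(void)
{
	struct cmd_context *c;
	int i;

	for (i = 0; i < NCTX; ++i)	/* low bits of the token == slot index */
		ctx[i].token = i;

	c = get_context(1);
	cmd_event(c->token);		/* matches */
	cmd_event(c->token);		/* second event for same token: stale */
	return 0;
}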
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_cq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -36,9 +36,10 @@
  * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/hardirq.h>
 
+#include <asm/io.h>
+
 #include <rdma/ib_pack.h>
 
 #include "mthca_dev.h"
@@ -53,6 +54,10 @@ enum {
 	MTHCA_CQ_ENTRY_SIZE = 0x20
 };
 
+enum {
+	MTHCA_ATOMIC_BYTE_LEN = 8
+};
+
 /*
  * Must be packed because start is 64 bits but only aligned to 32 bits.
  */
@@ -210,6 +215,11 @@ static inline void update_cons_index(str
 		mthca_write64(doorbell,
 			      dev->kar + MTHCA_CQ_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+		/*
+		 * Make sure doorbells don't leak out of CQ spinlock
+		 * and reach the HCA out of order:
+		 */
+		mmiowb();
 	}
 }
 
@@ -269,14 +279,12 @@ static inline int is_recv_cqe(struct mth
 		return !(cqe->is_send & 0x80);
 }
 
-void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
-		    struct mthca_srq *srq)
+void __mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		      struct mthca_srq *srq)
 {
 	struct mthca_cqe *cqe;
 	u32 prod_index;
-	int nfreed = 0;
-
-	spin_lock_irq(&cq->lock);
+	int i, nfreed = 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -311,11 +319,19 @@ void mthca_cq_clean(struct mthca_dev *de
 	}
 
 	if (nfreed) {
+		for (i = 0; i < nfreed; ++i)
+			set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
 		wmb();
 		cq->cons_index += nfreed;
 		update_cons_index(dev, cq, nfreed);
 	}
+}
 
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		    struct mthca_srq *srq)
+{
+	spin_lock_irq(&cq->lock);
+	__mthca_cq_clean(dev, cq, qpn, srq);
 	spin_unlock_irq(&cq->lock);
 }
 
@@ -524,14 +540,13 @@ static inline int mthca_poll_one(struct 
 		}
 	}
 
-	entry->qp_num = (*cur_qp)->qpn;
+	entry->qp = &(*cur_qp)->ibqp;
 
 	if (is_send) {
 		wq = &(*cur_qp)->sq;
 		wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
 			     >> wq->wqe_shift);
-		entry->wr_id = (*cur_qp)->wrid[wqe_index +
-					       (*cur_qp)->rq.max];
+		entry->wr_id = (*cur_qp)->wrid[wqe_index];
 	} else if ((*cur_qp)->ibqp.srq) {
 		struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
 		u32 wqe = be32_to_cpu(cqe->wqe);
@@ -544,14 +559,14 @@ static inline int mthca_poll_one(struct 
 		wq = &(*cur_qp)->rq;
 		wqe = be32_to_cpu(cqe->wqe);
 		wqe_index = wqe >> wq->wqe_shift;
-               /*
-		* WQE addr == base - 1 might be reported in receive completion
-		* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
-		* Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
-		*/
+		/*
+		 * WQE addr == base - 1 might be reported in receive completion
+		 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+		 * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+		 */
 		if (unlikely(wqe_index < 0))
 			wqe_index = wq->max - 1;
-		entry->wr_id = (*cur_qp)->wrid[wqe_index];
+		entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->sq.max];
 	}
 
 	if (wq) {
@@ -593,11 +608,11 @@ static inline int mthca_poll_one(struct 
 			break;
 		case MTHCA_OPCODE_ATOMIC_CS:
 			entry->opcode    = IB_WC_COMP_SWAP;
-			entry->byte_len  = be32_to_cpu(cqe->byte_cnt);
+			entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
 			break;
 		case MTHCA_OPCODE_ATOMIC_FA:
 			entry->opcode    = IB_WC_FETCH_ADD;
-			entry->byte_len  = be32_to_cpu(cqe->byte_cnt);
+			entry->byte_len  = MTHCA_ATOMIC_BYTE_LEN;
 			break;
 		case MTHCA_OPCODE_BIND_MW:
 			entry->opcode    = IB_WC_BIND_MW;
@@ -963,7 +978,7 @@ void mthca_free_cq(struct mthca_dev *dev
 	mthca_free_mailbox(dev, mailbox);
 }
 
-int __devinit mthca_init_cq_table(struct mthca_dev *dev)
+int mthca_init_cq_table(struct mthca_dev *dev)
 {
 	int err;
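
The mthca_cq.c hunks above split CQ cleanup into an unlocked __mthca_cq_clean() plus a thin mthca_cq_clean() wrapper that takes cq->lock, so a path that already holds the lock can call the double-underscore variant directly. A minimal sketch of that split, with a pthread mutex standing in for the spinlock and made-up function names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cq_lock = PTHREAD_MUTEX_INITIALIZER;
static int cq_outstanding = 5;

/* Does the actual cleanup; the caller must already hold cq_lock. */
static void __cq_clean(void)
{
	printf("dropping %d stale entries\n", cq_outstanding);
	cq_outstanding = 0;
}

/* Convenience wrapper for callers that do not hold the lock yet. */
static void cq_clean(void)
{
	pthread_mutex_lock(&cq_lock);
	__cq_clean();
	pthread_mutex_unlock(&cq_lock);
}

int main(void)
{
	cq_clean();			/* ordinary caller */

	cq_outstanding = 3;
	pthread_mutex_lock(&cq_lock);	/* a path that is already locked,   */
	__cq_clean();			/* e.g. during a resize, skips the  */
	pthread_mutex_unlock(&cq_lock);	/* wrapper and calls the __ variant */
	return 0;
}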
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_dev.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -93,9 +93,8 @@ enum {
 };
 
 enum {
-	MTHCA_EQ_CMD,
-	MTHCA_EQ_ASYNC,
 	MTHCA_EQ_COMP,
+	MTHCA_EQ_ASYNC,
 	MTHCA_NUM_EQ
 };
 
@@ -454,7 +453,7 @@ void mthca_unregister_device(struct mthc
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
-int mthca_restart_one(struct pci_dev *pdev);
+int __mthca_restart_one(struct pci_dev *pdev);
 int mthca_catas_init(void);
 void mthca_catas_cleanup(void);
 
@@ -464,6 +463,8 @@ void mthca_uar_free(struct mthca_dev *de
 int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
 
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
 struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
 void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
 int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
@@ -503,6 +504,8 @@ void mthca_free_cq(struct mthca_dev *dev
 void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
 void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 		    enum ib_event_type event_type);
+void __mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		      struct mthca_srq *srq);
 void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		    struct mthca_srq *srq);
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
@@ -513,7 +516,7 @@ int mthca_alloc_srq(struct mthca_dev *de
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -528,7 +531,8 @@ void mthca_qp_event(struct mthca_dev *de
 		    enum ib_event_type event_type);
 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		   struct ib_qp_init_attr *qp_init_attr);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_eq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -33,7 +33,6 @@
  * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
@@ -111,11 +110,11 @@ enum {
 				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
 				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
 				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
-				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
+				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))        | \
+				(1ULL << MTHCA_EVENT_TYPE_CMD)
 #define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
 				(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
 				(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
-#define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
 
 #define MTHCA_EQ_DB_INC_CI     (1 << 24)
 #define MTHCA_EQ_DB_REQ_NOT    (2 << 24)
@@ -405,7 +404,7 @@ static int mthca_eq_int(struct mthca_dev
 	return eqes_found;
 }
 
-static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
 {
 	struct mthca_dev *dev = dev_ptr;
 	u32 ecr;
@@ -432,8 +431,7 @@ static irqreturn_t mthca_tavor_interrupt
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
-					 struct pt_regs *regs)
+static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
 {
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
@@ -446,7 +444,7 @@ static irqreturn_t mthca_tavor_msi_x_int
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
+static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
 {
 	struct mthca_dev *dev = dev_ptr;
 	int work = 0;
@@ -467,8 +465,7 @@ static irqreturn_t mthca_arbel_interrupt
 	return IRQ_RETVAL(work);
 }
 
-static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
-					       struct pt_regs *regs)
+static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
 {
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
@@ -481,10 +478,10 @@ static irqreturn_t mthca_arbel_msi_x_int
 	return IRQ_HANDLED;
 }
 
-static int __devinit mthca_create_eq(struct mthca_dev *dev,
-				     int nent,
-				     u8 intr,
-				     struct mthca_eq *eq)
+static int mthca_create_eq(struct mthca_dev *dev,
+			   int nent,
+			   u8 intr,
+			   struct mthca_eq *eq)
 {
 	int npages;
 	u64 *dma_list = NULL;
@@ -666,9 +663,9 @@ static void mthca_free_irqs(struct mthca
 				 dev->eq_table.eq + i);
 }
 
-static int __devinit mthca_map_reg(struct mthca_dev *dev,
-				   unsigned long offset, unsigned long size,
-				   void __iomem **map)
+static int mthca_map_reg(struct mthca_dev *dev,
+			 unsigned long offset, unsigned long size,
+			 void __iomem **map)
 {
 	unsigned long base = pci_resource_start(dev->pdev, 0);
 
@@ -693,7 +690,7 @@ static void mthca_unmap_reg(struct mthca
 	iounmap(map);
 }
 
-static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
+static int mthca_map_eq_regs(struct mthca_dev *dev)
 {
 	if (mthca_is_memfree(dev)) {
 		/*
@@ -783,7 +780,7 @@ static void mthca_unmap_eq_regs(struct m
 	}
 }
 
-int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
 {
 	int ret;
 	u8 status;
@@ -827,7 +824,7 @@ void mthca_unmap_eq_icm(struct mthca_dev
 	__free_page(dev->eq_table.icm_page);
 }
 
-int __devinit mthca_init_eq_table(struct mthca_dev *dev)
+int mthca_init_eq_table(struct mthca_dev *dev)
 {
 	int err;
 	u8 status;
@@ -866,23 +863,17 @@ int __devinit mthca_init_eq_table(struct
 	if (err)
 		goto err_out_unmap;
 
-	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
+	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_ASYNC_EQE +
+			      MTHCA_NUM_SPARE_EQE,
 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
 			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
 	if (err)
 		goto err_out_comp;
 
-	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
-			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
-			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
-	if (err)
-		goto err_out_async;
-
 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
 		static const char *eq_name[] = {
 			[MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
 			[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
-			[MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
 		};
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
@@ -892,7 +883,7 @@ int __devinit mthca_init_eq_table(struct
 					  mthca_tavor_msi_x_interrupt,
 					  0, eq_name[i], dev->eq_table.eq + i);
 			if (err)
-				goto err_out_cmd;
+				goto err_out_async;
 			dev->eq_table.eq[i].have_irq = 1;
 		}
 	} else {
@@ -902,7 +893,7 @@ int __devinit mthca_init_eq_table(struct
 				  mthca_tavor_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
 		if (err)
-			goto err_out_cmd;
+			goto err_out_async;
 		dev->eq_table.have_irq = 1;
 	}
 
@@ -915,15 +906,6 @@ int __devinit mthca_init_eq_table(struct
 		mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
 
-	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
-			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
-	if (err)
-		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
-			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
-	if (status)
-		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
-			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
-
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		if (mthca_is_memfree(dev))
 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
@@ -932,11 +914,8 @@ int __devinit mthca_init_eq_table(struct
 
 	return 0;
 
-err_out_cmd:
-	mthca_free_irqs(dev);
-	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
-
 err_out_async:
+	mthca_free_irqs(dev);
 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
 
 err_out_comp:
@@ -959,8 +938,6 @@ void mthca_cleanup_eq_table(struct mthca
 
 	mthca_MAP_EQ(dev, async_mask(dev),
 		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
-	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
-		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
 
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_mad.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *
 
 			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
-				     be16_to_cpu(pinfo->lid),
+				     be16_to_cpu(pinfo->sm_lid),
 				     pinfo->neighbormtu_mastersmsl & 0xf);
 
 			event.device           = ibdev;
@@ -317,7 +317,7 @@ err:
 	return ret;
 }
 
-void __devexit mthca_free_agents(struct mthca_dev *dev)
+void mthca_free_agents(struct mthca_dev *dev)
 {
 	struct ib_mad_agent *agent;
 	int p, q;
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,25 +80,62 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
-struct mutex mthca_device_mutex;
+DEFINE_MUTEX(mthca_device_mutex);
+
+#define MTHCA_DEFAULT_NUM_QP            (1 << 16)
+#define MTHCA_DEFAULT_RDB_PER_QP        (1 << 2)
+#define MTHCA_DEFAULT_NUM_CQ            (1 << 16)
+#define MTHCA_DEFAULT_NUM_MCG           (1 << 13)
+#define MTHCA_DEFAULT_NUM_MPT           (1 << 17)
+#define MTHCA_DEFAULT_NUM_MTT           (1 << 20)
+#define MTHCA_DEFAULT_NUM_UDAV          (1 << 15)
+#define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18)
+#define MTHCA_DEFAULT_NUM_UARC_SIZE     (1 << 18)
+
+static struct mthca_profile hca_profile = {
+	.num_qp             = MTHCA_DEFAULT_NUM_QP,
+	.rdb_per_qp         = MTHCA_DEFAULT_RDB_PER_QP,
+	.num_cq             = MTHCA_DEFAULT_NUM_CQ,
+	.num_mcg            = MTHCA_DEFAULT_NUM_MCG,
+	.num_mpt            = MTHCA_DEFAULT_NUM_MPT,
+	.num_mtt            = MTHCA_DEFAULT_NUM_MTT,
+	.num_udav           = MTHCA_DEFAULT_NUM_UDAV,          /* Tavor only */
+	.fmr_reserved_mtts  = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */
+	.uarc_size          = MTHCA_DEFAULT_NUM_UARC_SIZE,     /* Arbel only */
+};
+
+module_param_named(num_qp, hca_profile.num_qp, int, 0444);
+MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA");
+
+module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444);
+MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP");
+
+module_param_named(num_cq, hca_profile.num_cq, int, 0444);
+MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA");
+
+module_param_named(num_mcg, hca_profile.num_mcg, int, 0444);
+MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA");
+
+module_param_named(num_mpt, hca_profile.num_mpt, int, 0444);
+MODULE_PARM_DESC(num_mpt,
+		"maximum number of memory protection table entries per HCA");
+
+module_param_named(num_mtt, hca_profile.num_mtt, int, 0444);
+MODULE_PARM_DESC(num_mtt,
+		 "maximum number of memory translation table segments per HCA");
+
+module_param_named(num_udav, hca_profile.num_udav, int, 0444);
+MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA");
+
+module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444);
+MODULE_PARM_DESC(fmr_reserved_mtts,
+		 "number of memory translation table segments reserved for FMR");
 
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
-static struct mthca_profile default_profile = {
-	.num_qp		   = 1 << 16,
-	.rdb_per_qp	   = 4,
-	.num_cq		   = 1 << 16,
-	.num_mcg	   = 1 << 13,
-	.num_mpt	   = 1 << 17,
-	.num_mtt	   = 1 << 20,
-	.num_udav	   = 1 << 15,	/* Tavor only */
-	.fmr_reserved_mtts = 1 << 18,	/* Tavor only */
-	.uarc_size	   = 1 << 18,	/* Arbel only */
-};
-
-static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
+static int mthca_tune_pci(struct mthca_dev *mdev)
 {
 	int cap;
 	u16 val;
@@ -143,7 +180,7 @@ static int __devinit mthca_tune_pci(stru
 	return 0;
 }
 
-static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
+static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
 {
 	int err;
 	u8 status;
@@ -255,7 +292,7 @@ static int __devinit mthca_dev_lim(struc
 	return 0;
 }
 
-static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
+static int mthca_init_tavor(struct mthca_dev *mdev)
 {
 	u8 status;
 	int err;
@@ -303,7 +340,7 @@ static int __devinit mthca_init_tavor(st
 		goto err_disable;
 	}
 
-	profile = default_profile;
+	profile = hca_profile;
 	profile.num_uar   = dev_lim.uar_size / PAGE_SIZE;
 	profile.uarc_size = 0;
 	if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
@@ -333,7 +370,7 @@ err_disable:
 	return err;
 }
 
-static int __devinit mthca_load_fw(struct mthca_dev *mdev)
+static int mthca_load_fw(struct mthca_dev *mdev)
 {
 	u8 status;
 	int err;
@@ -342,7 +379,7 @@ static int __devinit mthca_load_fw(struc
 
 	mdev->fw.arbel.fw_icm =
 		mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
-				GFP_HIGHUSER | __GFP_NOWARN);
+				GFP_HIGHUSER | __GFP_NOWARN, 0);
 	if (!mdev->fw.arbel.fw_icm) {
 		mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
 		return -ENOMEM;
@@ -375,14 +412,14 @@ err_unmap_fa:
 	mthca_UNMAP_FA(mdev, &status);
 
 err_free:
-	mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+	mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
 	return err;
 }
 
-static int __devinit mthca_init_icm(struct mthca_dev *mdev,
-				    struct mthca_dev_lim *dev_lim,
-				    struct mthca_init_hca_param *init_hca,
-				    u64 icm_size)
+static int mthca_init_icm(struct mthca_dev *mdev,
+			  struct mthca_dev_lim *dev_lim,
+			  struct mthca_init_hca_param *init_hca,
+			  u64 icm_size)
 {
 	u64 aux_pages;
 	u8 status;
@@ -404,7 +441,7 @@ static int __devinit mthca_init_icm(stru
 		  (unsigned long long) aux_pages << 2);
 
 	mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
-						 GFP_HIGHUSER | __GFP_NOWARN);
+						 GFP_HIGHUSER | __GFP_NOWARN, 0);
 	if (!mdev->fw.arbel.aux_icm) {
 		mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
 		return -ENOMEM;
@@ -427,10 +464,15 @@ static int __devinit mthca_init_icm(stru
 		goto err_unmap_aux;
 	}
 
+	/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
+	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
+					   dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
 							 MTHCA_MTT_SEG_SIZE,
 							 mdev->limits.num_mtt_segs,
-							 mdev->limits.reserved_mtts, 1);
+							 mdev->limits.reserved_mtts,
+							 1, 0);
 	if (!mdev->mr_table.mtt_table) {
 		mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
 		err = -ENOMEM;
@@ -440,7 +482,8 @@ static int __devinit mthca_init_icm(stru
 	mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
 							 dev_lim->mpt_entry_sz,
 							 mdev->limits.num_mpts,
-							 mdev->limits.reserved_mrws, 1);
+							 mdev->limits.reserved_mrws,
+							 1, 1);
 	if (!mdev->mr_table.mpt_table) {
 		mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
 		err = -ENOMEM;
@@ -450,7 +493,8 @@ static int __devinit mthca_init_icm(stru
 	mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
 							dev_lim->qpc_entry_sz,
 							mdev->limits.num_qps,
-							mdev->limits.reserved_qps, 0);
+							mdev->limits.reserved_qps,
+						       	0, 0);
 	if (!mdev->qp_table.qp_table) {
 		mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
 		err = -ENOMEM;
@@ -460,7 +504,8 @@ static int __devinit mthca_init_icm(stru
 	mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
 							 dev_lim->eqpc_entry_sz,
 							 mdev->limits.num_qps,
-							 mdev->limits.reserved_qps, 0);
+							 mdev->limits.reserved_qps,
+							 0, 0);
 	if (!mdev->qp_table.eqp_table) {
 		mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
 		err = -ENOMEM;
@@ -470,7 +515,7 @@ static int __devinit mthca_init_icm(stru
 	mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
 							 MTHCA_RDB_ENTRY_SIZE,
 							 mdev->limits.num_qps <<
-							 mdev->qp_table.rdb_shift,
+							 mdev->qp_table.rdb_shift, 0,
 							 0, 0);
 	if (!mdev->qp_table.rdb_table) {
 		mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
@@ -481,7 +526,8 @@ static int __devinit mthca_init_icm(stru
        mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
 						    dev_lim->cqc_entry_sz,
 						    mdev->limits.num_cqs,
-						    mdev->limits.reserved_cqs, 0);
+						    mdev->limits.reserved_cqs,
+						    0, 0);
 	if (!mdev->cq_table.table) {
 		mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
 		err = -ENOMEM;
@@ -493,7 +539,8 @@ static int __devinit mthca_init_icm(stru
 			mthca_alloc_icm_table(mdev, init_hca->srqc_base,
 					      dev_lim->srq_entry_sz,
 					      mdev->limits.num_srqs,
-					      mdev->limits.reserved_srqs, 0);
+					      mdev->limits.reserved_srqs,
+					      0, 0);
 		if (!mdev->srq_table.table) {
 			mthca_err(mdev, "Failed to map SRQ context memory, "
 				  "aborting.\n");
@@ -513,7 +560,7 @@ static int __devinit mthca_init_icm(stru
 						      mdev->limits.num_amgms,
 						      mdev->limits.num_mgms +
 						      mdev->limits.num_amgms,
-						      0);
+						      0, 0);
 	if (!mdev->mcg_table.table) {
 		mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
 		err = -ENOMEM;
@@ -551,7 +598,7 @@ err_unmap_aux:
 	mthca_UNMAP_ICM_AUX(mdev, &status);
 
 err_free_aux:
-	mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+	mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
 
 	return err;
 }
@@ -572,10 +619,10 @@ static void mthca_free_icms(struct mthca
 	mthca_unmap_eq_icm(mdev);
 
 	mthca_UNMAP_ICM_AUX(mdev, &status);
-	mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+	mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
 }
 
-static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
+static int mthca_init_arbel(struct mthca_dev *mdev)
 {
 	struct mthca_dev_lim        dev_lim;
 	struct mthca_profile        profile;
@@ -621,7 +668,7 @@ static int __devinit mthca_init_arbel(st
 		goto err_stop_fw;
 	}
 
-	profile = default_profile;
+	profile = hca_profile;
 	profile.num_uar  = dev_lim.uar_size / PAGE_SIZE;
 	profile.num_udav = 0;
 	if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
@@ -656,7 +703,7 @@ err_free_icm:
 
 err_stop_fw:
 	mthca_UNMAP_FA(mdev, &status);
-	mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+	mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
 
 err_disable:
 	if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
@@ -675,7 +722,7 @@ static void mthca_close_hca(struct mthca
 		mthca_free_icms(mdev);
 
 		mthca_UNMAP_FA(mdev, &status);
-		mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+		mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
 
 		if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
 			mthca_DISABLE_LAM(mdev, &status);
@@ -683,7 +730,7 @@ static void mthca_close_hca(struct mthca
 		mthca_SYS_DIS(mdev, &status);
 }
 
-static int __devinit mthca_init_hca(struct mthca_dev *mdev)
+static int mthca_init_hca(struct mthca_dev *mdev)
 {
 	u8 status;
 	int err;
@@ -720,7 +767,7 @@ err_close:
 	return err;
 }
 
-static int __devinit mthca_setup_hca(struct mthca_dev *dev)
+static int mthca_setup_hca(struct mthca_dev *dev)
 {
 	int err;
 	u8 status;
@@ -788,7 +835,7 @@ static int __devinit mthca_setup_hca(str
 	if (err || status) {
 		mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n",
 			  dev->mthca_flags & MTHCA_FLAG_MSI_X ?
-			  dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
+			  dev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector :
 			  dev->pdev->irq);
 		if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
 			mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
@@ -875,8 +922,7 @@ err_uar_table_free:
 	return err;
 }
 
-static int __devinit mthca_request_regions(struct pci_dev *pdev,
-					   int ddr_hidden)
+static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden)
 {
 	int err;
 
@@ -928,14 +974,13 @@ static void mthca_release_regions(struct
 			   MTHCA_HCR_SIZE);
 }
 
-static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
+static int mthca_enable_msi_x(struct mthca_dev *mdev)
 {
-	struct msix_entry entries[3];
+	struct msix_entry entries[2];
 	int err;
 
 	entries[0].entry = 0;
 	entries[1].entry = 1;
-	entries[2].entry = 2;
 
 	err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
 	if (err) {
@@ -947,7 +992,6 @@ static int __devinit mthca_enable_msi_x(
 
 	mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
 	mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
-	mdev->eq_table.eq[MTHCA_EQ_CMD  ].msi_x_vector = entries[2].vector;
 
 	return 0;
 }
@@ -1201,13 +1245,25 @@ static void __mthca_remove_one(struct pc
 	}
 }
 
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+	struct mthca_dev *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return -ENODEV;
+	__mthca_remove_one(pdev);
+	return __mthca_init_one(pdev, mdev->hca_type);
+}
+
 static int __devinit mthca_init_one(struct pci_dev *pdev,
-			     const struct pci_device_id *id)
+				    const struct pci_device_id *id)
 {
 	static int mthca_version_printed = 0;
-	int rc;
+	int ret;
 
 	mutex_lock(&mthca_device_mutex);
+
 	if (!mthca_version_printed) {
 		printk(KERN_INFO "%s", mthca_version);
 		++mthca_version_printed;
@@ -1220,9 +1276,11 @@ static int __devinit mthca_init_one(stru
 		return -ENODEV;
 	}
 
-	rc = __mthca_init_one(pdev, id->driver_data);
+	ret = __mthca_init_one(pdev, id->driver_data);
+
 	mutex_unlock(&mthca_device_mutex);
-	return rc;
+
+	return ret;
 }
 
 static void __devexit mthca_remove_one(struct pci_dev *pdev)
@@ -1230,18 +1288,6 @@ static void __devexit mthca_remove_one(s
 	mutex_lock(&mthca_device_mutex);
 	__mthca_remove_one(pdev);
 	mutex_unlock(&mthca_device_mutex);
-	return;
-}
-
-int mthca_restart_one(struct pci_dev *pdev)
-{
-	struct mthca_dev *mdev;
-
-	mdev = pci_get_drvdata(pdev);
-	if (!mdev)
-		return -ENODEV;
-	__mthca_remove_one(pdev);
-	return __mthca_init_one(pdev, mdev->hca_type);
 }
 
 static struct pci_device_id mthca_pci_table[] = {
@@ -1277,19 +1323,65 @@ static struct pci_driver mthca_driver = 
 	.remove		= __devexit_p(mthca_remove_one)
 };
 
+static void __init __mthca_check_profile_val(const char *name, int *pval,
+					     int pval_default)
+{
+	/* value must be positive and power of 2 */
+	int old_pval = *pval;
+
+	if (old_pval <= 0)
+		*pval = pval_default;
+	else
+		*pval = roundup_pow_of_two(old_pval);
+
+	if (old_pval != *pval) {
+		printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n",
+		       old_pval, name);
+		printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval);
+	}
+}
+
+#define mthca_check_profile_val(name, default)				\
+	__mthca_check_profile_val(#name, &hca_profile.name, default)
+
+static void __init mthca_validate_profile(void)
+{
+	mthca_check_profile_val(num_qp,            MTHCA_DEFAULT_NUM_QP);
+	mthca_check_profile_val(rdb_per_qp,        MTHCA_DEFAULT_RDB_PER_QP);
+	mthca_check_profile_val(num_cq,            MTHCA_DEFAULT_NUM_CQ);
+	mthca_check_profile_val(num_mcg, 	   MTHCA_DEFAULT_NUM_MCG);
+	mthca_check_profile_val(num_mpt, 	   MTHCA_DEFAULT_NUM_MPT);
+	mthca_check_profile_val(num_mtt, 	   MTHCA_DEFAULT_NUM_MTT);
+	mthca_check_profile_val(num_udav,          MTHCA_DEFAULT_NUM_UDAV);
+	mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS);
+
+	if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) {
+		printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n",
+		       hca_profile.fmr_reserved_mtts);
+		printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n",
+		       hca_profile.num_mtt);
+		hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2;
+		printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n",
+		       hca_profile.fmr_reserved_mtts);
+	}
+}
+
 static int __init mthca_init(void)
 {
 	int ret;
 
-	mutex_init(&mthca_device_mutex);
-	if (mthca_catas_init())
-		return -ENOMEM;
+	mthca_validate_profile();
+
+	ret = mthca_catas_init();
+	if (ret)
+		return ret;
 
 	ret = pci_register_driver(&mthca_driver);
 	if (ret < 0) {
 		mthca_catas_cleanup();
 		return ret;
 	}
+
 	return 0;
 }
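
mthca_validate_profile() above clamps each new module parameter: a non-positive value falls back to the built-in default, anything else is rounded up to the next power of two, and a warning is printed whenever the value changes. A small user-space sketch of that check; roundup_pow_of_two() is reimplemented here because the kernel helper is not available, and the parameter values in main() are arbitrary.

#include <stdio.h>

static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static int check_profile_val(const char *name, int val, int def)
{
	int fixed = (val <= 0) ? def : (int)roundup_pow_of_two(val);

	if (fixed != val)
		printf("Invalid value %d for %s, corrected to %d\n",
		       name, val, fixed);
	return fixed;
}

int main(void)
{
	int num_qp = check_profile_val("num_qp", 50000, 1 << 16);
	int num_cq = check_profile_val("num_cq", -1, 1 << 16);

	printf("num_qp=%d num_cq=%d\n", num_qp, num_cq);
	return 0;
}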
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -32,7 +32,6 @@
  * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -371,7 +370,7 @@ int mthca_multicast_detach(struct ib_qp 
 	return err;
 }
 
-int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
+int mthca_init_mcg_table(struct mthca_dev *dev)
 {
 	int err;
 	int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -35,6 +35,8 @@
  */
 
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <asm/page.h>
 
 #include "mthca_memfree.h"
 #include "mthca_dev.h"
@@ -58,22 +60,31 @@ struct mthca_user_db_table {
 	}                page[0];
 };
 
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
 {
 	struct mthca_icm_chunk *chunk, *tmp;
+	void *buf;
 	int i;
 
 	if (!icm)
 		return;
 
 	list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
-		if (chunk->nsg > 0)
-			pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
-				     PCI_DMA_BIDIRECTIONAL);
-
-		for (i = 0; i < chunk->npages; ++i)
-			__free_pages(chunk->mem[i].page,
-				     get_order(chunk->mem[i].length));
+		if (coherent)
+			for (i = 0; i < chunk->npages; ++i) {
+				buf = lowmem_page_address(chunk->mem[i].page);
+				dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+						  buf, sg_dma_address(&chunk->mem[i]));
+			}
+		else {
+			if (chunk->nsg > 0)
+				pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+					     PCI_DMA_BIDIRECTIONAL);
+
+			for (i = 0; i < chunk->npages; ++i)
+				__free_pages(chunk->mem[i].page,
+					     get_order(chunk->mem[i].length));
+		}
 
 		kfree(chunk);
 	}
@@ -81,12 +92,41 @@ void mthca_free_icm(struct mthca_dev *de
 	kfree(icm);
 }
 
+static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+	mem->page = alloc_pages(gfp_mask, order);
+	if (!mem->page)
+		return -ENOMEM;
+
+	mem->length = PAGE_SIZE << order;
+	mem->offset = 0;
+	return 0;
+}
+
+static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+				    int order, gfp_t gfp_mask)
+{
+	void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
+				       gfp_mask);
+	if (!buf)
+		return -ENOMEM;
+
+	sg_set_buf(mem, buf, PAGE_SIZE << order);
+	BUG_ON(mem->offset);
+	sg_dma_len(mem) = PAGE_SIZE << order;
+	return 0;
+}
+
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  gfp_t gfp_mask)
+				  gfp_t gfp_mask, int coherent)
 {
 	struct mthca_icm *icm;
 	struct mthca_icm_chunk *chunk = NULL;
 	int cur_order;
+	int ret;
+
+	/* We use sg_set_buf for coherent allocs, which assumes low memory */
+	BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
 
 	icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
 	if (!icm)
@@ -112,21 +152,30 @@ struct mthca_icm *mthca_alloc_icm(struct
 		while (1 << cur_order > npages)
 			--cur_order;
 
-		chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
-		if (chunk->mem[chunk->npages].page) {
-			chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
-			chunk->mem[chunk->npages].offset = 0;
-
-			if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+		if (coherent)
+			ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
+						       &chunk->mem[chunk->npages],
+						       cur_order, gfp_mask);
+		else
+		       	ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
+						    cur_order, gfp_mask);
+
+		if (!ret) {
+			++chunk->npages;
+
+			if (coherent)
+				++chunk->nsg;
+			else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
 				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
 							chunk->npages,
 							PCI_DMA_BIDIRECTIONAL);
 
 				if (chunk->nsg <= 0)
 					goto fail;
+			}
 
+			if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
 				chunk = NULL;
-			}
 
 			npages -= 1 << cur_order;
 		} else {
@@ -136,7 +185,7 @@ struct mthca_icm *mthca_alloc_icm(struct
 		}
 	}
 
-	if (chunk) {
+	if (!coherent && chunk) {
 		chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
 					chunk->npages,
 					PCI_DMA_BIDIRECTIONAL);
@@ -148,7 +197,7 @@ struct mthca_icm *mthca_alloc_icm(struct
 	return icm;
 
 fail:
-	mthca_free_icm(dev, icm);
+	mthca_free_icm(dev, icm, coherent);
 	return NULL;
 }
 
@@ -167,7 +216,7 @@ int mthca_table_get(struct mthca_dev *de
 
 	table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
 					(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-					__GFP_NOWARN);
+					__GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
 		goto out;
@@ -175,7 +224,7 @@ int mthca_table_get(struct mthca_dev *de
 
 	if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
 			  &status) || status) {
-		mthca_free_icm(dev, table->icm[i]);
+		mthca_free_icm(dev, table->icm[i], table->coherent);
 		table->icm[i] = NULL;
 		ret = -ENOMEM;
 		goto out;
@@ -204,16 +253,16 @@ void mthca_table_put(struct mthca_dev *d
 		mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
 				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
 				&status);
-		mthca_free_icm(dev, table->icm[i]);
+		mthca_free_icm(dev, table->icm[i], table->coherent);
 		table->icm[i] = NULL;
 	}
 
 	mutex_unlock(&table->mutex);
 }
 
-void *mthca_table_find(struct mthca_icm_table *table, int obj)
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
 {
-	int idx, offset, i;
+	int idx, offset, dma_offset, i;
 	struct mthca_icm_chunk *chunk;
 	struct mthca_icm *icm;
 	struct page *page = NULL;
@@ -225,14 +274,23 @@ void *mthca_table_find(struct mthca_icm_
 
 	idx = (obj & (table->num_obj - 1)) * table->obj_size;
 	icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
-	offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+	dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
 
 	if (!icm)
 		goto out;
 
 	list_for_each_entry(chunk, &icm->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i) {
-			if (chunk->mem[i].length >= offset) {
+			if (dma_handle && dma_offset >= 0) {
+				if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+					*dma_handle = sg_dma_address(&chunk->mem[i]) +
+					       	dma_offset;
+				dma_offset -= sg_dma_len(&chunk->mem[i]);
+			}
+			/* DMA mapping can merge pages but not split them,
+			 * so if we found the page, dma_handle has already
+			 * been assigned to. */
+			if (chunk->mem[i].length > offset) {
 				page = chunk->mem[i].page;
 				goto out;
 			}
@@ -283,7 +341,7 @@ void mthca_table_put_range(struct mthca_
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
 					      u64 virt, int obj_size,
 					      int nobj, int reserved,
-					      int use_lowmem)
+					      int use_lowmem, int use_coherent)
 {
 	struct mthca_icm_table *table;
 	int num_icm;
@@ -302,6 +360,7 @@ struct mthca_icm_table *mthca_alloc_icm_
 	table->num_obj  = nobj;
 	table->obj_size = obj_size;
 	table->lowmem   = use_lowmem;
+	table->coherent = use_coherent;
 	mutex_init(&table->mutex);
 
 	for (i = 0; i < num_icm; ++i)
@@ -314,12 +373,12 @@ struct mthca_icm_table *mthca_alloc_icm_
 
 		table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
 						(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-						__GFP_NOWARN);
+						__GFP_NOWARN, use_coherent);
 		if (!table->icm[i])
 			goto err;
 		if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
 				  &status) || status) {
-			mthca_free_icm(dev, table->icm[i]);
+			mthca_free_icm(dev, table->icm[i], table->coherent);
 			table->icm[i] = NULL;
 			goto err;
 		}
@@ -339,7 +398,7 @@ err:
 			mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
 					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
 				        &status);
-			mthca_free_icm(dev, table->icm[i]);
+			mthca_free_icm(dev, table->icm[i], table->coherent);
 		}
 
 	kfree(table);
@@ -357,7 +416,7 @@ void mthca_free_icm_table(struct mthca_d
 			mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
 					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
 					&status);
-			mthca_free_icm(dev, table->icm[i]);
+			mthca_free_icm(dev, table->icm[i], table->coherent);
 		}
 
 	kfree(table);
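
mthca_table_find() above now walks the chunk list both by page length, to find the page holding the object, and by DMA length, to report a bus address the caller can pass to dma_sync_single(). The sketch below shows only the offset walk, in plain C, with a flat array of made-up chunk lengths and addresses instead of a scatterlist, and without the page/DMA-merge distinction the real code handles.

#include <stdint.h>
#include <stdio.h>

struct chunk {
	uint64_t dma_addr;	/* stand-in for sg_dma_address() */
	uint32_t len;		/* stand-in for sg_dma_len() */
};

static int find_offset(const struct chunk *c, int nchunks, uint32_t offset,
		       uint64_t *dma_handle)
{
	int i;

	for (i = 0; i < nchunks; ++i) {
		if (offset < c[i].len) {
			*dma_handle = c[i].dma_addr + offset;
			return i;		/* chunk holding the object */
		}
		offset -= c[i].len;
	}
	return -1;				/* past the end of the table */
}

int main(void)
{
	struct chunk chunks[] = {
		{ 0x10000000, 4096 },
		{ 0x20000000, 8192 },
	};
	uint64_t dma = 0;
	int idx = find_offset(chunks, 2, 5000, &dma);

	printf("chunk %d, dma 0x%llx\n", idx, (unsigned long long)dma);
	return 0;
}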
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -69,6 +69,7 @@ struct mthca_icm_table {
 	int               num_obj;
 	int               obj_size;
 	int               lowmem;
+	int               coherent;
 	struct mutex      mutex;
 	struct mthca_icm *icm[0];
 };
@@ -82,17 +83,17 @@ struct mthca_icm_iter {
 struct mthca_dev;
 
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  gfp_t gfp_mask);
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
+				  gfp_t gfp_mask, int coherent);
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
 
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
 					      u64 virt, int obj_size,
 					      int nobj, int reserved,
-					      int use_lowmem);
+					      int use_lowmem, int use_coherent);
 void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
 int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
 void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
-void *mthca_table_find(struct mthca_icm_table *table, int obj);
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
 int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
 			  int start, int end);
 void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
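
The mthca_mr.c changes that follow add mthca_write_mtt_size() and teach mthca_write_mtt() to split a long buffer list into chunks that fit in one mailbox or one MTT page. A user-space sketch of that chunking loop; CHUNK_MAX and the write_seg() stub are made-up stand-ins for mthca_write_mtt_size() and the per-segment write helpers.

#include <stdint.h>
#include <stdio.h>

#define CHUNK_MAX 510	/* e.g. PAGE_SIZE / sizeof(u64) minus two mailbox slots */

static void write_seg(int start_index, const uint64_t *pages, int n)
{
	printf("write %d entries at index %d (first page 0x%llx)\n",
	       n, start_index, (unsigned long long)pages[0]);
}

static void write_mtt(int start_index, const uint64_t *pages, int list_len)
{
	while (list_len > 0) {
		int chunk = list_len < CHUNK_MAX ? list_len : CHUNK_MAX;

		write_seg(start_index, pages, chunk);
		list_len    -= chunk;
		start_index += chunk;
		pages       += chunk;
	}
}

int main(void)
{
	uint64_t pages[1200];
	int i;

	for (i = 0; i < 1200; ++i)
		pages[i] = 0x100000ull + (uint64_t)i * 4096;
	write_mtt(0, pages, 1200);	/* prints three chunks: 510, 510, 180 */
	return 0;
}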
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_mr.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -34,7 +34,6 @@
  */
 
 #include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include "mthca_dev.h"
@@ -135,7 +134,7 @@ static void mthca_buddy_free(struct mthc
 	spin_unlock(&buddy->lock);
 }
 
-static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
+static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 {
 	int i, s;
 
@@ -244,8 +243,8 @@ void mthca_free_mtt(struct mthca_dev *de
 	kfree(mtt);
 }
 
-int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
-		    int start_index, u64 *buffer_list, int list_len)
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			     int start_index, u64 *buffer_list, int list_len)
 {
 	struct mthca_mailbox *mailbox;
 	__be64 *mtt_entry;
@@ -296,6 +295,88 @@ out:
 	return err;
 }
 
+void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			      int start_index, u64 *buffer_list, int list_len)
+{
+	u64 __iomem *mtts;
+	u32 mtt_seg;
+	int i;
+
+	mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+       	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index * sizeof (u64);
+	for (i = 0; i < list_len; ++i) {
+		__be64 mtt_entry = cpu_to_be64(buffer_list[i] |
+					       MTHCA_MTT_FLAG_PRESENT);
+		mthca_write64_raw(mtt_entry, mtts + i);
+	}
+}
+
+void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			      int start_index, u64 *buffer_list, int list_len)
+{
+	__be64 *mtts;
+	dma_addr_t dma_handle;
+	int i;
+	int s = start_index * sizeof (u64);
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+	/* Require full segments */
+	BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+
+	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+				s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+
+	BUG_ON(!mtts);
+
+	for (i = 0; i < list_len; ++i)
+		mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+
+	dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof(u64), DMA_TO_DEVICE);
+}
+
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+		/*
+		 * Be friendly to WRITE_MTT command
+		 * and leave two empty slots for the
+		 * index and reserved fields of the
+		 * mailbox.
+		 */
+		return PAGE_SIZE / sizeof (u64) - 2;
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len)
+{
+	int size = mthca_write_mtt_size(dev);
+	int chunk;
+
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
+		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
+
+	while (list_len > 0) {
+		chunk = min(size, list_len);
+		if (mthca_is_memfree(dev))
+			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+						  buffer_list, chunk);
+		else
+			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
+						  buffer_list, chunk);
+
+		list_len    -= chunk;
+		start_index += chunk;
+		buffer_list += chunk;
+	}
+
+	return 0;
+}
+
 static inline u32 tavor_hw_index_to_key(u32 ind)
 {
 	return ind;
@@ -525,7 +606,7 @@ int mthca_fmr_alloc(struct mthca_dev *de
 		if (err)
 			goto err_out_mpt_free;
 
-		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
+		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
 		BUG_ON(!mr->mem.arbel.mpt);
 	} else
 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
@@ -539,7 +620,8 @@ int mthca_fmr_alloc(struct mthca_dev *de
 
 	if (mthca_is_memfree(dev)) {
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
-						      mr->mtt->first_seg);
+						      mr->mtt->first_seg,
+						      &mr->mem.arbel.dma_handle);
 		BUG_ON(!mr->mem.arbel.mtts);
 	} else
 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
@@ -713,6 +795,9 @@ int mthca_arbel_map_phys_fmr(struct ib_f
 		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
 						     MTHCA_MTT_FLAG_PRESENT);
 
+	dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+			list_len * sizeof(u64), DMA_TO_DEVICE);
+
 	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
 	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
 	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
@@ -752,6 +837,7 @@ void mthca_arbel_fmr_unmap(struct mthca_
 
 	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
 	key &= dev->limits.num_mpts - 1;
+	key = adjust_key(dev, key);
 	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
 
 	fmr->maps = 0;
@@ -759,10 +845,10 @@ void mthca_arbel_fmr_unmap(struct mthca_
 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
 }
 
-int __devinit mthca_init_mr_table(struct mthca_dev *dev)
+int mthca_init_mr_table(struct mthca_dev *dev)
 {
 	unsigned long addr;
-	int err, i;
+	int mpts, mtts, err, i;
 
 	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
 			       dev->limits.num_mpts,
@@ -796,13 +882,21 @@ int __devinit mthca_init_mr_table(struct
 			err = -EINVAL;
 			goto err_fmr_mpt;
 		}
+		mpts = mtts = 1 << i;
+	} else {
+		mtts = dev->limits.num_mtt_segs;
+		mpts = dev->limits.num_mpts;
+	}
+
+	if (!mthca_is_memfree(dev) &&
+	    (dev->mthca_flags & MTHCA_FLAG_FMR)) {
 
 		addr = pci_resource_start(dev->pdev, 4) +
 			((pci_resource_len(dev->pdev, 4) - 1) &
 			 dev->mr_table.mpt_base);
 
 		dev->mr_table.tavor_fmr.mpt_base =
-			ioremap(addr, (1 << i) * sizeof(struct mthca_mpt_entry));
+			ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
 
 		if (!dev->mr_table.tavor_fmr.mpt_base) {
 			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
@@ -815,19 +909,21 @@ int __devinit mthca_init_mr_table(struct
 			 dev->mr_table.mtt_base);
 
 		dev->mr_table.tavor_fmr.mtt_base =
-			ioremap(addr, (1 << i) * MTHCA_MTT_SEG_SIZE);
+			ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
 		if (!dev->mr_table.tavor_fmr.mtt_base) {
 			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
 			err = -ENOMEM;
 			goto err_fmr_mtt;
 		}
+	}
 
-		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
+	if (dev->limits.fmr_reserved_mtts) {
+		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
 		if (err)
 			goto err_fmr_mtt_buddy;
 
 		/* Prevent regular MRs from using FMR keys */
-		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
+		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
 		if (err)
 			goto err_reserve_fmr;
 
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_pd.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -34,7 +34,6 @@
  * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include "mthca_dev.h"
@@ -69,7 +68,7 @@ void mthca_pd_free(struct mthca_dev *dev
 	mthca_free(&dev->pd_table.alloc, pd->pd_num);
 }
 
-int __devinit mthca_init_pd_table(struct mthca_dev *dev)
+int mthca_init_pd_table(struct mthca_dev *dev)
 {
 	return mthca_alloc_init(&dev->pd_table.alloc,
 				dev->limits.num_pds,
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_profile.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -277,7 +277,7 @@ u64 mthca_make_profile(struct mthca_dev 
 	 * out of the MR pool. They don't use additional memory, but
 	 * we assign them as part of the HCA profile anyway.
 	 */
-	if (mthca_is_memfree(dev))
+	if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
 		dev->limits.fmr_reserved_mtts = 0;
 	else
 		dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_provider.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -124,7 +124,7 @@ static int mthca_query_device(struct ib_
 		props->max_map_per_fmr = 255;
 	else
 		props->max_map_per_fmr =
-			(1 << (32 - long_log2(mdev->limits.num_mpts))) - 1;
+			(1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;
 
 	err = 0;
  out:
@@ -816,7 +816,7 @@ static int mthca_resize_cq(struct ib_cq 
 		lkey = ucmd.lkey;
 	}
 
-	ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
+	ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status);
 	if (status)
 		ret = -EINVAL;
 
@@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(s
 	int shift, n, len;
 	int i, j, k;
 	int err = 0;
+	int write_mtt_size;
 
 	shift = ffs(region->page_size) - 1;
 
@@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(s
 
 	i = n = 0;
 
+	write_mtt_size = min(mthca_write_mtt_size(dev), (int)(PAGE_SIZE / sizeof *pages));
+
 	list_for_each_entry(chunk, &region->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(s
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
 					region->page_size * k;
 				/*
-				 * Be friendly to WRITE_MTT command
-				 * and leave two empty slots for the
-				 * index and reserved fields of the
-				 * mailbox.
+				 * Be friendly to write_mtt and pass it chunks
+				 * of appropriate size.
 				 */
-				if (i == PAGE_SIZE / sizeof (u64) - 2) {
-					err = mthca_write_mtt(dev, mr->mtt,
-							      n, pages, i);
+				if (i == write_mtt_size) {
+					err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
 					if (err)
 						goto mtt_done;
 					n += i;
@@ -1290,7 +1290,7 @@ int mthca_register_device(struct mthca_d
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-	dev->ib_dev.node_type            = IB_NODE_CA;
+	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
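
One more note on the long_log2() -> ilog2() conversion in mthca_query_device()
above: the arithmetic is unchanged, so mem-free HCAs still report
max_map_per_fmr = (1 << (32 - ilog2(num_mpts))) - 1.  The quick userspace
check below uses made-up MPT counts; the real value comes from
dev->limits.num_mpts, and ilog2_u32() is just a local helper, not a kernel API.

#include <stdio.h>

/* ilog2() for a power-of-two value: index of the highest set bit. */
static int ilog2_u32(unsigned int v)
{
	int r = -1;

	while (v) {
		v >>= 1;
		++r;
	}
	return r;
}

int main(void)
{
	/* Hypothetical MPT counts; the driver reads dev->limits.num_mpts. */
	unsigned int num_mpts[] = { 1 << 16, 1 << 17, 1 << 20 };
	int i;

	for (i = 0; i < 3; ++i) {
		int max_maps = (1 << (32 - ilog2_u32(num_mpts[i]))) - 1;

		printf("num_mpts=%u -> max_map_per_fmr=%d\n",
		       num_mpts[i], max_maps);
	}
	return 0;
}
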
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_provider.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -89,6 +89,7 @@ struct mthca_fmr {
 		struct {
 			struct mthca_mpt_entry *mpt;
 			__be64 *mtts;
+			dma_addr_t dma_handle;
 		} arbel;
 	} mem;
 };
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_qp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -35,9 +35,11 @@
  * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
  */
 
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/hardirq.h>
+
+#include <asm/io.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
@@ -48,6 +50,10 @@
 #include "mthca_memfree.h"
 #include "mthca_wqe.h"
 
+static int mthca_qos_support = 0;
+module_param_named(qos_support, mthca_qos_support, int, 0644);
+MODULE_PARM_DESC(qos_support, "Enable QoS support if > 0");
+
 enum {
 	MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
 	MTHCA_ACK_REQ_FREQ       = 10,
@@ -294,7 +300,7 @@ static int to_mthca_st(int transport)
 	}
 }
 
-static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
 			int attr_mask)
 {
 	if (attr_mask & IB_QP_PKEY_INDEX)
@@ -326,7 +332,7 @@ static void init_port(struct mthca_dev *
 		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
 }
 
-static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
+static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
 				  int attr_mask)
 {
 	u8 dest_rd_atomic;
@@ -428,13 +434,18 @@ int mthca_query_qp(struct ib_qp *ibqp, s
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	int err;
-	struct mthca_mailbox *mailbox;
+	int err = 0;
+	struct mthca_mailbox *mailbox = NULL;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *context;
 	int mthca_state;
 	u8 status;
 
+	if (qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
@@ -453,7 +464,6 @@ int mthca_query_qp(struct ib_qp *ibqp, s
 	mthca_state = be32_to_cpu(context->flags) >> 28;
 
 	qp_attr->qp_state 	     = to_ib_qp_state(mthca_state);
-	qp_attr->cur_qp_state 	     = qp_attr->qp_state;
 	qp_attr->path_mtu 	     = context->mtu_msgmax >> 5;
 	qp_attr->path_mig_state      =
 		to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -463,11 +473,6 @@ int mthca_query_qp(struct ib_qp *ibqp, s
 	qp_attr->dest_qp_num 	     = be32_to_cpu(context->remote_qpn) & 0xffffff;
 	qp_attr->qp_access_flags     =
 		to_ib_qp_access_flags(be32_to_cpu(context->params2));
-	qp_attr->cap.max_send_wr     = qp->sq.max;
-	qp_attr->cap.max_recv_wr     = qp->rq.max;
-	qp_attr->cap.max_send_sge    = qp->sq.max_gs;
-	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
-	qp_attr->cap.max_inline_data = qp->max_inline_data;
 
 	if (qp->transport == RC || qp->transport == UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
@@ -494,14 +499,23 @@ int mthca_query_qp(struct ib_qp *ibqp, s
 	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
 	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
 	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;
-	qp_init_attr->cap 	    = qp_attr->cap;
+
+done:
+	qp_attr->cur_qp_state	     = qp_attr->qp_state;
+	qp_attr->cap.max_send_wr     = qp->sq.max;
+	qp_attr->cap.max_recv_wr     = qp->rq.max;
+	qp_attr->cap.max_send_sge    = qp->sq.max_gs;
+	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+	qp_init_attr->cap	     = qp_attr->cap;
 
 out:
 	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
-static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
 			  struct mthca_qp_path *path, u8 port)
 {
 	path->g_mylmc     = ah->src_path_bits & 0x7f;
@@ -529,11 +543,12 @@ static int mthca_path_set(struct mthca_d
 	return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+static int __mthca_modify_qp(struct ib_qp *ibqp,
+			     const struct ib_qp_attr *attr, int attr_mask,
+			     enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
 	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
@@ -541,55 +556,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 	u8 status;
 	int err = -EINVAL;
 
-	mutex_lock(&qp->mutex);
-
-	if (attr_mask & IB_QP_CUR_STATE) {
-		cur_state = attr->cur_qp_state;
-	} else {
-		spin_lock_irq(&qp->sq.lock);
-		spin_lock(&qp->rq.lock);
-		cur_state = qp->state;
-		spin_unlock(&qp->rq.lock);
-		spin_unlock_irq(&qp->sq.lock);
-	}
-
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
-		mthca_dbg(dev, "Bad QP transition (transport %d) "
-			  "%d->%d with attr 0x%08x\n",
-			  qp->transport, cur_state, new_state,
-			  attr_mask);
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PKEY_INDEX) &&
-	     attr->pkey_index >= dev->limits.pkey_table_len) {
-		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
-			  attr->pkey_index, dev->limits.pkey_table_len-1);
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PORT) &&
-	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
-		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
-		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
-			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
-		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
-			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
-		goto out;
-	}
-
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 	if (IS_ERR(mailbox)) {
 		err = PTR_ERR(mailbox);
@@ -634,11 +600,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 
 	if (mthca_is_memfree(dev)) {
 		if (qp->rq.max)
-			qp_context->rq_size_stride = long_log2(qp->rq.max) << 3;
+			qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
 		qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
 
 		if (qp->sq.max)
-			qp_context->sq_size_stride = long_log2(qp->sq.max) << 3;
+			qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
 		qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
 	}
 
@@ -684,6 +650,32 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 			goto out_mailbox;
 
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
+		if (mthca_qos_support) {
+			u8 sl = attr->ah_attr.sl;
+			u8 sched_queue = (sl & 0x8) | (sl & (~(sl >> 1)) & 0x4) |
+				((sl >> 1) & (sl >> 2) & 0x2) | ((sl >> 1) & 0x1);
+
+			if (mthca_is_memfree(dev)) {
+				qp_context->rlkey_arbel_sched_queue |= sched_queue;
+			} else {
+				qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
+			}
+			qp_param->opt_param_mask |=
+				cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
+		}
+	}
+
+	if (ibqp->qp_type == IB_QPT_RC &&
+	    cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+		u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;
+
+		if (mthca_is_memfree(dev))
+			qp_context->rlkey_arbel_sched_queue |= sched_queue;
+		else
+			qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
+
+		qp_param->opt_param_mask |=
+			cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
 	}
 
 	if (attr_mask & IB_QP_TIMEOUT) {
@@ -844,11 +836,15 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
+
+		if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+			synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+		else
+			synchronize_irq(dev->pdev->irq);
 
 		mthca_wq_reset(&qp->sq);
 		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
@@ -864,7 +860,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 
 out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
+out:
+	return err;
+}
 
+static const struct ib_qp_attr mthca_qp_attr = { .port_num = 1};
+static const int mthca_qp_attr_mask_table[IB_QPT_UD + 1] = {
+		[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
+				IB_QP_PORT			|
+				IB_QP_QKEY),
+		[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
+				IB_QP_PORT			|
+				IB_QP_ACCESS_FLAGS),
+		[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
+				IB_QP_PORT			|
+				IB_QP_ACCESS_FLAGS),
+		[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
+				IB_QP_QKEY),
+		[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
+				IB_QP_QKEY),
+};
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata)
+{
+	struct mthca_dev *dev = to_mdev(ibqp->device);
+	struct mthca_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	int err = -EINVAL;
+
+	mutex_lock(&qp->mutex);
+	if (attr_mask & IB_QP_CUR_STATE) {
+		cur_state = attr->cur_qp_state;
+	} else {
+		spin_lock_irq(&qp->sq.lock);
+		spin_lock(&qp->rq.lock);
+		cur_state = qp->state;
+		spin_unlock(&qp->rq.lock);
+		spin_unlock_irq(&qp->sq.lock);
+	}
+
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+		mthca_dbg(dev, "Bad QP transition (transport %d) "
+			  "%d->%d with attr 0x%08x\n",
+			  qp->transport, cur_state, new_state,
+			  attr_mask);
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PKEY_INDEX) &&
+	     attr->pkey_index >= dev->limits.pkey_table_len) {
+		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+			  attr->pkey_index, dev->limits.pkey_table_len-1);
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+		goto out;
+	}
+
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+		err = __mthca_modify_qp(ibqp, &mthca_qp_attr,
+					mthca_qp_attr_mask_table[ibqp->qp_type],
+					IB_QPS_RESET, IB_QPS_INIT);
+		if (err)
+			goto out;
+		cur_state = IB_QPS_INIT;
+	}
+
+	err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
 out:
 	mutex_unlock(&qp->mutex);
 	return err;
@@ -1376,6 +1463,10 @@ void mthca_free_qp(struct mthca_dev *dev
 	struct mthca_cq *send_cq;
 	struct mthca_cq *recv_cq;
 
+	if (qp->state != IB_QPS_RESET)
+		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+				NULL, 0, &status);
+
 	send_cq = to_mcq(qp->ibqp.send_cq);
 	recv_cq = to_mcq(qp->ibqp.recv_cq);
 
@@ -1389,15 +1480,25 @@ void mthca_free_qp(struct mthca_dev *dev
 	mthca_array_clear(&dev->qp_table.qp,
 			  qp->qpn & (dev->limits.num_qps - 1));
 	--qp->refcount;
+
+	if (!qp->ibqp.uobject) {
+		__mthca_cq_clean(dev, send_cq, qp->qpn,
+			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			__mthca_cq_clean(dev, recv_cq, qp->qpn,
+					 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+	}
+
 	spin_unlock(&dev->qp_table.lock);
 
 	mthca_unlock_cqs(send_cq, recv_cq);
 
-	wait_event(qp->wait, !get_qp_refcount(dev, qp));
+	if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+		synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+	else
+		synchronize_irq(dev->pdev->irq);
 
-	if (qp->state != IB_QPS_RESET)
-		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
-				NULL, 0, &status);
+	wait_event(qp->wait, !get_qp_refcount(dev, qp));
 
 	/*
 	 * If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1405,12 +1506,6 @@ void mthca_free_qp(struct mthca_dev *dev
 	 * unref the mem-free tables and free the QPN in our table.
 	 */
 	if (!qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
-			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-
 		mthca_free_memfree(dev, qp);
 		mthca_free_wqe_buf(dev, qp);
 	}
@@ -1688,7 +1783,7 @@ int mthca_tavor_post_send(struct ib_qp *
 			size += sizeof (struct mthca_data_seg) / 16;
 		}
 
-		qp->wrid[ind + qp->rq.max] = wr->wr_id;
+		qp->wrid[ind] = wr->wr_id;
 
 		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
 			mthca_err(dev, "opcode invalid\n");
@@ -1732,6 +1827,11 @@ out:
 		mthca_write64(doorbell,
 			      dev->kar + MTHCA_SEND_DOORBELL,
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+		/*
+		 * Make sure doorbells don't leak out of SQ spinlock
+		 * and reach the HCA out of order:
+		 */
+		mmiowb();
 	}
 
 	qp->sq.next_ind = ind;
@@ -1803,7 +1903,7 @@ int mthca_tavor_post_receive(struct ib_q
 			size += sizeof (struct mthca_data_seg) / 16;
 		}
 
-		qp->wrid[ind] = wr->wr_id;
+		qp->wrid[ind + qp->sq.max] = wr->wr_id;
 
 		((struct mthca_next_seg *) prev_wqe)->nda_op =
 			cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
@@ -1851,6 +1951,12 @@ out:
 	qp->rq.next_ind = ind;
 	qp->rq.head    += nreq;
 
+	/*
+	 * Make sure doorbells don't leak out of RQ spinlock and reach
+	 * the HCA out of order:
+	 */
+	mmiowb();
+
 	spin_unlock_irqrestore(&qp->rq.lock, flags);
 	return err;
 }
@@ -2055,7 +2161,7 @@ int mthca_arbel_post_send(struct ib_qp *
 			size += sizeof (struct mthca_data_seg) / 16;
 		}
 
-		qp->wrid[ind + qp->rq.max] = wr->wr_id;
+		qp->wrid[ind] = wr->wr_id;
 
 		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
 			mthca_err(dev, "opcode invalid\n");
@@ -2112,6 +2218,12 @@ out:
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
+	/*
+	 * Make sure doorbells don't leak out of SQ spinlock and reach
+	 * the HCA out of order:
+	 */
+	mmiowb();
+
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
 	return err;
 }
@@ -2173,7 +2285,7 @@ int mthca_arbel_post_receive(struct ib_q
 			((struct mthca_data_seg *) wqe)->addr = 0;
 		}
 
-		qp->wrid[ind] = wr->wr_id;
+		qp->wrid[ind + qp->sq.max] = wr->wr_id;
 
 		++ind;
 		if (unlikely(ind >= qp->rq.max))
@@ -2204,7 +2316,7 @@ void mthca_free_err_wqe(struct mthca_dev
 	 * For SRQs, all WQEs generate a CQE, so we're always at the
 	 * end of the doorbell chain.
 	 */
-	if (qp->ibqp.srq) {
+	if (qp->ibqp.srq && !is_send) {
 		*new_wqe = 0;
 		return;
 	}
@@ -2222,7 +2334,7 @@ void mthca_free_err_wqe(struct mthca_dev
 		*new_wqe = 0;
 }
 
-int __devinit mthca_init_qp_table(struct mthca_dev *dev)
+int mthca_init_qp_table(struct mthca_dev *dev)
 {
 	int err;
 	u8 status;
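
For reviewers of the new qos_support path in mthca_qp.c: the schedule queue is
derived from the SL with the bit expression added to __mthca_modify_qp() above.
The standalone program below merely tabulates that mapping for SL 0-15 so the
intent is easier to eyeball; sl_to_sched_queue() is a local helper for
illustration and does not touch any QP context fields.

#include <stdio.h>

/* Same expression as in __mthca_modify_qp() when qos_support is set. */
static unsigned char sl_to_sched_queue(unsigned char sl)
{
	return (sl & 0x8) | (sl & (~(sl >> 1)) & 0x4) |
	       ((sl >> 1) & (sl >> 2) & 0x2) | ((sl >> 1) & 0x1);
}

int main(void)
{
	unsigned int sl;

	for (sl = 0; sl < 16; ++sl)
		printf("SL %2u -> sched_queue 0x%x\n", sl,
		       (unsigned int) sl_to_sched_queue(sl));
	return 0;
}
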
--- linux-2.6.18.noarch/drivers/infiniband/hw/mthca/mthca_srq.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -35,6 +35,8 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 
+#include <asm/io.h>
+
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
 #include "mthca_memfree.h"
@@ -118,7 +120,7 @@ static void mthca_arbel_init_srq_context
 
 	memset(context, 0, sizeof *context);
 
-	logsize = long_log2(srq->max) + srq->wqe_shift;
+	logsize = ilog2(srq->max);
 	context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
 	context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
 	context->db_index = cpu_to_be32(srq->db_index);
@@ -201,6 +203,8 @@ int mthca_alloc_srq(struct mthca_dev *de
 
 	if (mthca_is_memfree(dev))
 		srq->max = roundup_pow_of_two(srq->max + 1);
+	else
+		srq->max = srq->max + 1;
 
 	ds = max(64UL,
 		 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
@@ -209,7 +213,7 @@ int mthca_alloc_srq(struct mthca_dev *de
 	if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
 		return -EINVAL;
 
-	srq->wqe_shift = long_log2(ds);
+	srq->wqe_shift = ilog2(ds);
 
 	srq->srqn = mthca_alloc(&dev->srq_table.alloc);
 	if (srq->srqn == -1)
@@ -277,7 +281,7 @@ int mthca_alloc_srq(struct mthca_dev *de
 	srq->first_free = 0;
 	srq->last_free  = srq->max - 1;
 
-	attr->max_wr    = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+	attr->max_wr    = srq->max - 1;
 	attr->max_sge   = srq->max_gs;
 
 	return 0;
@@ -358,7 +362,7 @@ void mthca_free_srq(struct mthca_dev *de
 }
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
@@ -413,7 +417,7 @@ int mthca_query_srq(struct ib_srq *ibsrq
 		srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
 	}
 
-	srq_attr->max_wr  = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+	srq_attr->max_wr  = srq->max - 1;
 	srq_attr->max_sge = srq->max_gs;
 
 out:
@@ -593,6 +597,12 @@ int mthca_tavor_post_srq_recv(struct ib_
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
+	/*
+	 * Make sure doorbells don't leak out of SRQ spinlock and
+	 * reach the HCA out of order:
+	 */
+	mmiowb();
+
 	spin_unlock_irqrestore(&srq->lock, flags);
 	return err;
 }
@@ -705,7 +715,7 @@ int mthca_max_srq_sge(struct mthca_dev *
 		     sizeof (struct mthca_data_seg));
 }
 
-int __devinit mthca_init_srq_table(struct mthca_dev *dev)
+int mthca_init_srq_table(struct mthca_dev *dev)
 {
 	int err;
 
--- linux-2.6.18.noarch/drivers/infiniband/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/Kconfig
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
 	  is the kernel side of the userspace MAD support, which allows
-	  userspace processes to send and receive MADs. You will also 
+	  userspace processes to send and receive MADs. You will also
 	  need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
@@ -37,6 +37,8 @@ config INFINIBAND_ADDR_TRANS
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
 source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
+source "drivers/infiniband/hw/cxgb3/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
@@ -46,6 +48,8 @@ source "drivers/infiniband/ulp/iser/Kcon
 
 source "drivers/infiniband/ulp/sdp/Kconfig"
 
+source "drivers/infiniband/ulp/vnic/Kconfig"
+
 source "drivers/infiniband/util/Kconfig"
 
 endmenu
--- linux-2.6.18.noarch/drivers/infiniband/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/Makefile
@@ -2,8 +2,11 @@ obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
 obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
 obj-$(CONFIG_INFINIBAND_SDP)		+= ulp/sdp/
+obj-$(CONFIG_INFINIBAND_VNIC)		+= ulp/vnic/
 obj-$(CONFIG_INFINIBAND_MADEYE)		+= util/
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -0,0 +1,1379 @@
+/*
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <net/dst.h>
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/delay.h>
+
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
+static int data_debug_level;
+
+module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
+MODULE_PARM_DESC(cm_data_debug_level,
+		 "Enable data path debug tracing for connected mode if > 0");
+#endif
+
+#include "ipoib.h"
+
+#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
+
+#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
+#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
+#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
+#define IPOIB_CM_RX_UPDATE_MASK (0x3)
+
+struct ipoib_cm_id {
+	struct ib_cm_id *id;
+	int flags;
+	u32 remote_qpn;
+	u32 remote_mtu;
+};
+
+static struct ib_qp_attr ipoib_cm_err_attr = {
+	.qp_state = IB_QPS_ERR
+};
+
+#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
+
+static struct ib_send_wr ipoib_cm_rx_drain_wr = {
+	.wr_id = IPOIB_CM_RX_DRAIN_WRID,
+	.opcode = IB_WR_SEND,
+};
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event);
+
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
+				  u64 mapping[IPOIB_CM_RX_SG])
+{
+	int i;
+
+	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+	for (i = 0; i < frags; ++i)
+		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+}
+
+static int ipoib_cm_post_receive(struct net_device *dev, int id)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_recv_wr *bad_wr;
+	int i, ret;
+
+	priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+
+	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+	if (unlikely(ret)) {
+		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+				      priv->cm.srq_ring[id].mapping);
+		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+		priv->cm.srq_ring[id].skb = NULL;
+	}
+
+	return ret;
+}
+
+static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
+					     u64 mapping[IPOIB_CM_RX_SG])
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	int i;
+
+	skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+	if (unlikely(!skb))
+		return NULL;
+
+	/*
+	 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+	 * IP header to a multiple of 16.
+	 */
+	skb_reserve(skb, 12);
+
+	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
+				       DMA_FROM_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+		dev_kfree_skb_any(skb);
+		return NULL;
+	}
+
+	for (i = 0; i < frags; i++) {
+		struct page *page = alloc_page(GFP_ATOMIC);
+
+		if (!page)
+			goto partial_error;
+		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+
+		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
+						 0, PAGE_SIZE, DMA_FROM_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
+			goto partial_error;
+	}
+
+	priv->cm.srq_ring[id].skb = skb;
+	return skb;
+
+partial_error:
+
+	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+	for (; i >= 0; --i)
+		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+
+	dev_kfree_skb_any(skb);
+	return NULL;
+}
+
+static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
+{
+	struct ib_send_wr *bad_wr;
+	struct ipoib_cm_rx *p;
+
+	/* We only reserved 1 extra slot in CQ for drain WRs, so
+	 * make sure we have at most 1 outstanding WR. */
+	if (list_empty(&priv->cm.rx_flush_list) ||
+	    !list_empty(&priv->cm.rx_drain_list))
+		return;
+
+	/*
+	 * QPs on the flush list are in the error state.  This way, a "flush
+	 * error" WC will be generated immediately for each WR we post.
+	 */
+	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+		ipoib_warn(priv, "failed to post drain wr\n");
+
+	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
+}
+
+static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
+{
+	struct ipoib_cm_rx *p = ctx;
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	unsigned long flags;
+
+	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
+		return;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_move(&p->list, &priv->cm.rx_flush_list);
+	p->state = IPOIB_CM_RX_FLUSH;
+	ipoib_cm_start_rx_drain(priv);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
+					   struct ipoib_cm_rx *p)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_init_attr attr = {
+		.event_handler = ipoib_cm_rx_event_handler,
+		.send_cq = priv->cq, /* For drain WR */
+		.recv_cq = priv->cq,
+		.srq = priv->cm.srq,
+		.cap.max_send_wr = 1, /* For drain WR */
+		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+		.sq_sig_type = IB_SIGNAL_ALL_WR,
+		.qp_type = IB_QPT_RC,
+		.qp_context = p,
+	};
+	return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_modify_rx_qp(struct net_device *dev,
+				  struct ib_cm_id *cm_id, struct ib_qp *qp,
+				  unsigned psn)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
+		return ret;
+	}
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
+		return ret;
+	}
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+		return ret;
+	}
+	qp_attr.rq_psn = psn;
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+		return ret;
+	}
+
+	/* Mellanox firmware won't generate completions with error for drain WRs
+	 * unless the QP has been moved to RTS first. This work-around leaves a
+	 * window where a QP has moved to error asynchronously, but this will
+	 * eventually get fixed in firmware, so let's not error out if modify QP
+	 * fails. */
+	qp_attr.qp_state = IB_QPS_RTS;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+		return 0;
+	}
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
+			     unsigned psn)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_data data = {};
+	struct ib_cm_rep_param rep = {};
+
+	data.qpn = cpu_to_be32(priv->qp->qp_num);
+	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+	rep.private_data = &data;
+	rep.private_data_len = sizeof data;
+	rep.flow_control = 0;
+	rep.rnr_retry_count = req->rnr_retry_count;
+	rep.target_ack_delay = 20; /* FIXME */
+	rep.srq = 1;
+	rep.qp_num = qp->qp_num;
+	rep.starting_psn = psn;
+	return ib_send_cm_rep(cm_id, &rep);
+}
+
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct net_device *dev = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_rx *p;
+	unsigned psn;
+	int ret;
+
+	ipoib_dbg(priv, "REQ arrived\n");
+	p = kzalloc(sizeof *p, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	p->dev = dev;
+	p->id = cm_id;
+	cm_id->context = p;
+	p->state = IPOIB_CM_RX_LIVE;
+	p->jiffies = jiffies;
+	INIT_LIST_HEAD(&p->list);
+
+	p->qp = ipoib_cm_create_rx_qp(dev, p);
+	if (IS_ERR(p->qp)) {
+		ret = PTR_ERR(p->qp);
+		goto err_qp;
+	}
+
+	psn = net_random() & 0xffffff;
+	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+	if (ret)
+		goto err_modify;
+
+	spin_lock_irq(&priv->lock);
+	queue_delayed_work(ipoib_workqueue,
+			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+	/* Add this entry to the head of the passive_ids list, but do not re-add
+	 * it if IB_EVENT_QP_LAST_WQE_REACHED has moved it to the flush list. */
+	p->jiffies = jiffies;
+	if (p->state == IPOIB_CM_RX_LIVE)
+		list_move(&p->list, &priv->cm.passive_ids);
+	spin_unlock_irq(&priv->lock);
+
+	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
+	if (ret) {
+		ipoib_warn(priv, "failed to send REP: %d\n", ret);
+		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+			ipoib_warn(priv, "unable to move qp to error state\n");
+	}
+	return 0;
+
+err_modify:
+	ib_destroy_qp(p->qp);
+err_qp:
+	kfree(p);
+	return ret;
+}
+
+static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event)
+{
+	struct ipoib_cm_rx *p;
+	struct ipoib_dev_priv *priv;
+
+	switch (event->event) {
+	case IB_CM_REQ_RECEIVED:
+		return ipoib_cm_req_handler(cm_id, event);
+	case IB_CM_DREQ_RECEIVED:
+		p = cm_id->context;
+		ib_send_cm_drep(cm_id, NULL, 0);
+		/* Fall through */
+	case IB_CM_REJ_RECEIVED:
+		p = cm_id->context;
+		priv = netdev_priv(p->dev);
+		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+			ipoib_warn(priv, "unable to move qp to error state\n");
+		/* Fall through */
+	default:
+		return 0;
+	}
+}
+/* Adjust length of skb with fragments to match received data */
+static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
+			  unsigned int length, struct sk_buff *toskb)
+{
+	int i, num_frags;
+	unsigned int size;
+
+	/* put header into skb */
+	size = min(length, hdr_space);
+	skb->tail += size;
+	skb->len += size;
+	length -= size;
+
+	num_frags = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < num_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		if (length == 0) {
+			/* don't need this page */
+			skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
+			--skb_shinfo(skb)->nr_frags;
+		} else {
+			size = min(length, (unsigned) PAGE_SIZE);
+
+			frag->size = size;
+			skb->data_len += size;
+			skb->truesize += size;
+			skb->len += size;
+			length -= size;
+		}
+	}
+}
+
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+	struct sk_buff *skb, *newskb;
+	struct ipoib_cm_rx *p;
+	unsigned long flags;
+	u64 mapping[IPOIB_CM_RX_SG];
+	int frags;
+
+	ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
+
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
+			spin_lock_irqsave(&priv->lock, flags);
+			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
+			ipoib_cm_start_rx_drain(priv);
+			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+			spin_unlock_irqrestore(&priv->lock, flags);
+		} else
+			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+				   wr_id, ipoib_recvq_size);
+		return;
+	}
+
+	skb  = priv->cm.srq_ring[wr_id].skb;
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		ipoib_dbg(priv, "cm recv error "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
+
+	if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
+		p = wc->qp->qp_context;
+		if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
+			spin_lock_irqsave(&priv->lock, flags);
+			p->jiffies = jiffies;
+			/* Move this entry to the list head, but do not re-add
+			 * it if it has already been moved off the list. */
+			if (p->state == IPOIB_CM_RX_LIVE)
+				list_move(&p->list, &priv->cm.passive_ids);
+			spin_unlock_irqrestore(&priv->lock, flags);
+		}
+	}
+
+	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+
+	newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+	if (unlikely(!newskb)) {
+		/*
+		 * If we can't allocate a new RX buffer, dump
+		 * this packet and reuse the old buffer.
+		 */
+		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
+
+	ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
+	memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
+
+	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
+
+	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+	skb->mac.raw = skb->data;
+	skb_pull(skb, IPOIB_ENCAP_LEN);
+
+	dev->last_rx = jiffies;
+	++priv->stats.rx_packets;
+	priv->stats.rx_bytes += skb->len;
+
+	skb->dev = dev;
+	/* XXX get correct PACKET_ type here */
+	skb->pkt_type = PACKET_HOST;
+	netif_rx_ni(skb);
+
+repost:
+	if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_cm_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
+
+static inline int post_send(struct ipoib_dev_priv *priv,
+			    struct ipoib_cm_tx *tx,
+			    unsigned int wr_id,
+			    u64 addr, int len)
+{
+	struct ib_send_wr *bad_wr;
+
+	priv->tx_sge.addr             = addr;
+	priv->tx_sge.length           = len;
+
+	priv->tx_wr.wr_id 	      = wr_id;
+
+	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_tx_buf *tx_req;
+	u64 addr;
+
+	if (unlikely(skb->len > tx->mtu)) {
+		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
+			   skb->len, tx->mtu);
+		++priv->stats.tx_dropped;
+		++priv->stats.tx_errors;
+		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
+		return;
+	}
+
+	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
+		       tx->tx_head, skb->len, tx->qp->qp_num);
+
+	/*
+	 * We put the skb into the tx_ring _before_ we call post_send()
+	 * because it's entirely possible that the completion handler will
+	 * run before we execute anything after the post_send().  That
+	 * means we have to make sure everything is properly recorded and
+	 * our state is consistent before we call post_send().
+	 */
+	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
+	tx_req->skb = skb;
+	addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+		++priv->stats.tx_errors;
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	tx_req->mapping = addr;
+
+	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
+			        addr, skb->len))) {
+		ipoib_warn(priv, "post_send failed\n");
+		++priv->stats.tx_errors;
+		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		dev_kfree_skb_any(skb);
+	} else {
+		dev->trans_start = jiffies;
+		++tx->tx_head;
+
+		if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+			ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
+				  tx->qp->qp_num);
+			netif_stop_queue(dev);
+			set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+		}
+	}
+}
+
+static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
+				  struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
+
+	ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
+
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
+	}
+
+	tx_req = &tx->tx_ring[wr_id];
+
+	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+
+	/* FIXME: is this right? Shouldn't we only increment on success? */
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
+
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++tx->tx_tail;
+	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
+	    tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
+		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+		netif_wake_queue(dev);
+	}
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR) {
+		struct ipoib_neigh *neigh;
+
+		ipoib_dbg(priv, "failed cm send event "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+
+		spin_lock(&priv->lock);
+		neigh = tx->neigh;
+
+		if (neigh) {
+			neigh->cm = NULL;
+			list_del(&neigh->list);
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			ipoib_neigh_free(dev, neigh);
+
+			tx->neigh = NULL;
+		}
+
+		/* queue would be re-started anyway when TX is destroyed,
+		 * but it makes sense to do it ASAP here. */
+		if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
+			netif_wake_queue(dev);
+
+		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+			list_move(&tx->list, &priv->cm.reap_list);
+			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		}
+
+		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
+
+		spin_unlock(&priv->lock);
+	}
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
+{
+	struct ipoib_cm_tx *tx = tx_ptr;
+	int n, i;
+
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	do {
+		n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
+		for (i = 0; i < n; ++i)
+			ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
+	} while (n == IPOIB_NUM_WC);
+}
+
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int ret;
+
+	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
+		return 0;
+
+	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
+	if (IS_ERR(priv->cm.id)) {
+		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
+		ret = PTR_ERR(priv->cm.id);
+		goto err_cm;
+	}
+
+	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
+			   0, NULL);
+	if (ret) {
+		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
+		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
+		goto err_listen;
+	}
+
+	return 0;
+
+err_listen:
+	ib_destroy_cm_id(priv->cm.id);
+err_cm:
+	priv->cm.id = NULL;
+	return ret;
+}
+
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_rx *p, *n;
+	unsigned long begin;
+	LIST_HEAD(list);
+	int ret;
+
+	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
+		return;
+
+	ib_destroy_cm_id(priv->cm.id);
+	priv->cm.id = NULL;
+
+	spin_lock_irq(&priv->lock);
+	while (!list_empty(&priv->cm.passive_ids)) {
+		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+		list_move(&p->list, &priv->cm.rx_error_list);
+		p->state = IPOIB_CM_RX_ERROR;
+		spin_unlock_irq(&priv->lock);
+		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+		if (ret)
+			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
+		spin_lock_irq(&priv->lock);
+	}
+
+	/* Wait for all RX to be drained */
+	begin = jiffies;
+
+	while (!list_empty(&priv->cm.rx_error_list) ||
+	       !list_empty(&priv->cm.rx_flush_list) ||
+	       !list_empty(&priv->cm.rx_drain_list)) {
+		if (time_after(jiffies, begin + 5 * HZ)) {
+			ipoib_warn(priv, "RX drain timing out\n");
+
+			/*
+			 * Assume the HW is wedged and just free up everything.
+			 */
+			list_splice_init(&priv->cm.rx_flush_list, &list);
+			list_splice_init(&priv->cm.rx_error_list, &list);
+			list_splice_init(&priv->cm.rx_drain_list, &list);
+			break;
+		}
+		spin_unlock_irq(&priv->lock);
+		msleep(1);
+		spin_lock_irq(&priv->lock);
+	}
+
+	list_splice_init(&priv->cm.rx_reap_list, &list);
+
+	spin_unlock_irq(&priv->lock);
+
+	list_for_each_entry_safe(p, n, &list, list) {
+		ib_destroy_cm_id(p->id);
+		ib_destroy_qp(p->qp);
+		kfree(p);
+	}
+
+	cancel_delayed_work(&priv->cm.stale_task);
+}
+
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct ipoib_cm_tx *p = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_cm_data *data = event->private_data;
+	struct sk_buff_head skqueue;
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+	struct sk_buff *skb;
+
+	p->mtu = be32_to_cpu(data->mtu);
+
+	if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
+		ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
+			   p->mtu, priv->dev->mtu);
+		return -EINVAL;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+		return ret;
+	}
+
+	qp_attr.rq_psn = 0 /* FIXME */;
+	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+		return ret;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+		return ret;
+	}
+	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+		return ret;
+	}
+
+	skb_queue_head_init(&skqueue);
+
+	spin_lock_irq(&priv->lock);
+	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
+	if (p->neigh)
+		while ((skb = __skb_dequeue(&p->neigh->queue)))
+			__skb_queue_tail(&skqueue, skb);
+	spin_unlock_irq(&priv->lock);
+
+	while ((skb = __skb_dequeue(&skqueue))) {
+		skb->dev = p->dev;
+		if (dev_queue_xmit(skb))
+			ipoib_warn(priv, "dev_queue_xmit failed "
+				   "to requeue packet\n");
+	}
+
+	ret = ib_send_cm_rtu(cm_id, NULL, 0);
+	if (ret) {
+		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_init_attr attr = {};
+	attr.recv_cq = priv->cq;
+	attr.srq = priv->cm.srq;
+	attr.cap.max_send_wr = ipoib_sendq_size;
+	attr.cap.max_send_sge = 1;
+	attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	attr.qp_type = IB_QPT_RC;
+	attr.send_cq = cq;
+	return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_send_req(struct net_device *dev,
+			     struct ib_cm_id *id, struct ib_qp *qp,
+			     u32 qpn,
+			     struct ib_sa_path_rec *pathrec)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_data data = {};
+	struct ib_cm_req_param req = {};
+
+	data.qpn = cpu_to_be32(priv->qp->qp_num);
+	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+	req.primary_path 	      = pathrec;
+	req.alternate_path 	      = NULL;
+	req.service_id                = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
+	req.qp_num 		      = qp->qp_num;
+	req.qp_type 		      = qp->qp_type;
+	req.private_data 	      = &data;
+	req.private_data_len 	      = sizeof data;
+	req.flow_control 	      = 0;
+
+	req.starting_psn              = 0; /* FIXME */
+
+	/*
+	 * Pick some arbitrary defaults here; we could make these
+	 * module parameters if anyone cared about setting them.
+	 */
+	req.responder_resources	      = 4;
+	req.remote_cm_response_timeout = 20;
+	req.local_cm_response_timeout  = 20;
+	req.retry_count 	      = 0; /* RFC draft warns against retries */
+	req.rnr_retry_count 	      = 0; /* RFC draft warns against retries */
+	req.max_cm_retries 	      = 15;
+	req.srq 	              = 1;
+	return ib_send_cm_req(id, &req);
+}
+
+static int ipoib_cm_modify_tx_init(struct net_device *dev,
+				  struct ib_cm_id *cm_id, struct ib_qp *qp)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
+	if (ret) {
+		ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret);
+		return ret;
+	}
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	qp_attr.port_num = priv->port;
+	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
+			    struct ib_sa_path_rec *pathrec)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	int ret;
+
+	p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+				GFP_KERNEL);
+	if (!p->tx_ring) {
+		ipoib_warn(priv, "failed to allocate tx ring\n");
+		ret = -ENOMEM;
+		goto err_tx;
+	}
+
+	p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
+			     ipoib_sendq_size + 1);
+	if (IS_ERR(p->cq)) {
+		ret = PTR_ERR(p->cq);
+		ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
+		goto err_cq;
+	}
+
+	ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
+		goto err_req_notify;
+	}
+
+	p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+	if (IS_ERR(p->qp)) {
+		ret = PTR_ERR(p->qp);
+		ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+		goto err_qp;
+	}
+
+	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
+	if (IS_ERR(p->id)) {
+		ret = PTR_ERR(p->id);
+		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
+		goto err_id;
+	}
+
+	ret = ipoib_cm_modify_tx_init(p->dev, p->id,  p->qp);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
+		goto err_modify;
+	}
+
+	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
+	if (ret) {
+		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
+		goto err_send_cm;
+	}
+
+	ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n",
+		  p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn);
+
+	return 0;
+
+err_send_cm:
+err_modify:
+	ib_destroy_cm_id(p->id);
+err_id:
+	p->id = NULL;
+	ib_destroy_qp(p->qp);
+err_req_notify:
+err_qp:
+	p->qp = NULL;
+	ib_destroy_cq(p->cq);
+err_cq:
+	p->cq = NULL;
+err_tx:
+	return ret;
+}
+
+static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	struct ipoib_tx_buf *tx_req;
+
+	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
+		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
+
+	if (p->id)
+		ib_destroy_cm_id(p->id);
+
+	if (p->qp)
+		ib_destroy_qp(p->qp);
+
+	if (p->cq)
+		ib_destroy_cq(p->cq);
+
+	if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
+		netif_wake_queue(p->dev);
+
+	if (p->tx_ring) {
+		while ((int) p->tx_tail - (int) p->tx_head < 0) {
+			tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+			ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+					 DMA_TO_DEVICE);
+			dev_kfree_skb_any(tx_req->skb);
+			++p->tx_tail;
+		}
+
+		kfree(p->tx_ring);
+	}
+
+	kfree(p);
+}
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+			       struct ib_cm_event *event)
+{
+	struct ipoib_cm_tx *tx = cm_id->context;
+	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	struct net_device *dev = priv->dev;
+	struct ipoib_neigh *neigh;
+	int ret;
+
+	switch (event->event) {
+	case IB_CM_DREQ_RECEIVED:
+		ipoib_dbg(priv, "DREQ received.\n");
+		ib_send_cm_drep(cm_id, NULL, 0);
+		break;
+	case IB_CM_REP_RECEIVED:
+		ipoib_dbg(priv, "REP received.\n");
+		ret = ipoib_cm_rep_handler(cm_id, event);
+		if (ret)
+			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+				       NULL, 0, NULL, 0);
+		break;
+	case IB_CM_REQ_ERROR:
+	case IB_CM_REJ_RECEIVED:
+	case IB_CM_TIMEWAIT_EXIT:
+		ipoib_dbg(priv, "CM error %d.\n", event->event);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
+		neigh = tx->neigh;
+
+		if (neigh) {
+			neigh->cm = NULL;
+			list_del(&neigh->list);
+			if (neigh->ah)
+				ipoib_put_ah(neigh->ah);
+			ipoib_neigh_free(dev, neigh);
+
+			tx->neigh = NULL;
+		}
+
+		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+			list_move(&tx->list, &priv->cm.reap_list);
+			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		}
+
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				       struct ipoib_neigh *neigh)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_cm_tx *tx;
+
+	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+	if (!tx)
+		return NULL;
+
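+	/* Only bookkeeping here; the connection itself is brought up later by cm.start_task */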
+	neigh->cm = tx;
+	tx->neigh = neigh;
+	tx->path = path;
+	tx->dev = dev;
+	list_add(&tx->list, &priv->cm.start_list);
+	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
+	queue_work(ipoib_workqueue, &priv->cm.start_task);
+	return tx;
+}
+
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+		list_move(&tx->list, &priv->cm.reap_list);
+		queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n",
+			  IPOIB_GID_ARG(tx->neigh->dgid));
+		tx->neigh = NULL;
+	}
+}
+
+static void ipoib_cm_tx_start(void *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.start_task);
+	struct net_device *dev = priv->dev;
+	struct ipoib_neigh *neigh;
+	struct ipoib_cm_tx *p;
+	unsigned long flags;
+	int ret;
+
+	struct ib_sa_path_rec pathrec;
+	u32 qpn;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
+	while (!list_empty(&priv->cm.start_list)) {
+		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+		list_del_init(&p->list);
+		neigh = p->neigh;
+		qpn = IPOIB_QPN(neigh->neighbour->ha);
+		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
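+		/* Drop the locks while connecting; ipoib_cm_tx_init allocates and may sleep */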
+		spin_unlock(&priv->lock);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
+		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock(&priv->lock);
+		if (ret) {
+			neigh = p->neigh;
+			if (neigh) {
+				neigh->cm = NULL;
+				list_del(&neigh->list);
+				if (neigh->ah)
+					ipoib_put_ah(neigh->ah);
+				ipoib_neigh_free(dev, neigh);
+			}
+			list_del(&p->list);
+			kfree(p);
+		}
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_reap(void *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.reap_task);
+	struct ipoib_cm_tx *p;
+
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
+	while (!list_empty(&priv->cm.reap_list)) {
+		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+		list_del(&p->list);
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
+		ipoib_cm_tx_destroy(p);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
+}
+
+static void ipoib_cm_skb_reap(void *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.skb_task);
+	struct net_device *dev = priv->dev;
+	struct sk_buff *skb;
+
+	unsigned mtu = priv->mcast_mtu;
+
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
+	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
+		if (skb->protocol == htons(ETH_P_IP))
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+#endif
+		dev_kfree_skb_any(skb);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
+	}
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
+}
+
+void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+			   unsigned int mtu)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int e = skb_queue_empty(&priv->cm.skb_queue);
+
+	if (skb->dst)
+		skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+	skb_queue_tail(&priv->cm.skb_queue, skb);
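+	/* Kick the skb reap task only when the queue was empty; it drains everything queued */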
+	if (e)
+		queue_work(ipoib_workqueue, &priv->cm.skb_task);
+}
+
+static void ipoib_cm_rx_reap(void *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.rx_reap_task);
+	struct ipoib_cm_rx *p, *n;
+	LIST_HEAD(list);
+
+	spin_lock_irq(&priv->lock);
+	list_splice_init(&priv->cm.rx_reap_list, &list);
+	spin_unlock_irq(&priv->lock);
+
+	list_for_each_entry_safe(p, n, &list, list) {
+		ib_destroy_cm_id(p->id);
+		ib_destroy_qp(p->qp);
+		kfree(p);
+	}
+}
+
+static void ipoib_cm_stale_task(void *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.stale_task);
+	struct ipoib_cm_rx *p;
+	int ret;
+
+	spin_lock_irq(&priv->lock);
+	while (!list_empty(&priv->cm.passive_ids)) {
+		/* List is sorted by LRU, start from tail,
+		 * stop when we see a recently used entry */
+		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
+		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
+			break;
+		list_move(&p->list, &priv->cm.rx_error_list);
+		p->state = IPOIB_CM_RX_ERROR;
+		spin_unlock_irq(&priv->lock);
+		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+		if (ret)
+			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
+		spin_lock_irq(&priv->lock);
+	}
+
+	if (!list_empty(&priv->cm.passive_ids))
+		queue_delayed_work(ipoib_workqueue,
+				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+	spin_unlock_irq(&priv->lock);
+}
+
+
+static ssize_t show_mode(struct class_device *cdev, char *buf)
+{
+	struct net_device *dev = container_of(cdev, struct net_device, class_dev);
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
+		return sprintf(buf, "connected\n");
+	else
+		return sprintf(buf, "datagram\n");
+}
+
+static ssize_t set_mode(struct class_device *cdev,
+			const char *buf, size_t count)
+{
+	struct net_device *dev = container_of(cdev, struct net_device, class_dev);
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	/* flush paths if we switch modes so that connections are restarted */
+	if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
+		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+		ipoib_warn(priv, "enabling connected mode "
+			   "will cause multicast packet drops\n");
+		ipoib_flush_paths(dev);
+		return count;
+	}
+
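+	/* Datagram mode is limited by the broadcast group MTU, so clamp dev->mtu on the way back */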
+	if (!strcmp(buf, "datagram\n")) {
+		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+		dev->mtu = min(priv->mcast_mtu, dev->mtu);
+		ipoib_flush_paths(dev);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+static CLASS_DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
+
+int ipoib_cm_add_mode_attr(struct net_device *dev)
+{
+	return class_device_create_file(&dev->class_dev, &class_device_attr_mode);
+}
+
+int ipoib_cm_dev_init(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_srq_init_attr srq_init_attr = {
+		.attr = {
+			.max_wr  = ipoib_recvq_size,
+			.max_sge = IPOIB_CM_RX_SG
+		}
+	};
+	int ret, i;
+
+	INIT_LIST_HEAD(&priv->cm.passive_ids);
+	INIT_LIST_HEAD(&priv->cm.reap_list);
+	INIT_LIST_HEAD(&priv->cm.start_list);
+	INIT_LIST_HEAD(&priv->cm.rx_error_list);
+	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
+	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
+	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
+	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start, &priv->cm.start_task);
+	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap, &priv->cm.reap_task);
+	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap, &priv->cm.skb_task);
+	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap, &priv->cm.rx_reap_task);
+	INIT_WORK(&priv->cm.stale_task, ipoib_cm_stale_task, &priv->cm.stale_task);
+
+	skb_queue_head_init(&priv->cm.skb_queue);
+
+	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+	if (IS_ERR(priv->cm.srq)) {
+		ret = PTR_ERR(priv->cm.srq);
+		priv->cm.srq = NULL;
+		return ret;
+	}
+
+	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
+				    GFP_KERNEL);
+	if (!priv->cm.srq_ring) {
+		printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
+		       priv->ca->name, ipoib_recvq_size);
+		ipoib_cm_dev_cleanup(dev);
+		return -ENOMEM;
+	}
+
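+	/* SRQ receives are scattered: the first SGE takes the IPOIB_CM_HEAD_SIZE linear head, the remaining SGEs cover one page each */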
+	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+		priv->cm.rx_sge[i].lkey	= priv->mr->lkey;
+
+	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
+	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+		priv->cm.rx_sge[i].length = PAGE_SIZE;
+	priv->cm.rx_wr.next = NULL;
+	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
+	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
+
+	for (i = 0; i < ipoib_recvq_size; ++i) {
+		if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+					   priv->cm.srq_ring[i].mapping)) {
+			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+			ipoib_cm_dev_cleanup(dev);
+			return -ENOMEM;
+		}
+		if (ipoib_cm_post_receive(dev, i)) {
+			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
+			ipoib_cm_dev_cleanup(dev);
+			return -EIO;
+		}
+	}
+
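+	/* Advertise RC (connected mode) support in the flags byte of the hardware address */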
+	priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
+	return 0;
+}
+
+void ipoib_cm_dev_cleanup(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i, ret;
+
+	if (!priv->cm.srq)
+		return;
+
+	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
+
+	ret = ib_destroy_srq(priv->cm.srq);
+	if (ret)
+		ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
+
+	priv->cm.srq = NULL;
+	if (!priv->cm.srq_ring)
+		return;
+	for (i = 0; i < ipoib_recvq_size; ++i)
+		if (priv->cm.srq_ring[i].skb) {
+			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+					      priv->cm.srq_ring[i].mapping);
+			dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
+			priv->cm.srq_ring[i].skb = NULL;
+		}
+	kfree(priv->cm.srq_ring);
+	priv->cm.srq_ring = NULL;
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {
 
 	IPOIB_ENCAP_LEN 	  = 4,
 
+	IPOIB_CM_MTU              = 0x10000 - 0x10, /* padding to align header to 16 */
+	IPOIB_CM_BUF_SIZE         = IPOIB_CM_MTU  + IPOIB_ENCAP_LEN,
+	IPOIB_CM_HEAD_SIZE 	  = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
+	IPOIB_CM_RX_SG            = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
 	IPOIB_RX_RING_SIZE 	  = 128,
 	IPOIB_TX_RING_SIZE 	  = 64,
 	IPOIB_MAX_QUEUE_SIZE	  = 8192,
@@ -81,6 +85,8 @@ enum {
 	IPOIB_MCAST_RUN 	  = 6,
 	IPOIB_STOP_REAPER         = 7,
 	IPOIB_MCAST_STARTED       = 8,
+	IPOIB_FLAG_NETIF_STOPPED  = 9,
+	IPOIB_FLAG_ADMIN_CM 	  = 10,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -90,6 +96,14 @@ enum {
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
 };
 
+
+#define	IPOIB_OP_RECV   (1ul << 31)
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+#define	IPOIB_CM_OP_SRQ (1ul << 30)
+#else
+#define	IPOIB_CM_OP_SRQ (0)
+#endif
+
 /* structs */
 
 struct ipoib_header {
@@ -105,12 +119,102 @@ struct ipoib_mcast;
 
 struct ipoib_rx_buf {
 	struct sk_buff *skb;
-	dma_addr_t	mapping;
+	u64		mapping;
 };
 
 struct ipoib_tx_buf {
 	struct sk_buff *skb;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	u64		mapping;
+};
+
+struct ib_cm_id;
+
+struct ipoib_cm_data {
+	__be32 qpn; /* High byte MUST be ignored on receive */
+	__be32 mtu;
+};
+
+/*
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State.  The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ *       to be empty or the number of Poll CQ operations has exceeded
+ *       CQ capacity size;
+ * - or
+ *       post another WR that completes on the same CQ and wait for this
+ *       WR to return as a WC; (NB: this is the option that we use)
+ * - and then invoke a Destroy QP or Reset QP.
+ */
+
+enum ipoib_cm_state {
+	IPOIB_CM_RX_LIVE,
+	IPOIB_CM_RX_ERROR, /* Ignored by stale task */
+	IPOIB_CM_RX_FLUSH  /* Last WQE Reached event observed */
+};
+
+struct ipoib_cm_rx {
+	struct ib_cm_id     *id;
+	struct ib_qp        *qp;
+	struct list_head     list;
+	struct net_device   *dev;
+	unsigned long        jiffies;
+	enum ipoib_cm_state  state;
+};
+
+struct ipoib_cm_tx {
+	struct ib_cm_id     *id;
+	struct ib_cq        *cq;
+	struct ib_qp        *qp;
+	struct list_head     list;
+	struct net_device   *dev;
+	struct ipoib_neigh  *neigh;
+	struct ipoib_path   *path;
+	struct ipoib_tx_buf *tx_ring;
+	unsigned             tx_head;
+	unsigned             tx_tail;
+	unsigned long        flags;
+	u32                  mtu;
+	struct ib_wc         ibwc[IPOIB_NUM_WC];
+};
+
+struct ipoib_cm_rx_buf {
+	struct sk_buff *skb;
+	u64 mapping[IPOIB_CM_RX_SG];
+};
+
+struct ipoib_cm_dev_priv {
+	struct ib_srq  	       *srq;
+	struct ipoib_cm_rx_buf *srq_ring;
+	struct ib_cm_id        *id;
+	struct ib_qp           *rx_drain_qp;   /* generates WR described in 10.3.1 */
+	struct list_head        passive_ids;   /* state: LIVE */
+	struct list_head        rx_error_list; /* state: ERROR */
+	struct list_head        rx_flush_list; /* state: FLUSH, drain not started */
+	struct list_head        rx_drain_list; /* state: FLUSH, drain started */
+	struct list_head        rx_reap_list;  /* state: FLUSH, drain done */
+	struct work_struct      start_task;
+	struct work_struct      reap_task;
+	struct work_struct      skb_task;
+	struct work_struct      rx_reap_task;
+	struct work_struct      stale_task;
+	struct sk_buff_head     skb_queue;
+	struct list_head        start_list;
+	struct list_head        reap_list;
+	struct ib_wc            ibwc[IPOIB_NUM_WC];
+	struct ib_sge           rx_sge[IPOIB_CM_RX_SG];
+	struct ib_recv_wr       rx_wr;
 };
 
 /*
@@ -136,15 +240,17 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct work_struct pkey_task;
+	struct work_struct pkey_poll_task;
 	struct work_struct mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
 	struct work_struct ah_reap_task;
+	struct work_struct pkey_event_task;
 
 	struct ib_device *ca;
 	u8            	  port;
 	u16           	  pkey;
+	u16               pkey_index;
 	struct ib_pd  	 *pd;
 	struct ib_mr  	 *mr;
 	struct ib_cq  	 *cq;
@@ -179,6 +285,10 @@ struct ipoib_dev_priv {
 	struct list_head child_intfs;
 	struct list_head list;
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+	struct ipoib_cm_dev_priv cm;
+#endif
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 	struct list_head fs_list;
 	struct dentry *mcg_dentry;
@@ -212,10 +322,14 @@ struct ipoib_path {
 
 struct ipoib_neigh {
 	struct ipoib_ah    *ah;
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+	struct ipoib_cm_tx *cm;
+#endif
 	union ib_gid        dgid;
 	struct sk_buff_head queue;
 
 	struct neighbour   *neighbour;
+	struct net_device *dev;
 
 	struct list_head    list;
 };
@@ -232,8 +346,9 @@ static inline struct ipoib_neigh **to_ip
 				     INFINIBAND_ALEN, sizeof(void *));
 }
 
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
-void ipoib_neigh_free(struct ipoib_neigh *neigh);
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+				      struct net_device *dev);
+void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
 
 extern struct workqueue_struct *ipoib_workqueue;
 
@@ -254,19 +369,20 @@ int ipoib_add_pkey_attr(struct net_devic
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
-void ipoib_reap_ah(void *dev_ptr);
+void ipoib_reap_ah(void *_dev);
 
 void ipoib_flush_paths(struct net_device *dev);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_ib_dev_flush(void *dev);
+void ipoib_ib_dev_flush(void *_dev);
+void ipoib_pkey_event(void *_dev);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
@@ -315,6 +431,146 @@ int ipoib_vlan_delete(struct net_device 
 void ipoib_pkey_poll(void *dev);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+
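+/* Connected-mode capability flags advertised in byte 0 of the IPoIB hardware address */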
+#define IPOIB_FLAGS_RC          0x80
+#define IPOIB_FLAGS_UC          0x40
+
+/* We don't support UC connections at the moment */
+#define IPOIB_CM_SUPPORTED(ha)   (ha[0] & (IPOIB_FLAGS_RC))
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
+		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	return IPOIB_CM_SUPPORTED(n->ha) &&
+		test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+{
+	return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+	return neigh->cm;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+	neigh->cm = tx;
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
+int ipoib_cm_dev_open(struct net_device *dev);
+void ipoib_cm_dev_stop(struct net_device *dev);
+int ipoib_cm_dev_init(struct net_device *dev);
+int ipoib_cm_add_mode_attr(struct net_device *dev);
+void ipoib_cm_dev_cleanup(struct net_device *dev);
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				    struct ipoib_neigh *neigh);
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
+void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+			   unsigned int mtu);
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+#else
+
+struct ipoib_cm_tx;
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+	return 0;
+}
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+{
+	return 0;
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+{
+	return 0;
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+	return NULL;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+}
+
+static inline
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_dev_init(struct net_device *dev)
+{
+	return -ENOSYS;
+}
+
+static inline
+void ipoib_cm_dev_cleanup(struct net_device *dev)
+{
+	return;
+}
+
+static inline
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+				    struct ipoib_neigh *neigh)
+{
+	return NULL;
+}
+
+static inline
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+	return;
+}
+
+static inline
+int ipoib_cm_add_mode_attr(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+					 unsigned int mtu)
+{
+	dev_kfree_skb_any(skb);
+}
+
+static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
+
+#endif
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 void ipoib_create_debug_files(struct net_device *dev);
 void ipoib_delete_debug_files(struct net_device *dev);
@@ -336,6 +592,8 @@ static inline void ipoib_unregister_debu
 extern int ipoib_sendq_size;
 extern int ipoib_recvq_size;
 
+extern struct ib_sa_client ipoib_sa_client;
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
 
@@ -390,4 +648,6 @@ extern int ipoib_debug_level;
 
 #define IPOIB_GID_ARG(gid)	IPOIB_GID_RAW_ARG((gid).raw)
 
+#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
+
 #endif /* _IPOIB_H */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level,
 		 "Enable data path debug tracing if > 0");
 #endif
 
-#define	IPOIB_OP_RECV	(1ul << 31)
-
 static DEFINE_MUTEX(pkey_mutex);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
@@ -109,9 +107,8 @@ static int ipoib_ib_post_receive(struct 
 	ret = ib_post_recv(priv->qp, &param, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
-		dma_unmap_single(priv->ca->dma_device,
-				 priv->rx_ring[id].mapping,
-				 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
+				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 		dev_kfree_skb_any(priv->rx_ring[id].skb);
 		priv->rx_ring[id].skb = NULL;
 	}
@@ -123,7 +120,7 @@ static int ipoib_alloc_rx_skb(struct net
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
-	dma_addr_t addr;
+	u64 addr;
 
 	skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
 	if (!skb)
@@ -136,10 +133,9 @@ static int ipoib_alloc_rx_skb(struct net
 	 */
 	skb_reserve(skb, 4);
 
-	addr = dma_map_single(priv->ca->dma_device,
-			      skb->data, IPOIB_BUF_SIZE,
-			      DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(addr))) {
+	addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
+				 DMA_FROM_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
 		dev_kfree_skb_any(skb);
 		return -EIO;
 	}
@@ -169,117 +165,129 @@ static int ipoib_ib_post_receives(struct
 	return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
-			       struct ib_wc *wc)
+static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned int wr_id = wc->wr_id;
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+	struct sk_buff *skb;
+	u64 addr;
 
-	ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+	ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
 		       wr_id, wc->opcode, wc->status);
 
-	if (wr_id & IPOIB_OP_RECV) {
-		wr_id &= ~IPOIB_OP_RECV;
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
+		return;
+	}
 
-		if (wr_id < ipoib_recvq_size) {
-			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-			if (unlikely(wc->status != IB_WC_SUCCESS)) {
-				if (wc->status != IB_WC_WR_FLUSH_ERR)
-					ipoib_warn(priv, "failed recv event "
-						   "(status=%d, wrid=%d vend_err %x)\n",
-						   wc->status, wr_id, wc->vendor_err);
-				dma_unmap_single(priv->ca->dma_device, addr,
-						 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-				dev_kfree_skb_any(skb);
-				priv->rx_ring[wr_id].skb = NULL;
-				return;
-			}
+	skb  = priv->rx_ring[wr_id].skb;
+	addr = priv->rx_ring[wr_id].mapping;
 
-			/*
-			 * If we can't allocate a new RX buffer, dump
-			 * this packet and reuse the old buffer.
-			 */
-			if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-				++priv->stats.rx_dropped;
-				goto repost;
-			}
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			ipoib_warn(priv, "failed recv event "
+				   "(status=%d, wrid=%d vend_err %x)\n",
+				   wc->status, wr_id, wc->vendor_err);
+		ib_dma_unmap_single(priv->ca, addr,
+				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->rx_ring[wr_id].skb = NULL;
+		return;
+	}
 
-			ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-				       wc->byte_len, wc->slid);
+	/*
+	 * If we can't allocate a new RX buffer, dump
+	 * this packet and reuse the old buffer.
+	 */
+	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
 
-			dma_unmap_single(priv->ca->dma_device, addr,
-					 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
 
-			skb_put(skb, wc->byte_len);
-			skb_pull(skb, IB_GRH_BYTES);
+	ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 
-			if (wc->slid != priv->local_lid ||
-			    wc->src_qp != priv->qp->qp_num) {
-				skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-				skb->mac.raw = skb->data;
-				skb_pull(skb, IPOIB_ENCAP_LEN);
-
-				dev->last_rx = jiffies;
-				++priv->stats.rx_packets;
-				priv->stats.rx_bytes += skb->len;
-
-				skb->dev = dev;
-				/* XXX get correct PACKET_ type here */
-				skb->pkt_type = PACKET_HOST;
-				netif_rx_ni(skb);
-			} else {
-				ipoib_dbg_data(priv, "dropping loopback packet\n");
-				dev_kfree_skb_any(skb);
-			}
+	skb_put(skb, wc->byte_len);
+	skb_pull(skb, IB_GRH_BYTES);
 
-		repost:
-			if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-				ipoib_warn(priv, "ipoib_ib_post_receive failed "
-					   "for buf %d\n", wr_id);
-		} else
-			ipoib_warn(priv, "completion event with wrid %d\n",
-				   wr_id);
+	if (wc->slid != priv->local_lid ||
+	    wc->src_qp != priv->qp->qp_num) {
+		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+		skb->mac.raw = skb->data;
+		skb_pull(skb, IPOIB_ENCAP_LEN);
 
+		dev->last_rx = jiffies;
+		++priv->stats.rx_packets;
+		priv->stats.rx_bytes += skb->len;
+
+		skb->dev = dev;
+		/* XXX get correct PACKET_ type here */
+		skb->pkt_type = PACKET_HOST;
+		netif_rx_ni(skb);
 	} else {
-		struct ipoib_tx_buf *tx_req;
-		unsigned long flags;
+		ipoib_dbg_data(priv, "dropping loopback packet\n");
+		dev_kfree_skb_any(skb);
+	}
 
-		if (wr_id >= ipoib_sendq_size) {
-			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, ipoib_sendq_size);
-			return;
-		}
+repost:
+	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_ib_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
 
-		ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
+
+	ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
 
-		tx_req = &priv->tx_ring[wr_id];
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
+	}
 
-		dma_unmap_single(priv->ca->dma_device,
-				 pci_unmap_addr(tx_req, mapping),
-				 tx_req->skb->len,
-				 DMA_TO_DEVICE);
+	tx_req = &priv->tx_ring[wr_id];
 
-		++priv->stats.tx_packets;
-		priv->stats.tx_bytes += tx_req->skb->len;
+	ib_dma_unmap_single(priv->ca, tx_req->mapping,
+			    tx_req->skb->len, DMA_TO_DEVICE);
 
-		dev_kfree_skb_any(tx_req->skb);
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
 
-		spin_lock_irqsave(&priv->tx_lock, flags);
-		++priv->tx_tail;
-		if (netif_queue_stopped(dev) &&
-		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
-
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			ipoib_warn(priv, "failed send event "
-				   "(status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++priv->tx_tail;
+	if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
+	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
+		clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+		netif_wake_queue(dev);
 	}
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR)
+		ipoib_warn(priv, "failed send event "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+}
+
+static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
+{
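+	/* The top bits of wr_id encode where the completion came from: CM SRQ, UD receive, or send */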
+	if (wc->wr_id & IPOIB_CM_OP_SRQ)
+		ipoib_cm_handle_rx_wc(dev, wc);
+	else if (wc->wr_id & IPOIB_OP_RECV)
+		ipoib_ib_handle_rx_wc(dev, wc);
+	else
+		ipoib_ib_handle_tx_wc(dev, wc);
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -299,7 +307,7 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    dma_addr_t addr, int len)
+			    u64 addr, int len)
 {
 	struct ib_send_wr *bad_wr;
 
@@ -318,14 +326,14 @@ void ipoib_send(struct net_device *dev, 
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-	dma_addr_t addr;
+	u64 addr;
 
-	if (skb->len > dev->mtu + INFINIBAND_ALEN) {
+	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
-			   skb->len, dev->mtu + INFINIBAND_ALEN);
+			   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
 		++priv->stats.tx_dropped;
 		++priv->stats.tx_errors;
-		dev_kfree_skb_any(skb);
+		ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
 		return;
 	}
 
@@ -341,16 +349,20 @@ void ipoib_send(struct net_device *dev, 
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
-			      DMA_TO_DEVICE);
-	pci_unmap_addr_set(tx_req, mapping, addr);
+	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
+				 DMA_TO_DEVICE);
+	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+		++priv->stats.tx_errors;
+		dev_kfree_skb_any(skb);
+		return;
+	}
+	tx_req->mapping = addr;
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, addr, skb->len))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
-		dma_unmap_single(priv->ca->dma_device, addr, skb->len,
-				 DMA_TO_DEVICE);
+		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
 		dev_kfree_skb_any(skb);
 	} else {
 		dev->trans_start = jiffies;
@@ -361,6 +373,7 @@ void ipoib_send(struct net_device *dev, 
 		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
+			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
 		}
 	}
 }
@@ -399,6 +412,18 @@ int ipoib_ib_dev_open(struct net_device 
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
 
+	/*
+	 * Search through the port P_Key table for the requested pkey value.
+	 * The port has to be assigned to the respective IB partition in
+	 * advance.
+	 */
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+		ipoib_warn(priv, "pkey 0x%04x nof found\n", priv->pkey);
+		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+		return -1;
+	}
+	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
 	ret = ipoib_init_qp(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -408,7 +433,14 @@ int ipoib_ib_dev_open(struct net_device 
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
+		return -1;
+	}
+
+	ret = ipoib_cm_dev_open(dev);
+	if (ret) {
+		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
@@ -460,7 +492,7 @@ int ipoib_ib_dev_down(struct net_device 
 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 		mutex_lock(&pkey_mutex);
 		set_bit(IPOIB_PKEY_STOP, &priv->flags);
-		cancel_delayed_work(&priv->pkey_task);
+		cancel_delayed_work(&priv->pkey_poll_task);
 		mutex_unlock(&pkey_mutex);
 		if (flush)
 			flush_workqueue(ipoib_workqueue);
@@ -487,7 +519,7 @@ static int recvs_pending(struct net_devi
 	return pending;
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -497,6 +529,8 @@ int ipoib_ib_dev_stop(struct net_device 
 
 	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
 
+	ipoib_cm_dev_stop(dev);
+
 	/*
 	 * Move our QP to the error state and then reinitialize in
 	 * when all work requests have completed or have been flushed.
@@ -520,24 +554,27 @@ int ipoib_ib_dev_stop(struct net_device 
 			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 				tx_req = &priv->tx_ring[priv->tx_tail &
 							(ipoib_sendq_size - 1)];
-				dma_unmap_single(priv->ca->dma_device,
-						 pci_unmap_addr(tx_req, mapping),
-						 tx_req->skb->len,
-						 DMA_TO_DEVICE);
+				ib_dma_unmap_single(priv->ca,
+						    tx_req->mapping,
+						    tx_req->skb->len,
+						    DMA_TO_DEVICE);
 				dev_kfree_skb_any(tx_req->skb);
 				++priv->tx_tail;
 			}
 
-			for (i = 0; i < ipoib_recvq_size; ++i)
-				if (priv->rx_ring[i].skb) {
-					dma_unmap_single(priv->ca->dma_device,
-							 pci_unmap_addr(&priv->rx_ring[i],
-									mapping),
-							 IPOIB_BUF_SIZE,
-							 DMA_FROM_DEVICE);
-					dev_kfree_skb_any(priv->rx_ring[i].skb);
-					priv->rx_ring[i].skb = NULL;
-				}
+			for (i = 0; i < ipoib_recvq_size; ++i) {
+				struct ipoib_rx_buf *rx_req;
+
+				rx_req = &priv->rx_ring[i];
+				if (!rx_req->skb)
+					continue;
+				ib_dma_unmap_single(priv->ca,
+						    rx_req->mapping,
+						    IPOIB_BUF_SIZE,
+						    DMA_FROM_DEVICE);
+				dev_kfree_skb_any(rx_req->skb);
+				rx_req->skb = NULL;
+			}
 
 			goto timeout;
 		}
@@ -555,7 +592,8 @@ timeout:
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	begin = jiffies;
 
@@ -596,12 +634,22 @@ int ipoib_ib_dev_init(struct net_device 
 	return 0;
 }
 
-void ipoib_ib_dev_flush(void *_dev)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
 {
-	struct net_device *dev = (struct net_device *)_dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv;
+	struct ipoib_dev_priv *cpriv;
+	struct net_device *dev = priv->dev;
+	u16 new_index;
 
-	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+	mutex_lock(&priv->vlan_mutex);
+
+	/* Flush any child interfaces too -
+	 * they might be up even if the parent is down */
+	list_for_each_entry(cpriv, &priv->child_intfs, list)
+		__ipoib_ib_dev_flush(cpriv, pkey_event);
+
+	mutex_unlock(&priv->vlan_mutex);
+
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
@@ -611,10 +659,32 @@ void ipoib_ib_dev_flush(void *_dev)
 		return;
 	}
 
+	if (pkey_event) {
+		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+			ipoib_ib_dev_down(dev, 0);
+			ipoib_pkey_dev_delay_open(dev);
+			return;
+		}
+		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+		/* restart the QP only if the pkey index has changed */
+		if (new_index == priv->pkey_index) {
+			ipoib_dbg(priv, "Not flushing - pkey index not changed.\n");
+			return;
+		}
+		priv->pkey_index = new_index;
+	}
+
 	ipoib_dbg(priv, "flushing\n");
 
 	ipoib_ib_dev_down(dev, 0);
 
+	if (pkey_event) {
+		ipoib_ib_dev_stop(dev, 0);
+		ipoib_ib_dev_open(dev);
+	}
+
 	/*
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
@@ -623,14 +693,24 @@ void ipoib_ib_dev_flush(void *_dev)
 		ipoib_ib_dev_up(dev);
 		ipoib_mcast_restart_task(dev);
 	}
+}
 
-	mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(void *_dev)
+{
+	struct net_device *dev = (struct net_device *)_dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	/* Flush any child interfaces too */
-	list_for_each_entry(cpriv, &priv->child_intfs, list)
-		ipoib_ib_dev_flush(cpriv->dev);
+	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 0);
+}
 
-	mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(void *_dev)
+{
+	struct net_device *dev = (struct net_device *)_dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 1);
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -655,9 +735,9 @@ void ipoib_ib_dev_cleanup(struct net_dev
  * change async notification is available.
  */
 
-void ipoib_pkey_poll(void *dev_ptr)
+void ipoib_pkey_poll(void *_dev)
 {
-	struct net_device *dev = dev_ptr;
+	struct net_device *dev = (struct net_device *)_dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	ipoib_pkey_dev_check_presence(dev);
@@ -668,7 +748,7 @@ void ipoib_pkey_poll(void *dev_ptr)
 		mutex_lock(&pkey_mutex);
 		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->pkey_task,
+					   &priv->pkey_poll_task,
 					   HZ);
 		mutex_unlock(&pkey_mutex);
 	}
@@ -687,7 +767,7 @@ int ipoib_pkey_dev_delay_open(struct net
 		mutex_lock(&pkey_mutex);
 		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
 		queue_delayed_work(ipoib_workqueue,
-				   &priv->pkey_task,
+				   &priv->pkey_poll_task,
 				   HZ);
 		mutex_unlock(&pkey_mutex);
 		return 1;
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,7 +40,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
@@ -82,6 +81,8 @@ static const u8 ipv4_bcast_addr[] = {
 
 struct workqueue_struct *ipoib_workqueue;
 
+struct ib_sa_client ipoib_sa_client;
+
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 
@@ -106,7 +107,7 @@ int ipoib_open(struct net_device *dev)
 		return -EINVAL;
 
 	if (ipoib_ib_dev_up(dev)) {
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -EINVAL;
 	}
 
@@ -142,6 +143,8 @@ static int ipoib_stop(struct net_device 
 
 	netif_stop_queue(dev);
 
+	clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+
 	/*
 	 * Now flush workqueue to make sure a scheduled task doesn't
 	 * bring our internal state back up.
@@ -149,7 +152,7 @@ static int ipoib_stop(struct net_device 
 	flush_workqueue(ipoib_workqueue);
 
 	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, 1);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -175,8 +178,18 @@ static int ipoib_change_mtu(struct net_d
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
+	/* dev->mtu > 2K ==> connected mode */
+	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
+		if (new_mtu > priv->mcast_mtu)
+			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
+				   priv->mcast_mtu);
+		dev->mtu = new_mtu;
+		return 0;
+	}
+
+	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
 		return -EINVAL;
+	}
 
 	priv->admin_mtu = new_mtu;
 
@@ -263,7 +276,7 @@ static void path_free(struct net_device 
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
 
-		ipoib_neigh_free(neigh);
+		ipoib_neigh_free(dev, neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -413,6 +426,20 @@ static void path_rec_completion(int stat
 			memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
 			       sizeof(union ib_gid));
 
+			if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+				if (!ipoib_cm_get(neigh))
+					ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
+									       path,
+									       neigh));
+				if (!ipoib_cm_get(neigh)) {
+					list_del(&neigh->list);
+					if (neigh->ah)
+						ipoib_put_ah(neigh->ah);
+					ipoib_neigh_free(dev, neigh);
+					continue;
+				}
+			}
+
 			while ((skb = __skb_dequeue(&neigh->queue)))
 				__skb_queue_tail(&skqueue, skb);
 		}
@@ -489,7 +516,7 @@ static int path_rec_start(struct net_dev
 	init_completion(&path->done);
 
 	path->query_id =
-		ib_sa_path_rec_get(priv->ca, priv->port,
+		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
 				   &path->pathrec, comp_mask    |
 				   IB_SA_PATH_REC_DGID		|
 				   IB_SA_PATH_REC_SGID		|
@@ -513,15 +540,13 @@ static void neigh_add_path(struct sk_buf
 	struct ipoib_path *path;
 	struct ipoib_neigh *neigh;
 
-	neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+	neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
 	if (!neigh) {
 		++priv->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 		return;
 	}
 
-	skb_queue_head_init(&neigh->queue);
-
 	/*
 	 * We can only be called from ipoib_start_xmit, so we're
 	 * inside tx_lock -- no need to save/restore flags.
@@ -545,14 +570,32 @@ static void neigh_add_path(struct sk_buf
 		memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
 		       sizeof(union ib_gid));
 
-		ipoib_send(dev, skb, path->ah,
-			   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
+		if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+			if (!ipoib_cm_get(neigh))
+				ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
+			if (!ipoib_cm_get(neigh)) {
+				list_del(&neigh->list);
+				if (neigh->ah)
+					ipoib_put_ah(neigh->ah);
+				ipoib_neigh_free(dev, neigh);
+				goto err_drop;
+			}
+			if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+				__skb_queue_tail(&neigh->queue, skb);
+			else {
+				ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
+					   skb_queue_len(&neigh->queue));
+				goto err_drop;
+			}
+		} else
+			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
 	} else {
 		neigh->ah  = NULL;
-		__skb_queue_tail(&neigh->queue, skb);
 
 		if (!path->query && path_rec_start(dev, path))
 			goto err_list;
+
+		__skb_queue_tail(&neigh->queue, skb);
 	}
 
 	spin_unlock(&priv->lock);
@@ -562,7 +605,8 @@ err_list:
 	list_del(&neigh->list);
 
 err_path:
-	ipoib_neigh_free(neigh);
+	ipoib_neigh_free(dev, neigh);
+err_drop:
 	++priv->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
@@ -624,8 +668,7 @@ static void unicast_arp_send(struct sk_b
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah,
-			   be32_to_cpup((__be32 *) phdr->hwaddr));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -645,7 +688,7 @@ static int ipoib_start_xmit(struct sk_bu
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	if (!spin_trylock_irqsave(&priv->tx_lock, flags))
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
 
 	/*
@@ -658,7 +701,7 @@ static int ipoib_start_xmit(struct sk_bu
 		return NETDEV_TX_BUSY;
 	}
 
-	if (skb->dst && skb->dst->neighbour) {
+	if (likely(skb->dst && skb->dst->neighbour)) {
 		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
 			ipoib_path_lookup(skb, dev);
 			goto out;
@@ -666,7 +709,12 @@ static int ipoib_start_xmit(struct sk_bu
 
 		neigh = *to_ipoib_neigh(skb->dst->neighbour);
 
-		if (likely(neigh->ah)) {
+		if (ipoib_cm_get(neigh)) {
+			if (ipoib_cm_up(neigh)) {
+				ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+				goto out;
+			}
+		} else if (neigh->ah) {
 			if (unlikely(memcmp(&neigh->dgid.raw,
 					    skb->dst->neighbour->ha + 4,
 					    sizeof(union ib_gid)))) {
@@ -680,14 +728,13 @@ static int ipoib_start_xmit(struct sk_bu
 				 */
 				ipoib_put_ah(neigh->ah);
 				list_del(&neigh->list);
-				ipoib_neigh_free(neigh);
+				ipoib_neigh_free(dev, neigh);
 				spin_unlock(&priv->lock);
 				ipoib_path_lookup(skb, dev);
 				goto out;
 			}
 
-			ipoib_send(dev, skb, neigh->ah,
-				   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
 			goto out;
 		}
 
@@ -719,7 +766,7 @@ static int ipoib_start_xmit(struct sk_bu
 					   IPOIB_GID_FMT "\n",
 					   skb->dst ? "neigh" : "dst",
 					   be16_to_cpup((__be16 *) skb->data),
-					   be32_to_cpup((__be32 *) phdr->hwaddr),
+					   IPOIB_QPN(phdr->hwaddr),
 					   IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
 				dev_kfree_skb_any(skb);
 				++priv->stats.tx_dropped;
@@ -800,9 +847,22 @@ static void ipoib_neigh_destructor(struc
 	unsigned long flags;
 	struct ipoib_ah *ah = NULL;
 
+	if (n->dev->type != ARPHRD_INFINIBAND)
+		return;
+
+	if (n->dev->flags & IFF_MASTER) {
+		/* n->dev is not an IPoIB device and we have to take priv from elsewhere */
+		neigh = *to_ipoib_neigh(n);
+		if (neigh) {
+			priv = netdev_priv(neigh->dev);
+			ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n",
+				  n->dev->name);
+		} else
+			return;
+	}
 	ipoib_dbg(priv,
 		  "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
-		  be32_to_cpup((__be32 *) n->ha),
+		  IPOIB_QPN(n->ha),
 		  IPOIB_GID_RAW_ARG(n->ha + 4));
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -812,7 +872,7 @@ static void ipoib_neigh_destructor(struc
 		if (neigh->ah)
 			ah = neigh->ah;
 		list_del(&neigh->list);
-		ipoib_neigh_free(neigh);
+		ipoib_neigh_free(n->dev, neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -821,7 +881,9 @@ static void ipoib_neigh_destructor(struc
 		ipoib_put_ah(ah);
 }
 
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+				      struct net_device *dev)
 {
 	struct ipoib_neigh *neigh;
 
@@ -830,14 +892,25 @@ struct ipoib_neigh *ipoib_neigh_alloc(st
 		return NULL;
 
 	neigh->neighbour = neighbour;
+	neigh->dev = dev;
 	*to_ipoib_neigh(neighbour) = neigh;
+	skb_queue_head_init(&neigh->queue);
+	ipoib_cm_set(neigh, NULL);
 
 	return neigh;
 }
 
-void ipoib_neigh_free(struct ipoib_neigh *neigh)
+void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
 {
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
 	*to_ipoib_neigh(neigh->neighbour) = NULL;
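+	/* Drop anything still queued on this neighbour and tear down its CM connection, if any */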
+	while ((skb = __skb_dequeue(&neigh->queue))) {
+		++priv->stats.tx_dropped;
+		dev_kfree_skb_any(skb);
+	}
+	if (ipoib_cm_get(neigh))
+		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
 	kfree(neigh);
 }
 
@@ -959,11 +1032,12 @@ static void ipoib_setup(struct net_devic
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_WORK(&priv->pkey_task,    ipoib_pkey_poll,          priv->dev);
-	INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,    priv->dev);
-	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,       priv->dev);
-	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
-	INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,            priv->dev);
+	INIT_WORK(&priv->pkey_poll_task, ipoib_pkey_poll,	priv->dev);
+	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event,	priv->dev);
+	INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,	priv->dev);
+	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,	priv->dev);
+	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task,priv->dev);
+	INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,		priv->dev);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1101,6 +1175,8 @@ static struct net_device *ipoib_add_port
 
 	ipoib_create_debug_files(priv->dev);
 
+	if (ipoib_cm_add_mode_attr(priv->dev))
+		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
 	if (class_device_create_file(&priv->dev->class_dev,
@@ -1137,13 +1213,16 @@ static void ipoib_add_one(struct ib_devi
 	struct ipoib_dev_priv *priv;
 	int s, e, p;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 	if (!dev_list)
 		return;
 
 	INIT_LIST_HEAD(dev_list);
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1167,6 +1246,9 @@ static void ipoib_remove_one(struct ib_d
 	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = ib_get_client_data(device, &ipoib_client);
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
@@ -1211,13 +1293,16 @@ static int __init ipoib_init_module(void
 		goto err_fs;
 	}
 
+	ib_sa_register_client(&ipoib_sa_client);
+
 	ret = ib_register_client(&ipoib_client);
 	if (ret)
-		goto err_wq;
+		goto err_sa;
 
 	return 0;
 
-err_wq:
+err_sa:
+	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 
 err_fs:
@@ -1229,6 +1314,7 @@ err_fs:
 static void __exit ipoib_cleanup_module(void)
 {
 	ib_unregister_client(&ipoib_client);
+	ib_sa_unregister_client(&ipoib_sa_client);
 	ipoib_unregister_debugfs();
 	destroy_workqueue(ipoib_workqueue);
 }
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
 	struct ib_sa_mcmember_rec mcmember;
+	struct ib_sa_multicast	 *mc;
 	struct ipoib_ah          *ah;
 
 	struct rb_node    rb_node;
 	struct list_head  list;
-	struct completion done;
-
-	int                 query_id;
-	struct ib_sa_query *query;
 
 	unsigned long created;
 	unsigned long backoff;
@@ -114,7 +111,7 @@ static void ipoib_mcast_free(struct ipoi
 		 */
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
-		ipoib_neigh_free(neigh);
+		ipoib_neigh_free(dev, neigh);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struc
 	return 0;
 }
 
-static void
+static int
 ipoib_mcast_sendonly_join_complete(int status,
-				   struct ib_sa_mcmember_rec *mcmember,
-				   void *mcast_ptr)
+				   struct ib_sa_multicast *multicast)
 {
-	struct ipoib_mcast *mcast = mcast_ptr;
+	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	/* We trap for port events ourselves. */
+	if (status == -ENETRESET)
+		return 0;
+
 	if (!status)
-		ipoib_mcast_join_finish(mcast, mcmember);
-	else {
+		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+	if (status) {
 		if (mcast->logcount++ < 20)
 			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
 					IPOIB_GID_FMT ", status %d\n",
@@ -325,10 +326,10 @@ ipoib_mcast_sendonly_join_complete(int s
 		spin_unlock_irq(&priv->tx_lock);
 
 		/* Clear the busy flag so we try again */
-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+					    &mcast->flags);
 	}
-
-	complete(&mcast->done);
+	return status;
 }
 
 static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
@@ -358,35 +359,33 @@ static int ipoib_mcast_sendonly_join(str
 	rec.port_gid = priv->local_gid;
 	rec.pkey     = cpu_to_be16(priv->pkey);
 
-	init_completion(&mcast->done);
-
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
-				     IB_SA_MCMEMBER_REC_MGID		|
-				     IB_SA_MCMEMBER_REC_PORT_GID	|
-				     IB_SA_MCMEMBER_REC_PKEY		|
-				     IB_SA_MCMEMBER_REC_JOIN_STATE,
-				     1000, GFP_ATOMIC,
-				     ipoib_mcast_sendonly_join_complete,
-				     mcast, &mcast->query);
-	if (ret < 0) {
-		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
+	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
+					 priv->port, &rec,
+					 IB_SA_MCMEMBER_REC_MGID	|
+					 IB_SA_MCMEMBER_REC_PORT_GID	|
+					 IB_SA_MCMEMBER_REC_PKEY	|
+					 IB_SA_MCMEMBER_REC_JOIN_STATE,
+					 GFP_ATOMIC,
+					 ipoib_mcast_sendonly_join_complete,
+					 mcast);
+	if (IS_ERR(mcast->mc)) {
+		ret = PTR_ERR(mcast->mc);
+		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
 			   ret);
 	} else {
 		ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
 				", starting join\n",
 				IPOIB_GID_ARG(mcast->mcmember.mgid));
-
-		mcast->query_id = ret;
 	}
 
 	return ret;
 }
 
-static void ipoib_mcast_join_complete(int status,
-				      struct ib_sa_mcmember_rec *mcmember,
-				      void *mcast_ptr)
+static int ipoib_mcast_join_complete(int status,
+				     struct ib_sa_multicast *multicast)
 {
-	struct ipoib_mcast *mcast = mcast_ptr;
+	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -394,23 +393,28 @@ static void ipoib_mcast_join_complete(in
 			" (status %d)\n",
 			IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
-	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
+	/* We trap for port events ourselves. */
+	if (status == -ENETRESET)
+		return 0;
+
+	if (!status)
+		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+	if (!status) {
 		mcast->backoff = 1;
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_work(ipoib_workqueue, &priv->mcast_task);
 		mutex_unlock(&mcast_mutex);
-		complete(&mcast->done);
-		return;
-	}
 
-	if (status == -EINTR) {
-		complete(&mcast->done);
-		return;
+		if (mcast == priv->broadcast)
+			netif_carrier_on(dev);
+
+		return 0;
 	}
 
-	if (status && mcast->logcount++ < 20) {
-		if (status == -ETIMEDOUT || status == -EINTR) {
+	if (mcast->logcount++ < 20) {
+		if (status == -ETIMEDOUT) {
 			ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
 					", status %d\n",
 					IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -427,23 +431,18 @@ static void ipoib_mcast_join_complete(in
 	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-	mutex_lock(&mcast_mutex);
+	/* Clear the busy flag so we try again */
+	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 
+	mutex_lock(&mcast_mutex);
 	spin_lock_irq(&priv->lock);
-	mcast->query = NULL;
-
-	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
-		if (status == -ETIMEDOUT)
-			queue_work(ipoib_workqueue, &priv->mcast_task);
-		else
-			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-					   mcast->backoff * HZ);
-	} else
-		complete(&mcast->done);
+	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
 
-	return;
+	return status;
 }
 
 static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
@@ -471,36 +470,35 @@ static void ipoib_mcast_join(struct net_
 
 	if (create) {
 		comp_mask |=
-			IB_SA_MCMEMBER_REC_QKEY          |
-			IB_SA_MCMEMBER_REC_SL		 |
-			IB_SA_MCMEMBER_REC_FLOW_LABEL	 |
-			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
-			IB_SA_MCMEMBER_REC_RATE_SELECTOR |
-			IB_SA_MCMEMBER_REC_RATE          |
-			IB_SA_MCMEMBER_REC_HOP_LIMIT     |
-			IB_SA_MCMEMBER_REC_MTU_SELECTOR  |
-			IB_SA_MCMEMBER_REC_MTU;
+			IB_SA_MCMEMBER_REC_QKEY			|
+			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
+			IB_SA_MCMEMBER_REC_MTU			|
+			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
+			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
+			IB_SA_MCMEMBER_REC_RATE			|
+			IB_SA_MCMEMBER_REC_SL			|
+			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
+			IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
 		rec.qkey	  = priv->broadcast->mcmember.qkey;
-		rec.sl		  = priv->broadcast->mcmember.sl;
-		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
+		rec.mtu_selector  = IB_SA_EQ;
+		rec.mtu		  = priv->broadcast->mcmember.mtu;
 		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
 		rec.rate_selector = IB_SA_EQ;
-		rec.rate          = priv->broadcast->mcmember.rate;
-		rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
-		rec.mtu_selector  = IB_SA_EQ;
-		rec.mtu           = priv->broadcast->mcmember.mtu;
+		rec.rate	  = priv->broadcast->mcmember.rate;
+		rec.sl		  = priv->broadcast->mcmember.sl;
+		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
+		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
-	init_completion(&mcast->done);
-
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
-				     mcast->backoff * 1000, GFP_ATOMIC,
-				     ipoib_mcast_join_complete,
-				     mcast, &mcast->query);
-
-	if (ret < 0) {
-		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+					 &rec, comp_mask, GFP_KERNEL,
+					 ipoib_mcast_join_complete, mcast);
+	if (IS_ERR(mcast->mc)) {
+		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ret = PTR_ERR(mcast->mc);
+		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
 
 		mcast->backoff *= 2;
 		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -512,8 +510,7 @@ static void ipoib_mcast_join(struct net_
 					   &priv->mcast_task,
 					   mcast->backoff * HZ);
 		mutex_unlock(&mcast_mutex);
-	} else
-		mcast->query_id = ret;
+	}
 }
 
 void ipoib_mcast_join_task(void *dev_ptr)
@@ -564,7 +561,8 @@ void ipoib_mcast_join_task(void *dev_ptr
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-		ipoib_mcast_join(dev, priv->broadcast, 0);
+		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+			ipoib_mcast_join(dev, priv->broadcast, 0);
 		return;
 	}
 
@@ -593,12 +591,13 @@ void ipoib_mcast_join_task(void *dev_ptr
 
 	priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
 		IPOIB_ENCAP_LEN;
-	dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+
+	if (!ipoib_cm_admin_enabled(dev))
+		dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
 
 	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
 
 	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
-	netif_carrier_on(dev);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
@@ -619,26 +618,9 @@ int ipoib_mcast_start_thread(struct net_
 	return 0;
 }
 
-static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
-				struct ipoib_mcast *mcast)
-{
-	spin_lock_irq(&priv->lock);
-	if (mcast && mcast->query) {
-		ib_sa_cancel_query(mcast->query_id, mcast->query);
-		mcast->query = NULL;
-		spin_unlock_irq(&priv->lock);
-		ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
-				IPOIB_GID_ARG(mcast->mcmember.mgid));
-		wait_for_completion(&mcast->done);
-	}
-	else
-		spin_unlock_irq(&priv->lock);
-}
-
 int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_mcast *mcast;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
@@ -654,52 +636,27 @@ int ipoib_mcast_stop_thread(struct net_d
 	if (flush)
 		flush_workqueue(ipoib_workqueue);
 
-	wait_for_mcast_join(priv, priv->broadcast);
-
-	list_for_each_entry(mcast, &priv->multicast_list, list)
-		wait_for_mcast_join(priv, mcast);
-
 	return 0;
 }
 
 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_sa_mcmember_rec rec = {
-		.join_state = 1
-	};
 	int ret = 0;
 
-	if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
-		return 0;
-
-	ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
-			IPOIB_GID_ARG(mcast->mcmember.mgid));
+	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+		ib_sa_free_multicast(mcast->mc);
 
-	rec.mgid     = mcast->mcmember.mgid;
-	rec.port_gid = priv->local_gid;
-	rec.pkey     = cpu_to_be16(priv->pkey);
-
-	/* Remove ourselves from the multicast group */
-	ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
-				 &mcast->mcmember.mgid);
-	if (ret)
-		ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+		ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
+				IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-	/*
-	 * Just make one shot at leaving and don't wait for a reply;
-	 * if we fail, too bad.
-	 */
-	ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
-					IB_SA_MCMEMBER_REC_MGID		|
-					IB_SA_MCMEMBER_REC_PORT_GID	|
-					IB_SA_MCMEMBER_REC_PKEY		|
-					IB_SA_MCMEMBER_REC_JOIN_STATE,
-					0, GFP_ATOMIC, NULL,
-					mcast, &mcast->query);
-	if (ret < 0)
-		ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
-			   "for leave (result = %d)\n", ret);
+		/* Remove ourselves from the multicast group */
+		ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
+					 &mcast->mcmember.mgid);
+		if (ret)
+			ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+	}
 
 	return 0;
 }
@@ -752,7 +709,7 @@ void ipoib_mcast_send(struct net_device 
 			dev_kfree_skb_any(skb);
 		}
 
-		if (mcast->query)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			ipoib_dbg_mcast(priv, "no address vector, "
 					"but multicast join already started\n");
 		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -770,7 +727,7 @@ out:
 		if (skb->dst            &&
 		    skb->dst->neighbour &&
 		    !*to_ipoib_neigh(skb->dst->neighbour)) {
-			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
 
 			if (neigh) {
 				kref_get(&mcast->ah->ref);
@@ -804,7 +761,7 @@ void ipoib_mcast_dev_flush(struct net_de
 	}
 
 	if (priv->broadcast) {
- 		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
 		list_add_tail(&priv->broadcast->list, &remove_list);
 		priv->broadcast = NULL;
 	}
@@ -909,7 +866,6 @@ void ipoib_mcast_restart_task(void *dev_
 
 	/* We have to cancel outside of the spinlock */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-		wait_for_mcast_join(priv, mcast);
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,8 +33,6 @@
  * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <rdma/ib_cache.h>
-
 #include "ipoib.h"
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device
 	if (!qp_attr)
 		goto out;
 
-	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 		ret = -ENXIO;
 		goto out;
@@ -94,26 +92,17 @@ int ipoib_init_qp(struct net_device *dev
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
-	u16 pkey_index;
 	struct ib_qp_attr qp_attr;
 	int attr_mask;
 
-	/*
-	 * Search through the port P_Key table for the requested pkey value.
-	 * The port has to be assigned to the respective IB partition in
-	 * advance.
-	 */
-	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
-	if (ret) {
-		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-		return ret;
-	}
-	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+	/* Make sure we have a valid pkey_index in priv->pkey_index */
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+		return -1;
 
 	qp_attr.qp_state = IB_QPS_INIT;
 	qp_attr.qkey = 0;
 	qp_attr.port_num = priv->port;
-	qp_attr.pkey_index = pkey_index;
+	qp_attr.pkey_index = priv->pkey_index;
 	attr_mask =
 	    IB_QP_QKEY |
 	    IB_QP_PORT |
@@ -168,35 +157,41 @@ int ipoib_transport_dev_init(struct net_
 		.qp_type     = IB_QPT_UD
 	};
 
+	int ret, size;
+
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
 		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
 		return -ENODEV;
 	}
 
-	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-				ipoib_sendq_size + ipoib_recvq_size + 1);
+	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(priv->mr)) {
+		printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
+		goto out_free_pd;
+	}
+
+	size = ipoib_sendq_size + ipoib_recvq_size + 1;
+	ret = ipoib_cm_dev_init(dev);
+	if (!ret)
+		size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
+
+	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size);
 	if (IS_ERR(priv->cq)) {
 		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
-		goto out_free_pd;
+		goto out_free_mr;
 	}
 
 	if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
 		goto out_free_cq;
 
-	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
-	if (IS_ERR(priv->mr)) {
-		printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
-		goto out_free_cq;
-	}
-
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq,
 
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
-		goto out_free_mr;
+		goto out_free_cq;
 	}
 
 	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -212,12 +207,12 @@ int ipoib_transport_dev_init(struct net_
 
 	return 0;
 
-out_free_mr:
-	ib_dereg_mr(priv->mr);
-
 out_free_cq:
 	ib_destroy_cq(priv->cq);
 
+out_free_mr:
+	ib_dereg_mr(priv->mr);
+
 out_free_pd:
 	ib_dealloc_pd(priv->pd);
 	return -ENODEV;
@@ -235,12 +230,14 @@ void ipoib_transport_dev_cleanup(struct 
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	}
 
-	if (ib_dereg_mr(priv->mr))
-		ipoib_warn(priv, "ib_dereg_mr failed\n");
-
 	if (ib_destroy_cq(priv->cq))
 		ipoib_warn(priv, "ib_cq_destroy failed\n");
 
+	ipoib_cm_dev_cleanup(dev);
+
+	if (ib_dereg_mr(priv->mr))
+		ipoib_warn(priv, "ib_dereg_mr failed\n");
+
 	if (ib_dealloc_pd(priv->pd))
 		ipoib_warn(priv, "ib_dealloc_pd failed\n");
 }
@@ -251,13 +248,18 @@ void ipoib_event(struct ib_event_handler
 	struct ipoib_dev_priv *priv =
 		container_of(handler, struct ipoib_dev_priv, event_handler);
 
+	if (record->element.port_num != priv->port)
+		return;
+
 	if (record->event == IB_EVENT_PORT_ERR    ||
-	    record->event == IB_EVENT_PKEY_CHANGE ||
 	    record->event == IB_EVENT_PORT_ACTIVE ||
 	    record->event == IB_EVENT_LID_CHANGE  ||
 	    record->event == IB_EVENT_SM_CHANGE   ||
 	    record->event == IB_EVENT_CLIENT_REREGISTER) {
 		ipoib_dbg(priv, "Port state change event\n");
 		queue_work(ipoib_workqueue, &priv->flush_task);
+	} else if (record->event == IB_EVENT_PKEY_CHANGE) {
+		ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
+		queue_work(ipoib_workqueue, &priv->pkey_event_task);
 	}
 }
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pd
 
 	ipoib_create_debug_files(priv->dev);
 
+	if (ipoib_cm_add_mode_attr(priv->dev))
+		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
 
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_IPOIB
 	tristate "IP-over-InfiniBand"
-	depends on INFINIBAND && NETDEVICES && INET
+	depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
 	---help---
 	  Support for the IP-over-InfiniBand protocol (IPoIB). This
 	  transports IP packets over InfiniBand so you can use your IB
@@ -8,6 +8,20 @@ config INFINIBAND_IPOIB
 
 	  See Documentation/infiniband/ipoib.txt for more information
 
+config INFINIBAND_IPOIB_CM
+	bool "IP-over-InfiniBand Connected Mode support"
+	depends on INFINIBAND_IPOIB && EXPERIMENTAL
+	default n
+	---help---
+	  This option enables experimental support for IPoIB connected mode.
+	  After enabling this option, you need to switch to connected mode through
+	  /sys/class/net/ibXXX/mode to actually create connections, and then increase
+	  the interface MTU with e.g. ifconfig ib0 mtu 65520.
+
+	  WARNING: Enabling connected mode will trigger some
+	  packet drops for multicast and UD mode traffic from this interface,
+	  unless you limit the MTU for these destinations to 2044.
+
 config INFINIBAND_IPOIB_DEBUG
 	bool "IP-over-InfiniBand debugging" if EMBEDDED
 	depends on INFINIBAND_IPOIB
@@ -26,7 +40,7 @@ config INFINIBAND_IPOIB_DEBUG_DATA
 	bool "IP-over-InfiniBand data path debugging"
 	depends on INFINIBAND_IPOIB_DEBUG
 	---help---
-	  This option compiles debugging code into the the data path
+	  This option compiles debugging code into the data path
 	  of the IPoIB driver.  The output can be turned on via the
 	  data_debug_level module parameter; however, even with output
 	  turned off, this debugging code will have some performance
--- linux-2.6.18.noarch/drivers/infiniband/ulp/ipoib/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,5 +5,6 @@ ib_ipoib-y					:= ipoib_main.o \
 						   ipoib_multicast.o \
 						   ipoib_verbs.o \
 						   ipoib_vlan.o
+ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM)		+= ipoib_cm.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG)	+= ipoib_fs.o
 
--- linux-2.6.18.noarch/drivers/infiniband/ulp/sdp/sdp_bcopy.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
@@ -37,9 +37,52 @@
 #include <rdma/rdma_cm.h>
 #include "sdp.h"
 
+#define SDP_RESIZE_WAIT 16
+
+struct sdp_chrecvbuf {
+	u32 size;
+};
+
 static int rcvbuf_scale = 0x10;
+
 module_param_named(rcvbuf_scale, rcvbuf_scale, int, 0644);
-MODULE_PARM_DESC(srcvbuf_scale, "Receive buffer size scale factor.");
+MODULE_PARM_DESC(rcvbuf_scale, "Receive buffer size scale factor.");
+
+static int top_mem_usage = 0;
+module_param_named(top_mem_usage, top_mem_usage, int, 0644);
+MODULE_PARM_DESC(top_mem_usage, "Top system-wide SDP memory usage for recv (in MB).");
+
+#ifdef CONFIG_PPC
+static int max_large_sockets = 100;
+#else
+static int max_large_sockets = 1000;
+#endif
+module_param_named(max_large_sockets, max_large_sockets, int, 0644);
+MODULE_PARM_DESC(max_large_sockets, "Max number of large sockets (32k buffers).");
+
+static int curr_large_sockets = 0;
+atomic_t sdp_current_mem_usage;
+spinlock_t sdp_large_sockets_lock;
+
+static int sdp_can_resize(void)
+{
+	int count, ret;
+	spin_lock_irq(&sdp_large_sockets_lock);
+	count = curr_large_sockets;
+	ret = curr_large_sockets < max_large_sockets;
+	if (ret)
+		curr_large_sockets++;
+	spin_unlock_irq(&sdp_large_sockets_lock);
+
+	return ret;
+}
+
+void sdp_remove_large_sock(void)
+{
+	spin_lock_irq(&sdp_large_sockets_lock);
+	curr_large_sockets--;
+	spin_unlock_irq(&sdp_large_sockets_lock);
+}
 
 /* Like tcp_fin */
 static void sdp_fin(struct sock *sk)
@@ -70,8 +113,8 @@ void sdp_post_send(struct sdp_sock *ssk,
 	struct sdp_bsdh *h = (struct sdp_bsdh *)skb_push(skb, sizeof *h);
 	unsigned mseq = ssk->tx_head;
 	int i, rc, frags;
-	dma_addr_t addr;
-	struct device *hwdev;
+	u64 addr;
+	struct ib_device *dev;
 	struct ib_sge *sge;
 	struct ib_send_wr *bad_wr;
 
@@ -80,6 +123,7 @@ void sdp_post_send(struct sdp_sock *ssk,
 		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
 	else
 		h->flags = 0;
+
 	h->bufs = htons(ssk->rx_head - ssk->rx_tail);
 	h->len = htonl(skb->len);
 	h->mseq = htonl(mseq);
@@ -87,27 +131,26 @@ void sdp_post_send(struct sdp_sock *ssk,
 
 	tx_req = &ssk->tx_ring[mseq & (SDP_TX_SIZE - 1)];
 	tx_req->skb = skb;
-	hwdev = ssk->dma_device;
+	dev = ssk->ib_device;
 	sge = ssk->ibsge;
-	addr = dma_map_single(hwdev,
-			      skb->data, skb->len - skb->data_len,
-			      DMA_TO_DEVICE);
+	addr = ib_dma_map_single(dev, skb->data, skb->len - skb->data_len,
+				 DMA_TO_DEVICE);
 	tx_req->mapping[0] = addr;
 
 	/* TODO: proper error handling */
-	BUG_ON(dma_mapping_error(addr));
+	BUG_ON(ib_dma_mapping_error(dev, addr));
 
-	sge->addr = (u64)addr;
+	sge->addr = addr;
 	sge->length = skb->len - skb->data_len;
 	sge->lkey = ssk->mr->lkey;
 	frags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < frags; ++i) {
 		++sge;
-		addr = dma_map_page(hwdev, skb_shinfo(skb)->frags[i].page,
-				    skb_shinfo(skb)->frags[i].page_offset,
-				    skb_shinfo(skb)->frags[i].size,
-				    DMA_TO_DEVICE);
-		BUG_ON(dma_mapping_error(addr));
+		addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page,
+				       skb_shinfo(skb)->frags[i].page_offset,
+				       skb_shinfo(skb)->frags[i].size,
+				       DMA_TO_DEVICE);
+		BUG_ON(ib_dma_mapping_error(dev, addr));
 		tx_req->mapping[i + 1] = addr;
 		sge->addr = addr;
 		sge->length = skb_shinfo(skb)->frags[i].size;
@@ -120,7 +163,8 @@ void sdp_post_send(struct sdp_sock *ssk,
 	ssk->tx_wr.num_sge = frags + 1;
 	ssk->tx_wr.opcode = IB_WR_SEND;
 	ssk->tx_wr.send_flags = IB_SEND_SIGNALED;
-	if (unlikely(mid != SDP_MID_DATA))
+	if (unlikely(mid != SDP_MID_DATA) ||
+	    unlikely(TCP_SKB_CB(skb)->flags & TCPCB_URG))
 		ssk->tx_wr.send_flags |= IB_SEND_SOLICITED;
 	rc = ib_post_send(ssk->qp, &ssk->tx_wr, &bad_wr);
 	++ssk->tx_head;
@@ -135,7 +179,7 @@ void sdp_post_send(struct sdp_sock *ssk,
 
 struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
 {
-	struct device *hwdev;
+	struct ib_device *dev;
 	struct sdp_buf *tx_req;
 	struct sk_buff *skb;
 	int i, frags;
@@ -146,18 +190,19 @@ struct sk_buff *sdp_send_completion(stru
 		return NULL;
 	}
 
-	hwdev = ssk->dma_device;
+	dev = ssk->ib_device;
         tx_req = &ssk->tx_ring[mseq & (SDP_TX_SIZE - 1)];
 	skb = tx_req->skb;
-	dma_unmap_single(hwdev, tx_req->mapping[0], skb->len - skb->data_len,
-			 DMA_TO_DEVICE);
+	ib_dma_unmap_single(dev, tx_req->mapping[0], skb->len - skb->data_len,
+			    DMA_TO_DEVICE);
 	frags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < frags; ++i) {
-		dma_unmap_page(hwdev, tx_req->mapping[i + 1],
-			       skb_shinfo(skb)->frags[i].size,
-			       DMA_TO_DEVICE);
+		ib_dma_unmap_page(dev, tx_req->mapping[i + 1],
+				  skb_shinfo(skb)->frags[i].size,
+				  DMA_TO_DEVICE);
 	}
 
+	ssk->snd_una += TCP_SKB_CB(skb)->end_seq;
 	++ssk->tx_tail;
 	return skb;
 }
@@ -167,8 +212,8 @@ static void sdp_post_recv(struct sdp_soc
 {
 	struct sdp_buf *rx_req;
 	int i, rc, frags;
-	dma_addr_t addr;
-	struct device *hwdev;
+	u64 addr;
+	struct ib_device *dev;
 	struct ib_sge *sge;
 	struct ib_recv_wr *bad_wr;
 	struct sk_buff *skb;
@@ -179,12 +224,12 @@ static void sdp_post_recv(struct sdp_soc
 
 	/* Now, allocate and repost recv */
 	/* TODO: allocate from cache */
-	skb = sk_stream_alloc_skb(&ssk->isk.sk, sizeof(struct sdp_bsdh),
+	skb = sk_stream_alloc_skb(&ssk->isk.sk, SDP_HEAD_SIZE,
 				  GFP_KERNEL);
 	/* FIXME */
 	BUG_ON(!skb);
-	h = (struct sdp_bsdh *)skb_push(skb, sizeof *h);
-	for (i = 0; i < SDP_MAX_SEND_SKB_FRAGS; ++i) {
+	h = (struct sdp_bsdh *)skb->head;
+	for (i = 0; i < ssk->recv_frags; ++i) {
 		page = alloc_pages(GFP_HIGHUSER, 0);
 		BUG_ON(!page);
 		frag = &skb_shinfo(skb)->frags[i];
@@ -199,26 +244,25 @@ static void sdp_post_recv(struct sdp_soc
 
         rx_req = ssk->rx_ring + (id & (SDP_RX_SIZE - 1));
 	rx_req->skb = skb;
-	hwdev = ssk->dma_device;
+	dev = ssk->ib_device;
 	sge = ssk->ibsge;
-	addr = dma_map_single(hwdev, h, skb_headlen(skb),
-			      DMA_FROM_DEVICE);
-	BUG_ON(dma_mapping_error(addr));
+	addr = ib_dma_map_single(dev, h, SDP_HEAD_SIZE, DMA_FROM_DEVICE);
+	BUG_ON(ib_dma_mapping_error(dev, addr));
 
 	rx_req->mapping[0] = addr;
 
 	/* TODO: proper error handling */
 	sge->addr = (u64)addr;
-	sge->length = skb_headlen(skb);
+	sge->length = SDP_HEAD_SIZE;
 	sge->lkey = ssk->mr->lkey;
 	frags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < frags; ++i) {
 		++sge;
-		addr = dma_map_page(hwdev, skb_shinfo(skb)->frags[i].page,
-				    skb_shinfo(skb)->frags[i].page_offset,
-				    skb_shinfo(skb)->frags[i].size,
-				    DMA_FROM_DEVICE);
-		BUG_ON(dma_mapping_error(addr));
+		addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page,
+				       skb_shinfo(skb)->frags[i].page_offset,
+				       skb_shinfo(skb)->frags[i].size,
+				       DMA_FROM_DEVICE);
+		BUG_ON(ib_dma_mapping_error(dev, addr));
 		rx_req->mapping[i + 1] = addr;
 		sge->addr = addr;
 		sge->length = skb_shinfo(skb)->frags[i].size;
@@ -235,17 +279,25 @@ static void sdp_post_recv(struct sdp_soc
 		sdp_dbg(&ssk->isk.sk, "ib_post_recv failed with status %d\n", rc);
 		sdp_reset(&ssk->isk.sk);
 	}
+
+	atomic_add(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
 }
 
 void sdp_post_recvs(struct sdp_sock *ssk)
 {
+	int scale = ssk->rcvbuf_scale;
 	if (unlikely(!ssk->id))
 		return;
 
+	if (top_mem_usage &&
+	    (top_mem_usage * 0x100000) < atomic_read(&sdp_current_mem_usage) * PAGE_SIZE)
+		scale = 1;
+
 	while ((likely(ssk->rx_head - ssk->rx_tail < SDP_RX_SIZE) &&
 		(ssk->rx_head - ssk->rx_tail - SDP_MIN_BUFS) *
-		SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE + ssk->rcv_nxt - ssk->copied_seq <
-		ssk->isk.sk.sk_rcvbuf * rcvbuf_scale) ||
+		(SDP_HEAD_SIZE + ssk->recv_frags * PAGE_SIZE) +
+		ssk->rcv_nxt - ssk->copied_seq <
+		ssk->isk.sk.sk_rcvbuf * scale) ||
 	       unlikely(ssk->rx_head - ssk->rx_tail < SDP_MIN_BUFS))
 		sdp_post_recv(ssk);
 }
@@ -253,7 +305,7 @@ void sdp_post_recvs(struct sdp_sock *ssk
 struct sk_buff *sdp_recv_completion(struct sdp_sock *ssk, int id)
 {
 	struct sdp_buf *rx_req;
-	struct device *hwdev;
+	struct ib_device *dev;
 	struct sk_buff *skb;
 	int i, frags;
 
@@ -263,26 +315,28 @@ struct sk_buff *sdp_recv_completion(stru
 		return NULL;
 	}
 
-	hwdev = ssk->dma_device;
+	dev = ssk->ib_device;
         rx_req = &ssk->rx_ring[id & (SDP_RX_SIZE - 1)];
 	skb = rx_req->skb;
-	dma_unmap_single(hwdev, rx_req->mapping[0], skb_headlen(skb),
-			 DMA_FROM_DEVICE);
+	ib_dma_unmap_single(dev, rx_req->mapping[0], SDP_HEAD_SIZE,
+			    DMA_FROM_DEVICE);
 	frags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < frags; ++i)
-		dma_unmap_page(hwdev, rx_req->mapping[i + 1],
-			       skb_shinfo(skb)->frags[i].size,
-			       DMA_TO_DEVICE);
+		ib_dma_unmap_page(dev, rx_req->mapping[i + 1],
+				  skb_shinfo(skb)->frags[i].size,
+				  DMA_FROM_DEVICE);
 	++ssk->rx_tail;
 	--ssk->remote_credits;
 	return skb;
 }
 
 /* Here because I do not want queue to fail. */
-static inline int sdp_sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static inline struct sk_buff *sdp_sock_queue_rcv_skb(struct sock *sk,
+						     struct sk_buff *skb)
 {
 	int skb_len;
 	struct sdp_sock *ssk = sdp_sk(sk);
+	struct sk_buff *tail;
 
 	/* not needed since sk_rmem_alloc is not currently used
 	 * TODO - remove this?
@@ -293,11 +347,17 @@ static inline int sdp_sock_queue_rcv_skb
 	TCP_SKB_CB(skb)->seq = ssk->rcv_nxt;
 	ssk->rcv_nxt += skb_len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	if (likely(skb_len && (tail = skb_peek_tail(&sk->sk_receive_queue))) &&
+	    unlikely(skb_tailroom(tail) >= skb_len)) {
+		skb_copy_bits(skb, 0, skb_put(tail, skb_len), skb_len);
+		__kfree_skb(skb);
+		skb = tail;
+	} else
+		skb_queue_tail(&sk->sk_receive_queue, skb);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
-	return 0;
+	return skb;
 }
 
 static inline void update_send_head(struct sock *sk, struct sk_buff *skb)
@@ -355,6 +415,23 @@ void sdp_post_sends(struct sdp_sock *ssk
 		return;
 	}
 
+	if (ssk->recv_request &&
+	    ssk->rx_tail >= ssk->recv_request_head &&
+	    ssk->bufs >= SDP_MIN_BUFS &&
+	    ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE) {
+		struct sdp_chrecvbuf *resp_size;
+		ssk->recv_request = 0;
+		skb = sk_stream_alloc_skb(&ssk->isk.sk,
+					  sizeof(struct sdp_bsdh) +
+					  sizeof(*resp_size),
+					  GFP_KERNEL);
+		/* FIXME */
+		BUG_ON(!skb);
+		resp_size = (struct sdp_chrecvbuf *)skb_put(skb, sizeof *resp_size);
+		resp_size->size = htons(ssk->recv_frags * PAGE_SIZE);
+		sdp_post_send(ssk, skb, SDP_MID_CHRCVBUF_ACK);
+	}
+
 	while (ssk->bufs > SDP_MIN_BUFS &&
 	       ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE &&
 	       (skb = ssk->isk.sk.sk_send_head) &&
@@ -363,6 +440,25 @@ void sdp_post_sends(struct sdp_sock *ssk
 		__skb_dequeue(&ssk->isk.sk.sk_write_queue);
 		sdp_post_send(ssk, skb, SDP_MID_DATA);
 	}
+
+	if (ssk->bufs == SDP_MIN_BUFS &&
+	    !ssk->sent_request &&
+	    ssk->tx_head > ssk->sent_request_head + SDP_RESIZE_WAIT &&
+	    ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE) {
+		struct sdp_chrecvbuf *req_size;
+		skb = sk_stream_alloc_skb(&ssk->isk.sk,
+					  sizeof(struct sdp_bsdh) +
+					  sizeof(*req_size),
+					  GFP_KERNEL);
+		/* FIXME */
+		BUG_ON(!skb);
+		ssk->sent_request = SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE;
+		ssk->sent_request_head = ssk->tx_head;
+		req_size = (struct sdp_chrecvbuf *)skb_put(skb, sizeof *req_size);
+		req_size->size = htons(ssk->sent_request);
+		sdp_post_send(ssk, skb, SDP_MID_CHRCVBUF);
+	}
+
 	c = ssk->remote_credits;
 	if (likely(c > SDP_MIN_BUFS))
 		c *= 2;
@@ -395,6 +491,13 @@ void sdp_post_sends(struct sdp_sock *ssk
 	}
 }
 
+static inline void sdp_resize(struct sdp_sock *ssk, u32 new_size)
+{
+	ssk->recv_frags = PAGE_ALIGN(new_size - SDP_HEAD_SIZE)	/ PAGE_SIZE;
+	if (ssk->recv_frags > SDP_MAX_SEND_SKB_FRAGS)
+		ssk->recv_frags = SDP_MAX_SEND_SKB_FRAGS;
+}
+
 static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
 {
 	struct sk_buff *skb;
@@ -406,6 +509,8 @@ static void sdp_handle_wc(struct sdp_soc
 		if (unlikely(!skb))
 			return;
 
+		atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
+
 		if (unlikely(wc->status)) {
 			if (wc->status != IB_WC_WR_FLUSH_ERR) {
 				sdp_dbg(&ssk->isk.sk,
@@ -415,15 +520,24 @@ static void sdp_handle_wc(struct sdp_soc
 			}
 			__kfree_skb(skb);
 		} else {
-			/* TODO: handle msg < bsdh */
+			int frags;
+
 			sdp_dbg_data(&ssk->isk.sk,
 				     "Recv completion. ID %d Length %d\n",
 				     (int)wc->wr_id, wc->byte_len);
-			skb->len = wc->byte_len;
-			skb->data_len = wc->byte_len - sizeof(struct sdp_bsdh);
-			if (unlikely(skb->data_len < 0)) {
-				printk("SDP: FIXME len %d\n", wc->byte_len);
+			if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) {
+				printk("SDP BUG! byte_len %d < %zd\n",
+				       wc->byte_len, sizeof(struct sdp_bsdh));
+				__kfree_skb(skb);
+				return;
 			}
+			skb->len = wc->byte_len;
+			if (likely(wc->byte_len > SDP_HEAD_SIZE))
+				skb->data_len = wc->byte_len - SDP_HEAD_SIZE;
+			else
+				skb->data_len = 0;
+			skb->data = skb->head;
+			skb->tail = skb->head + skb_headlen(skb);
 			h = (struct sdp_bsdh *)skb->data;
 			skb->h.raw = skb->data;
 			ssk->mseq_ack = ntohl(h->mseq);
@@ -433,11 +547,12 @@ static void sdp_handle_wc(struct sdp_soc
 			ssk->bufs = ntohl(h->mseq_ack) - ssk->tx_head + 1 +
 				ntohs(h->bufs);
 
+			frags = skb_shinfo(skb)->nr_frags;
 			pagesz = PAGE_ALIGN(skb->data_len);
 			skb_shinfo(skb)->nr_frags = pagesz / PAGE_SIZE;
 
 			for (i = skb_shinfo(skb)->nr_frags;
-			     i < SDP_MAX_SEND_SKB_FRAGS; ++i) {
+			     i < frags; ++i) {
 				put_page(skb_shinfo(skb)->frags[i].page);
 				skb->truesize -= PAGE_SIZE;
 			}
@@ -445,26 +560,49 @@ static void sdp_handle_wc(struct sdp_soc
 			if (unlikely(h->flags & SDP_OOB_PEND))
 				sk_send_sigurg(&ssk->isk.sk);
 
+			skb_pull(skb, sizeof(struct sdp_bsdh));
+
 			if (likely(h->mid == SDP_MID_DATA) &&
-			    likely(skb->data_len > 0)) {
-				skb_pull(skb, sizeof(struct sdp_bsdh));
-				/* TODO: queue can fail? */
-				sdp_sock_queue_rcv_skb(&ssk->isk.sk, skb);
-				if (unlikely(h->flags & SDP_OOB_PRES))
+			    likely(skb->len > 0)) {
+				int oob = h->flags & SDP_OOB_PRES;
+				skb = sdp_sock_queue_rcv_skb(&ssk->isk.sk, skb);
+				if (unlikely(oob))
 					sdp_urg(ssk, skb);
 			} else if (likely(h->mid == SDP_MID_DATA)) {
 				__kfree_skb(skb);
 			} else if (h->mid == SDP_MID_DISCONN) {
-				skb_pull(skb, sizeof(struct sdp_bsdh));
 				/* this will wake recvmsg */
 				sdp_sock_queue_rcv_skb(&ssk->isk.sk, skb);
 				sdp_fin(&ssk->isk.sk);
+			} else if (h->mid == SDP_MID_CHRCVBUF) {
+				u32 new_size = *(u32 *)skb->data;
+
+				if (ssk->recv_request || sdp_can_resize()) {
+					ssk->rcvbuf_scale = rcvbuf_scale;
+					sdp_resize(ssk, ntohs(new_size));
+					ssk->recv_request_head = ssk->rx_head + 1;
+				} else
+					ssk->recv_request_head = ssk->rx_tail;
+				ssk->recv_request = 1;
+				__kfree_skb(skb);
+			} else if (h->mid == SDP_MID_CHRCVBUF_ACK) {
+				u32 new_size = *(u32 *)skb->data;
+				new_size = ntohs(new_size);
+
+				if (new_size > ssk->xmit_size_goal) {
+					ssk->sent_request = -1;
+					ssk->xmit_size_goal = new_size;
+					ssk->send_frags =
+						PAGE_ALIGN(ssk->xmit_size_goal) /
+						PAGE_SIZE;
+				} else
+					ssk->sent_request = 0;
+				__kfree_skb(skb);
 			} else {
 				/* TODO: Handle other messages */
 				printk("SDP: FIXME MID %d\n", h->mid);
 				__kfree_skb(skb);
 			}
-			sdp_post_recvs(ssk);
 		}
 	} else {
 		skb = sdp_send_completion(ssk, wc->wr_id);
@@ -518,10 +656,10 @@ int sdp_poll_cq(struct sdp_sock *ssk, st
 	return ret;
 }
 
-void sdp_work(void *data)
+void sdp_work(void *_work)
 {
-	struct sock *sk = (struct sock *)data;
-	struct sdp_sock *ssk = sdp_sk(sk);
+	struct sdp_sock *ssk = container_of(_work, struct sdp_sock, work);
+	struct sock *sk = &ssk->isk.sk;
 	struct ib_cq *cq;
 
 	sdp_dbg_data(sk, "%s\n", __func__);
@@ -534,7 +672,7 @@ void sdp_work(void *data)
 	if (unlikely(!ssk->poll_cq)) {
 		struct rdma_cm_id *id = ssk->id;
 		if (id && id->qp)
-			rdma_establish(id);
+			rdma_notify(id, RDMA_CM_EVENT_ESTABLISHED);
 		goto out;
 	}
 
--- linux-2.6.18.noarch/drivers/infiniband/ulp/sdp/sdp_cma.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/sdp/sdp_cma.c
@@ -81,6 +81,11 @@ struct sdp_hah {
 	__u32 actrcvsz;
 };
 
+enum {
+	SDP_HH_SIZE = 76,
+	SDP_HAH_SIZE = 180,
+};
+
 static void sdp_cq_event_handler(struct ib_event *event, void *data)
 {
 }
@@ -146,7 +151,7 @@ int sdp_init_qp(struct sock *sk, struct 
         }
 
 	sdp_sk(sk)->mr = mr;
-	INIT_WORK(&sdp_sk(sk)->work, sdp_work, sdp_sk(sk));
+	INIT_WORK(&sdp_sk(sk)->work, sdp_work, &sdp_sk(sk)->work);
 
 	cq = ib_create_cq(device, sdp_completion_handler, sdp_cq_event_handler,
 			  sk, SDP_TX_SIZE + SDP_RX_SIZE);
@@ -168,10 +173,12 @@ int sdp_init_qp(struct sock *sk, struct 
 	}
 	sdp_sk(sk)->cq = cq;
 	sdp_sk(sk)->qp = id->qp;
-	sdp_sk(sk)->dma_device = device->dma_device;
+	sdp_sk(sk)->ib_device = device;
 
 	init_waitqueue_head(&sdp_sk(sk)->wq);
 
+	sdp_sk(sk)->recv_frags = 0;
+	sdp_sk(sk)->rcvbuf_scale = 1;
 	sdp_post_recvs(sdp_sk(sk));
 
 	sdp_dbg(sk, "%s done\n", __func__);
@@ -196,20 +203,25 @@ int sdp_connect_handler(struct sock *sk,
 {
 	struct sockaddr_in *dst_addr;
 	struct sock *child;
-	struct sdp_hh *h;
+	const struct sdp_hh *h;
 	int rc;
 
 	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);
 
-        child = sk_clone(sk, GFP_KERNEL);
-        if (!child) {
-                return -ENOMEM;
-	}
+	h = event->param.conn.private_data;
 
+	if (!h->max_adverts)
+		return -EINVAL;
+
+	child = sk_clone(sk, GFP_KERNEL);
+	if (!child)
+		return -ENOMEM;
+
+	sdp_add_sock(sdp_sk(child));
 	INIT_LIST_HEAD(&sdp_sk(child)->accept_queue);
 	INIT_LIST_HEAD(&sdp_sk(child)->backlog_queue);
-	INIT_WORK(&sdp_sk(child)->time_wait_work, sdp_time_wait_work, child);
-	INIT_WORK(&sdp_sk(child)->destroy_work, sdp_destroy_work, child);
+	INIT_WORK(&sdp_sk(child)->time_wait_work, sdp_time_wait_work, &sdp_sk(child)->time_wait_work);
+	INIT_WORK(&sdp_sk(child)->destroy_work, sdp_destroy_work, &sdp_sk(child)->destroy_work);
 
 	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
 	inet_sk(child)->dport = dst_addr->sin_port;
@@ -224,10 +236,11 @@ int sdp_connect_handler(struct sock *sk,
 		return rc;
 	}
 
-	h = event->private_data;
 	sdp_sk(child)->bufs = ntohs(h->bsdh.bufs);
 	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
 		sizeof(struct sdp_bsdh);
+	sdp_sk(child)->send_frags = PAGE_ALIGN(sdp_sk(child)->xmit_size_goal) /
+		PAGE_SIZE;
 
 	sdp_dbg(child, "%s bufs %d xmit_size_goal %d\n", __func__,
 		sdp_sk(child)->bufs,
@@ -239,6 +252,8 @@ int sdp_connect_handler(struct sock *sk,
 	list_add_tail(&sdp_sk(child)->backlog_queue, &sdp_sk(sk)->backlog_queue);
 	sdp_sk(child)->parent = sk;
 
+	child->sk_state = TCP_SYN_RECV;
+
 	/* child->sk_write_space(child); */
 	/* child->sk_data_ready(child, 0); */
 	sk->sk_data_ready(sk, 0);
@@ -249,7 +264,7 @@ int sdp_connect_handler(struct sock *sk,
 static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
 				struct rdma_cm_event *event)
 {
-	struct sdp_hah *h;
+	const struct sdp_hah *h;
 	struct sockaddr_in *dst_addr;
 	sdp_dbg(sk, "%s\n", __func__);
 
@@ -261,10 +276,12 @@ static int sdp_response_handler(struct s
 	if (sock_flag(sk, SOCK_DEAD))
 		return 0;
 
-	h = event->private_data;
+	h = event->param.conn.private_data;
 	sdp_sk(sk)->bufs = ntohs(h->bsdh.bufs);
 	sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) -
 		sizeof(struct sdp_bsdh);
+	sdp_sk(sk)->send_frags = PAGE_ALIGN(sdp_sk(sk)->xmit_size_goal) /
+		PAGE_SIZE;
 
 	sdp_dbg(sk, "%s bufs %d xmit_size_goal %d\n", __func__,
 		sdp_sk(sk)->bufs,
@@ -326,9 +343,23 @@ done:
 	return 0;
 }
 
-void sdp_disconnected_handler(struct sock *sk)
+int sdp_disconnected_handler(struct sock *sk)
 {
+	struct sdp_sock *ssk = sdp_sk(sk);
+
 	sdp_dbg(sk, "%s\n", __func__);
+
+	if (ssk->cq)
+		sdp_poll_cq(ssk, ssk->cq);
+
+	if (sk->sk_state == TCP_SYN_RECV) {
+		sdp_connected_handler(sk, NULL);
+
+		if (ssk->rcv_nxt)
+			return 0;
+	}
+
+	return -ECONNRESET;
 }
 
 int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
@@ -379,9 +410,11 @@ int sdp_cma_handler(struct rdma_cm_id *i
 		memset(&hh, 0, sizeof hh);
 		hh.bsdh.mid = SDP_MID_HELLO;
 		hh.bsdh.bufs = htons(sdp_sk(sk)->remote_credits);
+		hh.bsdh.len = htonl(sizeof(struct sdp_bsdh) + SDP_HH_SIZE);
+		hh.max_adverts = 1;
 		hh.majv_minv = SDP_MAJV_MINV;
-		hh.localrcvsz = hh.desremrcvsz = htonl(SDP_MAX_SEND_SKB_FRAGS *
-			PAGE_SIZE + sizeof(struct sdp_bsdh));
+		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
+						       PAGE_SIZE + SDP_HEAD_SIZE);
 		hh.max_adverts = 0x1;
 		inet_sk(sk)->saddr = inet_sk(sk)->rcv_saddr =
 			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
@@ -410,9 +443,11 @@ int sdp_cma_handler(struct rdma_cm_id *i
 		memset(&hah, 0, sizeof hah);
 		hah.bsdh.mid = SDP_MID_HELLO_ACK;
 		hah.bsdh.bufs = htons(sdp_sk(child)->remote_credits);
+		hah.bsdh.len = htonl(sizeof(struct sdp_bsdh) + SDP_HAH_SIZE);
 		hah.majv_minv = SDP_MAJV_MINV;
-		hah.actrcvsz = htonl(SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE +
-				     sizeof(struct sdp_bsdh));
+		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
+					    but just in case */
+		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE + SDP_HEAD_SIZE);
 		memset(&conn_param, 0, sizeof conn_param);
 		conn_param.private_data_len = sizeof hah;
 		conn_param.private_data = &hah;
@@ -459,12 +494,10 @@ int sdp_cma_handler(struct rdma_cm_id *i
 	case RDMA_CM_EVENT_DISCONNECTED:
 		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
 		rdma_disconnect(id);
-		sdp_disconnected_handler(sk);
-		rc = -ECONNRESET;
+		rc = sdp_disconnected_handler(sk);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		sdp_warn(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
-		sdp_disconnected_handler(sk);
 		rc = -ENETRESET;
 		break;
 	default:
--- linux-2.6.18.noarch/drivers/infiniband/ulp/sdp/sdp.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/sdp/sdp.h
@@ -47,7 +47,7 @@ extern int sdp_data_debug_level;
 #define SDP_RX_SIZE 0x40
 
 #define SDP_MAX_SEND_SKB_FRAGS (PAGE_SIZE > 0x8000 ? 1 : 0x8000 / PAGE_SIZE)
-
+#define SDP_HEAD_SIZE (PAGE_SIZE / 2 + sizeof(struct sdp_bsdh))
 #define SDP_NUM_WC 4
 
 #define SDP_OP_RECV 0x800000000LL
@@ -56,6 +56,8 @@ enum sdp_mid {
 	SDP_MID_HELLO = 0x0,
 	SDP_MID_HELLO_ACK = 0x1,
 	SDP_MID_DISCONN = 0x2,
+	SDP_MID_CHRCVBUF = 0xB,
+	SDP_MID_CHRCVBUF_ACK = 0xC,
 	SDP_MID_DATA = 0xFF,
 };
 
@@ -82,12 +84,13 @@ struct sdp_bsdh {
 
 struct sdp_buf {
         struct sk_buff *skb;
-        dma_addr_t      mapping[SDP_MAX_SEND_SKB_FRAGS + 1];
+        u64             mapping[SDP_MAX_SEND_SKB_FRAGS + 1];
 };
 
 struct sdp_sock {
 	/* sk has to be the first member of inet_sock */
 	struct inet_sock isk;
+	struct list_head sock_list;
 	struct list_head accept_queue;
 	struct list_head backlog_queue;
 	struct sock *parent;
@@ -105,6 +108,7 @@ struct sdp_sock {
 	u32 rcv_nxt;
 
 	int write_seq;
+	int snd_una;
 	int pushed_seq;
 	int xmit_size_goal;
 	int nonagle;
@@ -117,7 +121,7 @@ struct sdp_sock {
 	struct ib_qp *qp;
 	struct ib_cq *cq;
 	struct ib_mr *mr;
-	struct device *dma_device;
+	struct ib_device *ib_device;
 
 	/* SDP specific */
 	struct sdp_buf *rx_ring;
@@ -135,6 +139,15 @@ struct sdp_sock {
 	unsigned          tx_tail;
 	struct ib_send_wr tx_wr;
 
+	/* SDP slow start */
+	int rcvbuf_scale;
+	int sent_request;
+	int sent_request_head;
+	int recv_request_head;
+	int recv_request;
+	int recv_frags;
+	int send_frags;
+
 	struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1];
 	struct ib_wc  ibwc[SDP_NUM_WC];
 };
@@ -142,6 +155,24 @@ struct sdp_sock {
 extern struct proto sdp_proto;
 extern struct workqueue_struct *sdp_workqueue;
 
+extern atomic_t sdp_current_mem_usage;
+extern spinlock_t sdp_large_sockets_lock;
+
+/* just like TCP fs */
+struct sdp_seq_afinfo {
+	struct module           *owner;
+	char                    *name;
+	sa_family_t             family;
+	int                     (*seq_show) (struct seq_file *m, void *v);
+	struct file_operations  *seq_fops;
+};
+
+struct sdp_iter_state {
+	sa_family_t             family;
+	int                     num;
+	struct seq_operations   seq_ops;
+};
+
 static inline struct sdp_sock *sdp_sk(const struct sock *sk)
 {
 	        return (struct sdp_sock *)sk;
@@ -176,16 +207,19 @@ void sdp_reset(struct sock *sk);
 void sdp_reset_sk(struct sock *sk, int rc);
 void sdp_time_wait_destroy_sk(struct sdp_sock *ssk);
 void sdp_completion_handler(struct ib_cq *cq, void *cq_context);
-void sdp_work(void *);
+void sdp_work(void *_work);
 int sdp_post_credits(struct sdp_sock *ssk);
 void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid);
 void sdp_post_recvs(struct sdp_sock *ssk);
 int sdp_poll_cq(struct sdp_sock *ssk, struct ib_cq *cq);
 void sdp_post_sends(struct sdp_sock *ssk, int nonagle);
-void sdp_destroy_work(void *data);
-void sdp_time_wait_work(void *data);
+void sdp_destroy_work(void *_work);
+void sdp_time_wait_work(void *_work);
 struct sk_buff *sdp_recv_completion(struct sdp_sock *ssk, int id);
 struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq);
 void sdp_urg(struct sdp_sock *ssk, struct sk_buff *skb);
+void sdp_add_sock(struct sdp_sock *ssk);
+void sdp_remove_sock(struct sdp_sock *ssk);
+void sdp_remove_large_sock(void);
 
 #endif
--- linux-2.6.18.noarch/drivers/infiniband/ulp/sdp/sdp_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -37,6 +37,7 @@
 
 #include <linux/errno.h>
 #include <asm/checksum.h>
+
 static inline
 unsigned int csum_partial_copy_from_user_new (const char *src, char *dst,
 						 int len, unsigned int sum,
@@ -56,6 +57,7 @@ unsigned int csum_partial_copy_from_user
 #include <linux/socket.h>
 #include <net/protocol.h>
 #include <net/inet_common.h>
+#include <linux/proc_fs.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/ib_verbs.h>
 /* TODO: remove when sdp_socket.h becomes part of include/linux/socket.h */
@@ -110,8 +112,33 @@ static int recv_poll = 1000;
 module_param_named(recv_poll, recv_poll, int, 0644);
 MODULE_PARM_DESC(recv_poll, "How many times to poll recv.");
 
+static int send_poll_thresh = 8192;
+
+module_param_named(send_poll_thresh, send_poll_thresh, int, 0644);
+MODULE_PARM_DESC(send_poll_thresh, "Send message size threshold over which to start polling.");
+
 struct workqueue_struct *sdp_workqueue;
 
+static struct list_head sock_list;
+static spinlock_t sock_list_lock;
+
+DEFINE_RWLOCK(device_removal_lock);
+
+inline void sdp_add_sock(struct sdp_sock *ssk)
+{
+	spin_lock_irq(&sock_list_lock);
+	list_add_tail(&ssk->sock_list, &sock_list);
+	spin_unlock_irq(&sock_list_lock);
+}
+
+inline void sdp_remove_sock(struct sdp_sock *ssk)
+{
+	spin_lock_irq(&sock_list_lock);
+	BUG_ON(list_empty(&sock_list));
+	list_del_init(&(ssk->sock_list));
+	spin_unlock_irq(&sock_list_lock);
+}
+
 static int sdp_get_port(struct sock *sk, unsigned short snum)
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
@@ -166,6 +193,7 @@ static void sdp_destroy_qp(struct sdp_so
 			skb = sdp_recv_completion(ssk, ssk->rx_tail);
 			if (!skb)
 				break;
+			atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
 			__kfree_skb(skb);
 		}
 		while (ssk->tx_head != ssk->tx_tail) {
@@ -186,6 +214,9 @@ static void sdp_destroy_qp(struct sdp_so
 	if (pd)
 		ib_dealloc_pd(pd);
 
+	if (ssk->recv_frags)
+		sdp_remove_large_sock();
+
 	kfree(ssk->rx_ring);
 	kfree(ssk->tx_ring);
 }
@@ -196,10 +227,12 @@ void sdp_reset_sk(struct sock *sk, int r
 
 	sdp_dbg(sk, "%s\n", __func__);
 
+	read_lock(&device_removal_lock);
+
 	if (ssk->cq)
 		sdp_poll_cq(ssk, ssk->cq);
 
-	if (!(sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!(sk->sk_shutdown & RCV_SHUTDOWN) || !sk_stream_memory_free(sk))
 		sdp_set_error(sk, rc);
 
 	sdp_destroy_qp(ssk);
@@ -212,6 +245,8 @@ void sdp_reset_sk(struct sock *sk, int r
 	}
 
 	sk->sk_state_change(sk);
+
+	read_unlock(&device_removal_lock);
 }
 
 /* Like tcp_reset */
@@ -278,6 +313,8 @@ static void sdp_destruct(struct sock *sk
 
 	sdp_dbg(sk, "%s\n", __func__);
 
+	sdp_remove_sock(ssk);
+	
 	sdp_close_sk(sk);
 
 	if (ssk->parent)
@@ -289,6 +326,7 @@ static void sdp_destruct(struct sock *sk
 	list_for_each_entry_safe(s, t, &ssk->accept_queue, accept_queue) {
 		sk_common_release(&s->isk.sk);
 	}
+
 done:
 	sdp_dbg(sk, "%s done\n", __func__);
 }
@@ -485,10 +523,34 @@ static int sdp_disconnect(struct sock *s
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
 	int rc = 0;
+	int old_state = sk->sk_state;
+	struct sdp_sock *s, *t;
+	struct rdma_cm_id *id;
+
 	sdp_dbg(sk, "%s\n", __func__);
 	if (ssk->id)
 		rc = rdma_disconnect(ssk->id);
-	return rc;
+
+	if (old_state != TCP_LISTEN)
+		return rc;
+
+	sdp_set_state(sk, TCP_CLOSE);
+	id = ssk->id;
+	ssk->id = NULL;
+	release_sock(sk); /* release socket since locking semantics is parent
+			     inside child */
+	rdma_destroy_id(id);
+
+	list_for_each_entry_safe(s, t, &ssk->backlog_queue, backlog_queue) {
+		sk_common_release(&s->isk.sk);
+	}
+	list_for_each_entry_safe(s, t, &ssk->accept_queue, accept_queue) {
+		sk_common_release(&s->isk.sk);
+	}
+
+	lock_sock(sk);
+
+	return 0;
 }
 
 /* Like inet_csk_wait_for_connect */
@@ -639,6 +701,15 @@ static int sdp_ioctl(struct sock *sk, in
 	case SIOCATMARK:
 		answ = ssk->urg_data && ssk->urg_seq == ssk->copied_seq;
 		break;
+	case SIOCOUTQ:
+		if (sk->sk_state == TCP_LISTEN)
+			return -EINVAL;
+
+		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+			answ = 0;
+		else
+			answ = ssk->write_seq - ssk->snd_una;
+		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -648,9 +719,10 @@ static int sdp_ioctl(struct sock *sk, in
 	return put_user(answ, (int __user *)arg); 
 }
 
-void sdp_destroy_work(void *data)
+void sdp_destroy_work(void *_work)
 {
-	struct sock *sk = data;
+	struct sdp_sock *ssk = container_of(_work, struct sdp_sock, destroy_work);
+	struct sock *sk = &ssk->isk.sk;
 	sdp_dbg(sk, "%s: refcnt %d\n", __func__, atomic_read(&sk->sk_refcnt));
 
 	cancel_delayed_work(&sdp_sk(sk)->time_wait_work);
@@ -659,9 +731,10 @@ void sdp_destroy_work(void *data)
 	sock_put(sk);
 }
 
-void sdp_time_wait_work(void *data)
+void sdp_time_wait_work(void *_work)
 {
-	struct sock *sk = data;
+	struct sdp_sock *ssk = container_of(_work, struct sdp_sock, time_wait_work);
+	struct sock *sk = &ssk->isk.sk;
 	lock_sock(sk);
 	sdp_dbg(sk, "%s\n", __func__);
 
@@ -677,7 +750,7 @@ void sdp_time_wait_work(void *data)
 	release_sock(sk);
 
 	atomic_dec(sk->sk_prot->orphan_count);
-	sock_put(data);
+	sock_put(sk);
 }
 
 void sdp_time_wait_destroy_sk(struct sdp_sock *ssk)
@@ -695,8 +768,8 @@ static int sdp_init_sock(struct sock *sk
 
 	INIT_LIST_HEAD(&ssk->accept_queue);
 	INIT_LIST_HEAD(&ssk->backlog_queue);
-	INIT_WORK(&ssk->time_wait_work, sdp_time_wait_work, sk);
-	INIT_WORK(&ssk->destroy_work, sdp_destroy_work, sk);
+	INIT_WORK(&ssk->time_wait_work, sdp_time_wait_work, &ssk->time_wait_work);
+	INIT_WORK(&ssk->destroy_work, sdp_destroy_work, &ssk->destroy_work);
 
 	sk->sk_route_caps |= NETIF_F_SG | NETIF_F_NO_CSUM;
 	return 0;
@@ -727,6 +800,7 @@ static void sdp_mark_push(struct sdp_soc
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 	ssk->pushed_seq = ssk->write_seq;
+	sdp_post_sends(ssk, 0);
 }
 
 static inline void sdp_push_pending_frames(struct sock *sk)
@@ -1088,7 +1162,7 @@ new_segment:
 					/* We can extend the last page
 					 * fragment. */
 					merge = 1;
-				} else if (i == SDP_MAX_SEND_SKB_FRAGS ||
+				} else if (i == ssk->send_frags ||
 					   (!i &&
 					   !(sk->sk_route_caps & NETIF_F_SG))) {
 					/* Need to add new fragment and cannot
@@ -1190,7 +1264,8 @@ wait_for_memory:
 out:
 	if (copied)
 		sdp_push(sk, ssk, flags, mss_now, ssk->nonagle);
-	poll_send_cq(sk);
+	if (size > send_poll_thresh)
+		poll_send_cq(sk);
 	release_sock(sk);
 	return copied;
 
@@ -1620,19 +1695,230 @@ static int sdp_create_socket(struct sock
 
 	sock->ops = &sdp_proto_ops;
 	sock->state = SS_UNCONNECTED;
+
+	sdp_add_sock(sdp_sk(sk));
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static void *sdp_get_idx(struct seq_file *seq, loff_t pos)
+{
+	int i = 0;
+	struct sdp_sock *ssk;
+
+	if (!list_empty(&sock_list))
+		list_for_each_entry(ssk, &sock_list, sock_list) {
+			if (i == pos)
+				return ssk;
+			i++;
+		}
+
+	return NULL;
+}
+
+static void *sdp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	void *start = NULL;
+	struct sdp_iter_state* st = seq->private;
+
+	st->num = 0;
+
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	spin_lock_irq(&sock_list_lock);
+	start = sdp_get_idx(seq, *pos - 1);
+	if (start)
+		sock_hold((struct sock *)start);
+	spin_unlock_irq(&sock_list_lock);
+
+	return start;
+}
+
+static void *sdp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct sdp_iter_state* st = seq->private;
+	void *next = NULL;
+
+	spin_lock_irq(&sock_list_lock);
+	if (v == SEQ_START_TOKEN)
+		next = sdp_get_idx(seq, 0);
+	else
+		next = sdp_get_idx(seq, *pos);
+	if (next)
+		sock_hold((struct sock *)next);
+	spin_unlock_irq(&sock_list_lock);
+
+	*pos += 1;
+	st->num++;
+
+	return next;
+}
+
+static void sdp_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+#define TMPSZ 150
+
+static int sdp_seq_show(struct seq_file *seq, void *v)
+{
+	struct sdp_iter_state* st;
+	struct sock *sk = v;
+	char tmpbuf[TMPSZ + 1];
+	unsigned int dest;
+	unsigned int src;
+	int uid;
+	unsigned long inode;
+	__u16 destp;
+	__u16 srcp;
+	__u32 rx_queue, tx_queue;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "%-*s\n", TMPSZ - 1,
+				"  sl  local_address rem_address        uid inode"
+				"   rx_queue tx_queue");
+		goto out;
+	}
+
+	st = seq->private;
+
+	dest = inet_sk(sk)->daddr;
+	src = inet_sk(sk)->rcv_saddr;
+	destp = ntohs(inet_sk(sk)->dport);
+	srcp = ntohs(inet_sk(sk)->sport);
+	uid = sock_i_uid(sk);
+	inode = sock_i_ino(sk);
+	rx_queue = sdp_sk(sk)->rcv_nxt - sdp_sk(sk)->copied_seq;
+	tx_queue = sdp_sk(sk)->write_seq - sdp_sk(sk)->snd_una;
+
+	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %5d %lu	%08X:%08X",
+		st->num, src, srcp, dest, destp, uid, inode,
+		rx_queue, tx_queue);
+
+	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
+
+	sock_put(sk);
+out:
 	return 0;
 }
 
+static int sdp_seq_open(struct inode *inode, struct file *file)
+{
+	struct sdp_seq_afinfo *afinfo = PDE(inode)->data;
+	struct seq_file *seq;
+	struct sdp_iter_state *s;
+	int rc;
+
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	s->family               = afinfo->family;
+	s->seq_ops.start        = sdp_seq_start;
+	s->seq_ops.next         = sdp_seq_next;
+	s->seq_ops.show         = afinfo->seq_show;
+	s->seq_ops.stop         = sdp_seq_stop;
+
+	rc = seq_open(file, &s->seq_ops);
+	if (rc)
+		goto out_kfree;
+	seq          = file->private_data;
+	seq->private = s;
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+
+static struct file_operations sdp_seq_fops;
+static struct sdp_seq_afinfo sdp_seq_afinfo = {
+	.owner          = THIS_MODULE,
+	.name           = "sdp",
+	.family         = AF_INET_SDP,
+	.seq_show       = sdp_seq_show,
+	.seq_fops       = &sdp_seq_fops,
+};
+
+
+static int __init sdp_proc_init(void)
+{
+	int rc = 0;
+	struct proc_dir_entry *p;
+
+	sdp_seq_afinfo.seq_fops->owner         = sdp_seq_afinfo.owner;
+	sdp_seq_afinfo.seq_fops->open          = sdp_seq_open;
+	sdp_seq_afinfo.seq_fops->read          = seq_read;
+	sdp_seq_afinfo.seq_fops->llseek        = seq_lseek;
+	sdp_seq_afinfo.seq_fops->release       = seq_release_private;
+
+	p = proc_net_fops_create(sdp_seq_afinfo.name, S_IRUGO, sdp_seq_afinfo.seq_fops);
+	if (p)
+		p->data = &sdp_seq_afinfo;
+	else
+		rc = -ENOMEM;
+
+	return rc;
+}
+
+static void sdp_proc_unregister(void)
+{
+	proc_net_remove(sdp_seq_afinfo.name);
+	memset(sdp_seq_afinfo.seq_fops, 0, sizeof(*sdp_seq_afinfo.seq_fops));
+}
+
+#else /* CONFIG_PROC_FS */
+
+static int __init sdp_proc_init(void)
+{
+	return 0;
+}
+
+static void sdp_proc_unregister(void)
+{
+
+}
+#endif /* CONFIG_PROC_FS */
+
+static void sdp_add_device(struct ib_device *device)
+{
+}
+
+static void sdp_remove_device(struct ib_device *device)
+{
+	write_lock(&device_removal_lock);
+	write_unlock(&device_removal_lock);
+}
+
 static struct net_proto_family sdp_net_proto = {
 	.family = AF_INET_SDP,
 	.create = sdp_create_socket,
 	.owner  = THIS_MODULE,
 };
 
+struct ib_client sdp_client = {
+	.name   = "sdp",
+	.add    = sdp_add_device,
+	.remove = sdp_remove_device
+};
+
 static int __init sdp_init(void)
 {
 	int rc;
 
+	INIT_LIST_HEAD(&sock_list);
+	spin_lock_init(&sock_list_lock);
+	spin_lock_init(&sdp_large_sockets_lock);
+
 	sdp_workqueue = create_singlethread_workqueue("sdp");
 	if (!sdp_workqueue) {
 		return -ENOMEM;
@@ -1653,6 +1939,12 @@ static int __init sdp_init(void)
 		return rc;
 	}
 
+	sdp_proc_init();
+
+	atomic_set(&sdp_current_mem_usage, 0);
+
+	ib_register_client(&sdp_client);
+
 	return 0;
 }
 
@@ -1666,6 +1958,16 @@ static void __exit sdp_exit(void)
 		       atomic_read(&orphan_count));
 	destroy_workqueue(sdp_workqueue);
 	flush_scheduled_work();
+
+	BUG_ON(!list_empty(&sock_list));
+
+	if (atomic_read(&sdp_current_mem_usage))
+		printk(KERN_WARNING "%s: current mem usage %d\n", __func__,
+		       atomic_read(&sdp_current_mem_usage));
+
+	sdp_proc_unregister();
+
+	ib_unregister_client(&sdp_client);
 }
 
 module_init(sdp_init);
--- linux-2.6.18.noarch/drivers/infiniband/ulp/srp/ib_srp.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/srp/ib_srp.c
@@ -96,6 +96,8 @@ static struct ib_client srp_client = {
 	.remove = srp_remove_one
 };
 
+static struct ib_sa_client srp_sa_client;
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
 	return (struct srp_target_port *) host->hostdata;
@@ -120,9 +122,8 @@ static struct srp_iu *srp_alloc_iu(struc
 	if (!iu->buf)
 		goto out_free_iu;
 
-	iu->dma = dma_map_single(host->dev->dev->dma_device,
-				 iu->buf, size, direction);
-	if (dma_mapping_error(iu->dma))
+	iu->dma = ib_dma_map_single(host->dev->dev, iu->buf, size, direction);
+	if (ib_dma_mapping_error(host->dev->dev, iu->dma))
 		goto out_free_buf;
 
 	iu->size      = size;
@@ -143,8 +144,7 @@ static void srp_free_iu(struct srp_host 
 	if (!iu)
 		return;
 
-	dma_unmap_single(host->dev->dev->dma_device,
-			 iu->dma, iu->size, iu->direction);
+	ib_dma_unmap_single(host->dev->dev, iu->dma, iu->size, iu->direction);
 	kfree(iu->buf);
 	kfree(iu);
 }
@@ -267,7 +267,8 @@ static int srp_lookup_path(struct srp_ta
 
 	init_completion(&target->done);
 
-	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
+	target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+						   target->srp_host->dev->dev,
 						   target->srp_host->port,
 						   &target->path,
 						   IB_SA_PATH_REC_DGID		|
@@ -329,17 +330,15 @@ static int srp_send_req(struct srp_targe
 	req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len);
 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					      SRP_BUF_FORMAT_INDIRECT);
-
 	/*
-	 * In the published SRP specification (draft rev. 16a), the 
+	 * In the published SRP specification (draft rev. 16a), the
 	 * port identifier format is 8 bytes of ID extension followed
-	 * by 8 bytes of port_GUID.  Older drafts put the two halves in the
-	 * opposite order, so that the port_GUID comes first.
+	 * by 8 bytes of GUID.  Older drafts put the two halves in the
+	 * opposite order, so that the GUID comes first.
 	 *
 	 * Targets conforming to these obsolete drafts can be
 	 * recognized by the I/O Class they report.
 	 */
-
 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
 		memcpy(req->priv.initiator_port_id,
 		       &target->path.sgid.global.interface_id, 8);
@@ -480,8 +479,8 @@ static void srp_unmap_data(struct scsi_c
 		scat  = &req->fake_sg;
 	}
 
-	dma_unmap_sg(target->srp_host->dev->dev->dma_device, scat, nents,
-		     scmnd->sc_data_direction);
+	ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents,
+			scmnd->sc_data_direction);
 }
 
 static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
@@ -548,7 +547,7 @@ static int srp_reconnect_target(struct s
 	target->tx_head	 = 0;
 	target->tx_tail  = 0;
 
-	target->need_reset = 0;
+	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
 	if (ret)
 		goto err;
@@ -594,23 +593,26 @@ static int srp_map_fmr(struct srp_target
 	int i, j;
 	int ret;
 	struct srp_device *dev = target->srp_host->dev;
+	struct ib_device *ibdev = dev->dev;
 
 	if (!dev->fmr_pool)
 		return -ENODEV;
 
-	if ((sg_dma_address(&scat[0]) & ~dev->fmr_page_mask) &&
+	if ((ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask) &&
 	    mellanox_workarounds && !memcmp(&target->ioc_guid, mellanox_oui, 3))
 		return -EINVAL;
 
 	len = page_cnt = 0;
 	for (i = 0; i < sg_cnt; ++i) {
-		if (sg_dma_address(&scat[i]) & ~dev->fmr_page_mask) {
+		unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
+
+		if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) {
 			if (i > 0)
 				return -EINVAL;
 			else
 				++page_cnt;
 		}
-		if ((sg_dma_address(&scat[i]) + sg_dma_len(&scat[i])) &
+		if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) &
 		    ~dev->fmr_page_mask) {
 			if (i < sg_cnt - 1)
 				return -EINVAL;
@@ -618,7 +620,7 @@ static int srp_map_fmr(struct srp_target
 				++page_cnt;
 		}
 
-		len += sg_dma_len(&scat[i]);
+		len += dma_len;
 	}
 
 	page_cnt += len >> dev->fmr_page_shift;
@@ -630,10 +632,14 @@ static int srp_map_fmr(struct srp_target
 		return -ENOMEM;
 
 	page_cnt = 0;
-	for (i = 0; i < sg_cnt; ++i)
-		for (j = 0; j < sg_dma_len(&scat[i]); j += dev->fmr_page_size)
+	for (i = 0; i < sg_cnt; ++i) {
+		unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
+
+		for (j = 0; j < dma_len; j += dev->fmr_page_size)
 			dma_pages[page_cnt++] =
-				(sg_dma_address(&scat[i]) & dev->fmr_page_mask) + j;
+				(ib_sg_dma_address(ibdev, &scat[i]) &
+				 dev->fmr_page_mask) + j;
+	}
 
 	req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool,
 					dma_pages, page_cnt, io_addr);
@@ -643,7 +649,8 @@ static int srp_map_fmr(struct srp_target
 		goto out;
 	}
 
-	buf->va  = cpu_to_be64(sg_dma_address(&scat[0]) & ~dev->fmr_page_mask);
+	buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) &
+			       ~dev->fmr_page_mask);
 	buf->key = cpu_to_be32(req->fmr->fmr->rkey);
 	buf->len = cpu_to_be32(len);
 
@@ -662,6 +669,8 @@ static int srp_map_data(struct scsi_cmnd
 	struct srp_cmd *cmd = req->cmd->buf;
 	int len, nents, count;
 	u8 fmt = SRP_DATA_DESC_DIRECT;
+	struct srp_device *dev;
+	struct ib_device *ibdev;
 
 	if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
 		return sizeof (struct srp_cmd);
@@ -686,8 +695,10 @@ static int srp_map_data(struct scsi_cmnd
 		sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
 	}
 
-	count = dma_map_sg(target->srp_host->dev->dev->dma_device,
-			   scat, nents, scmnd->sc_data_direction);
+	dev = target->srp_host->dev;
+	ibdev = dev->dev;
+
+	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
 
 	fmt = SRP_DATA_DESC_DIRECT;
 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
@@ -701,9 +712,9 @@ static int srp_map_data(struct scsi_cmnd
 		 */
 		struct srp_direct_buf *buf = (void *) cmd->add_data;
 
-		buf->va  = cpu_to_be64(sg_dma_address(scat));
-		buf->key = cpu_to_be32(target->srp_host->dev->mr->rkey);
-		buf->len = cpu_to_be32(sg_dma_len(scat));
+		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
+		buf->key = cpu_to_be32(dev->mr->rkey);
+		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
 	} else if (srp_map_fmr(target, scat, count, req,
 			       (void *) cmd->add_data)) {
 		/*
@@ -721,13 +732,14 @@ static int srp_map_data(struct scsi_cmnd
 			count * sizeof (struct srp_direct_buf);
 
 		for (i = 0; i < count; ++i) {
+			unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
+
 			buf->desc_list[i].va  =
-				cpu_to_be64(sg_dma_address(&scat[i]));
+				cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i]));
 			buf->desc_list[i].key =
-				cpu_to_be32(target->srp_host->dev->mr->rkey);
-			buf->desc_list[i].len =
-				cpu_to_be32(sg_dma_len(&scat[i]));
-			datalen += sg_dma_len(&scat[i]);
+				cpu_to_be32(dev->mr->rkey);
+			buf->desc_list[i].len = cpu_to_be32(dma_len);
+			datalen += dma_len;
 		}
 
 		if (scmnd->sc_data_direction == DMA_TO_DEVICE)
@@ -807,13 +819,15 @@ static void srp_process_rsp(struct srp_t
 
 static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 {
+	struct ib_device *dev;
 	struct srp_iu *iu;
 	u8 opcode;
 
 	iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV];
 
-	dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
-				target->max_ti_iu_len, DMA_FROM_DEVICE);
+	dev = target->srp_host->dev->dev;
+	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,
+				   DMA_FROM_DEVICE);
 
 	opcode = *(u8 *) iu->buf;
 
@@ -849,8 +863,8 @@ static void srp_handle_recv(struct srp_t
 		break;
 	}
 
-	dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
-				   target->max_ti_iu_len, DMA_FROM_DEVICE);
+	ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len,
+				      DMA_FROM_DEVICE);
 }
 
 static void srp_completion(struct ib_cq *cq, void *target_ptr)
@@ -864,7 +878,7 @@ static void srp_completion(struct ib_cq 
 			printk(KERN_ERR PFX "failed %s status %d\n",
 			       wc.wr_id & SRP_OP_RECV ? "receive" : "send",
 			       wc.status);
-			target->need_reset = 1;
+			target->qp_in_error = 1;
 			break;
 		}
 
@@ -969,6 +983,7 @@ static int srp_queuecommand(struct scsi_
 	struct srp_request *req;
 	struct srp_iu *iu;
 	struct srp_cmd *cmd;
+	struct ib_device *dev;
 	int len;
 
 	if (target->state == SRP_TARGET_CONNECTING)
@@ -985,8 +1000,9 @@ static int srp_queuecommand(struct scsi_
 	if (!iu)
 		goto err;
 
-	dma_sync_single_for_cpu(target->srp_host->dev->dev->dma_device, iu->dma,
-				srp_max_iu_len, DMA_TO_DEVICE);
+	dev = target->srp_host->dev->dev;
+	ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len,
+				   DMA_TO_DEVICE);
 
 	req = list_entry(target->free_reqs.next, struct srp_request, list);
 
@@ -1018,8 +1034,8 @@ static int srp_queuecommand(struct scsi_
 		goto err_unmap;
 	}
 
-	dma_sync_single_for_device(target->srp_host->dev->dev->dma_device, iu->dma,
-				   srp_max_iu_len, DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
+				      DMA_TO_DEVICE);
 
 	if (__srp_post_send(target, iu, len)) {
 		printk(KERN_ERR PFX "Send failed\n");
@@ -1086,6 +1102,7 @@ static void srp_cm_rej_handler(struct ib
 		target->path.dlid = cpi->redirect_lid;
 		target->path.pkey = cpi->redirect_pkey;
 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
+		memcpy(target->orig_dgid, target->path.dgid.raw, 16);
 		memcpy(target->path.dgid.raw, cpi->redirect_gid, 16);
 
 		target->status = target->path.dlid ?
@@ -1100,6 +1117,8 @@ static void srp_cm_rej_handler(struct ib
 			 * reject reason code 25 when they mean 24
 			 * (port redirect).
 			 */
+			memcpy(target->orig_dgid,
+			       target->path.dgid.raw, 16);
 			memcpy(target->path.dgid.raw,
 			       event->param.rej_rcvd.ari, 16);
 
@@ -1177,9 +1196,11 @@ static int srp_cm_handler(struct ib_cm_i
 			break;
 		}
 
-		target->status = srp_alloc_iu_bufs(target);
-		if (target->status)
-			break;
+		if (!target->rx_ring[0]) {
+			target->status = srp_alloc_iu_bufs(target);
+			if (target->status)
+				break;
+		}
 
 		qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
 		if (!qp_attr) {
@@ -1320,7 +1341,7 @@ static int srp_abort(struct scsi_cmnd *s
 
 	printk(KERN_ERR "SRP abort called\n");
 
-	if (target->need_reset)
+	if (target->qp_in_error)
 		return FAILED;
 	if (srp_find_req(target, scmnd, &req))
 		return FAILED;
@@ -1350,7 +1371,7 @@ static int srp_reset_device(struct scsi_
 
 	printk(KERN_ERR "SRP reset_device called\n");
 
-	if (target->need_reset)
+	if (target->qp_in_error)
 		return FAILED;
 	if (srp_find_req(target, scmnd, &req))
 		return FAILED;
@@ -1430,6 +1451,24 @@ static ssize_t show_pkey(struct class_de
 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
 }
 
+static ssize_t show_orig_dgid(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[0]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[1]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[2]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[3]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[4]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[5]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[6]),
+		       be16_to_cpu(((__be16 *) target->orig_dgid)[7]));
+}
 static ssize_t show_dgid(struct class_device *cdev, char *buf)
 {
 	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
@@ -1474,12 +1513,12 @@ static ssize_t show_local_ib_device(stru
 	return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
 }
 
-
 static CLASS_DEVICE_ATTR(id_ext,	  S_IRUGO, show_id_ext,		 NULL);
 static CLASS_DEVICE_ATTR(ioc_guid,	  S_IRUGO, show_ioc_guid,	 NULL);
 static CLASS_DEVICE_ATTR(service_id,	  S_IRUGO, show_service_id,	 NULL);
 static CLASS_DEVICE_ATTR(pkey,		  S_IRUGO, show_pkey,		 NULL);
 static CLASS_DEVICE_ATTR(dgid,		  S_IRUGO, show_dgid,		 NULL);
+static CLASS_DEVICE_ATTR(orig_dgid,	  S_IRUGO, show_orig_dgid,	 NULL);
 static CLASS_DEVICE_ATTR(zero_req_lim,	  S_IRUGO, show_zero_req_lim,	 NULL);
 static CLASS_DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,	 NULL);
 static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
@@ -1490,6 +1529,7 @@ static struct class_device_attribute *sr
 	&class_device_attr_service_id,
 	&class_device_attr_pkey,
 	&class_device_attr_dgid,
+	&class_device_attr_orig_dgid,
 	&class_device_attr_zero_req_lim,
 	&class_device_attr_local_ib_port,
 	&class_device_attr_local_ib_device,
@@ -1609,18 +1649,30 @@ static int srp_parse_options(const char 
 		switch (token) {
 		case SRP_OPT_ID_EXT:
 			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
 			kfree(p);
 			break;
 
 		case SRP_OPT_IOC_GUID:
 			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
 			kfree(p);
 			break;
 
 		case SRP_OPT_DGID:
 			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			if (strlen(p) != 32) {
 				printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
 				kfree(p);
@@ -1644,6 +1696,10 @@ static int srp_parse_options(const char 
 
 		case SRP_OPT_SERVICE_ID:
 			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
 			kfree(p);
 			break;
@@ -1681,6 +1737,10 @@ static int srp_parse_options(const char 
 
 		case SRP_OPT_INITIATOR_EXT:
 			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
 			kfree(p);
 			break;
@@ -1722,10 +1782,10 @@ static ssize_t srp_create_target(struct 
 	if (!target_host)
 		return -ENOMEM;
 
-	target_host->max_lun = SRP_MAX_LUN;
+	target_host->max_lun     = SRP_MAX_LUN;
+	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
 
 	target = host_to_target(target_host);
-	memset(target, 0, sizeof *target);
 
 	target->io_class   = SRP_REV16A_IB_IO_CLASS;
 	target->scsi_host  = target_host;
@@ -1759,6 +1819,7 @@ static ssize_t srp_create_target(struct 
 	       (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[12]),
 	       (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[14]));
 
+	memcpy(target->orig_dgid, target->path.dgid.raw, 16);
 	ret = srp_create_target_ib(target);
 	if (ret)
 		goto err;
@@ -1769,7 +1830,7 @@ static ssize_t srp_create_target(struct 
 		goto err_free;
 	}
 
-	target->need_reset = 0;
+	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
 	if (ret) {
 		printk(KERN_ERR PFX "Connection failed\n");
@@ -1887,7 +1948,7 @@ static void srp_add_one(struct ib_device
 	 */
 	srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
 	srp_dev->fmr_page_size  = 1 << srp_dev->fmr_page_shift;
-	srp_dev->fmr_page_mask  = ~((unsigned long) srp_dev->fmr_page_size - 1);
+	srp_dev->fmr_page_mask  = ~((u64) srp_dev->fmr_page_size - 1);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
 
@@ -1917,7 +1978,7 @@ static void srp_add_one(struct ib_device
 	if (IS_ERR(srp_dev->fmr_pool))
 		srp_dev->fmr_pool = NULL;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -2016,9 +2077,12 @@ static int __init srp_init_module(void)
 		return ret;
 	}
 
+	ib_sa_register_client(&srp_sa_client);
+
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		printk(KERN_ERR PFX "couldn't register IB client\n");
+		ib_sa_unregister_client(&srp_sa_client);
 		class_unregister(&srp_class);
 		return ret;
 	}
@@ -2029,6 +2093,7 @@ static int __init srp_init_module(void)
 static void __exit srp_cleanup_module(void)
 {
 	ib_unregister_client(&srp_client);
+	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 }
 
--- linux-2.6.18.noarch/drivers/infiniband/ulp/srp/ib_srp.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/srp/ib_srp.h
@@ -87,7 +87,7 @@ struct srp_device {
 	struct ib_fmr_pool     *fmr_pool;
 	int			fmr_page_shift;
 	int			fmr_page_size;
-	unsigned long		fmr_page_mask;
+	u64			fmr_page_mask;
 };
 
 struct srp_host {
@@ -129,6 +129,7 @@ struct srp_target_port {
 	unsigned int		scsi_id;
 
 	struct ib_sa_path_rec	path;
+	u8			orig_dgid[16];
 	struct ib_sa_query     *path_query;
 	int			path_query_id;
 
@@ -158,11 +159,11 @@ struct srp_target_port {
 	struct completion	done;
 	int			status;
 	enum srp_target_state	state;
-	int			need_reset;
+	int			qp_in_error;
 };
 
 struct srp_iu {
-	dma_addr_t		dma;
+	u64			dma;
 	void		       *buf;
 	size_t			size;
 	enum dma_data_direction	direction;
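The ib_srp changes above replace direct use of the generic DMA API on
target->srp_host->dev->dev->dma_device with the ib_dma_*/ib_sg_dma_*
wrappers, and widen srp_iu.dma to u64 to match, so an IB device that
provides its own DMA mapping operations (such as ipath) is handled
transparently.  A minimal sketch of the resulting scatter/gather mapping
pattern, using a hypothetical helper name purely for illustration:

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper, not part of the patch: map, walk and unmap an sg list. */
static int example_map_sg(struct ib_device *ibdev, struct scatterlist *scat,
			  int nents, enum dma_data_direction dir)
{
	int count, i;

	/* was: dma_map_sg(ibdev->dma_device, scat, nents, dir) */
	count = ib_dma_map_sg(ibdev, scat, nents, dir);
	if (!count)
		return -EIO;

	for (i = 0; i < count; ++i) {
		u64 addr = ib_sg_dma_address(ibdev, &scat[i]);
		unsigned int len = ib_sg_dma_len(ibdev, &scat[i]);

		pr_debug("seg %d: addr 0x%llx len %u\n", i,
			 (unsigned long long) addr, len);
	}

	/* was: dma_unmap_sg(ibdev->dma_device, scat, nents, dir) */
	ib_dma_unmap_sg(ibdev, scat, nents, dir);
	return 0;
}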
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/Kconfig
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/Kconfig
@@ -0,0 +1,28 @@
+config INFINIBAND_VNIC
+	tristate "VNIC - Support for QLogic Virtual Ethernet I/O Controller"
+	depends on INFINIBAND && NETDEVICES && INET
+	---help---
+	  Support for the QLogic Virtual Ethernet I/O Controller
+	  (VEx). In conjunction with the VEx, this provides virtual
+	  Ethernet interfaces and transports Ethernet packets over
+	  InfiniBand so that you can communicate with Ethernet networks
+	  using your IB device.
+
+config INFINIBAND_VNIC_DEBUG
+	bool "VNIC Verbose debugging"
+	depends on INFINIBAND_VNIC
+	default n
+	---help---
+	  This option causes verbose debugging code to be compiled
+	  into the VNIC driver.  The output can be turned on via the
+	  vnic_debug module parameter.
+
+config INFINIBAND_VNIC_STATS
+	bool "VNIC Statistics"
+	depends on INFINIBAND_VNIC
+	default n
+	---help---
+	  This option compiles statistics-collecting code into the
+	  data path of the VNIC driver to help with profiling and
+	  fine-tuning. This adds some overhead in the interest of
+	  gathering data.
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_VNIC)			+= ib_vnic.o
+
+ib_vnic-y					:= vnic_main.o \
+						   vnic_ib.o \
+						   vnic_viport.o \
+						   vnic_control.o \
+						   vnic_data.o \
+						   vnic_netpath.o \
+						   vnic_config.o \
+						   vnic_sys.o
+
+ib_vnic-$(CONFIG_INFINIBAND_VNIC_STATS)		+= vnic_stats.o
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_config.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_config.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/utsname.h>
+#include <linux/if_vlan.h>
+
+#include <rdma/ib_cache.h>
+
+#include "vnic_util.h"
+#include "vnic_config.h"
+#include "vnic_trailer.h"
+
+#define	SST_AGN		0x10ULL
+#define	SST_OUI		0x00066AULL
+
+enum {
+	CONTROL_PATH_ID	= 0x0,
+	DATA_PATH_ID	= 0x1
+};
+
+#define IOC_NUMBER(GUID)	(((GUID) >> 32) & 0xFF)
+
+static u16 max_mtu = MAX_MTU;
+
+static u32 default_no_path_timeout = DEFAULT_NO_PATH_TIMEOUT;
+static u32 sa_path_rec_get_timeout = SA_PATH_REC_GET_TIMEOUT;
+
+static u32 default_primary_reconnect_timeout =
+				    DEFAULT_PRIMARY_RECONNECT_TIMEOUT;
+static u32 default_primary_switch_timeout = DEFAULT_PRIMARY_SWITCH_TIMEOUT;
+static int default_prefer_primary         = DEFAULT_PREFER_PRIMARY;
+
+static int use_rx_csum = VNIC_USE_RX_CSUM;
+static int use_tx_csum = VNIC_USE_TX_CSUM;
+
+module_param(max_mtu, ushort, 0444);
+MODULE_PARM_DESC(max_mtu, "Maximum MTU size (1500-9500). Default is 9500");
+
+module_param(default_prefer_primary, bool, 0444);
+MODULE_PARM_DESC(default_prefer_primary, "Determines if primary path is"
+		 " preferred (1) or not (0). Defaults to 0");
+module_param(use_rx_csum, bool, 0444);
+MODULE_PARM_DESC(use_rx_csum, "Determines if RX checksum is done on VEx (1)"
+		 " or not (0). Defaults to 1");
+module_param(use_tx_csum, bool, 0444);
+MODULE_PARM_DESC(use_tx_csum, "Determines if TX checksum is done on VEx (1)"
+		 " or not (0). Defaults to 1");
+module_param(default_no_path_timeout, uint, 0444);
+MODULE_PARM_DESC(default_no_path_timeout, "Time to wait in milliseconds"
+		 " before reconnecting to VEx after connection loss");
+module_param(default_primary_reconnect_timeout, uint, 0444);
+MODULE_PARM_DESC(default_primary_reconnect_timeout,  "Time to wait in"
+		 " milliseconds before reconnecting the"
+		 " primary path to VEx");
+module_param(default_primary_switch_timeout, uint, 0444);
+MODULE_PARM_DESC(default_primary_switch_timeout, "Time to wait before"
+		 " switching back to primary path if"
+		 " primary path is preferred");
+module_param(sa_path_rec_get_timeout, uint, 0444);
+MODULE_PARM_DESC(sa_path_rec_get_timeout, "Timeout value in milliseconds"
+		 " for SA path record get queries");
+
+static void config_control_defaults(struct control_config *control_config,
+				    struct path_param *params)
+{
+	int len;
+	char *dot;
+	u64 sid;
+
+	sid = (SST_AGN << 56) | (SST_OUI << 32) | (CONTROL_PATH_ID << 8)
+	      |	IOC_NUMBER(be64_to_cpu(params->ioc_guid));
+
+	control_config->ib_config.service_id = cpu_to_be64(sid);
+	control_config->ib_config.conn_data.path_id = 0;
+	control_config->ib_config.conn_data.vnic_instance = params->instance;
+	control_config->ib_config.conn_data.path_num = 0;
+	dot = strchr(init_utsname()->nodename, '.');
+
+	if (dot)
+		len = dot - init_utsname()->nodename;
+	else
+		len = strlen(init_utsname()->nodename);
+
+	if (len > VNIC_MAX_NODENAME_LEN)
+		len = VNIC_MAX_NODENAME_LEN;
+
+	memcpy(control_config->ib_config.conn_data.nodename,
+	       init_utsname()->nodename, len);
+
+	control_config->ib_config.retry_count = RETRY_COUNT;
+	control_config->ib_config.rnr_retry_count = RETRY_COUNT;
+	control_config->ib_config.min_rnr_timer = MIN_RNR_TIMER;
+
+	/* These values are not configurable */
+	control_config->ib_config.num_recvs    = 5;
+	control_config->ib_config.num_sends    = 1;
+	control_config->ib_config.recv_scatter = 1;
+	control_config->ib_config.send_gather  = 1;
+
+	control_config->num_recvs = control_config->ib_config.num_recvs;
+
+	control_config->vnic_instance = params->instance;
+	control_config->max_address_entries = MAX_ADDRESS_ENTRIES;
+	control_config->min_address_entries = MIN_ADDRESS_ENTRIES;
+	control_config->req_retry_count = CONTROL_REQ_RETRY_COUNT;
+	control_config->rsp_timeout = msecs_to_jiffies(CONTROL_RSP_TIMEOUT);
+}
+
+static void config_data_defaults(struct data_config *data_config,
+				 struct path_param *params)
+{
+	u64 sid;
+
+	sid = (SST_AGN << 56) | (SST_OUI << 32) | (DATA_PATH_ID << 8)
+	      |	IOC_NUMBER(be64_to_cpu(params->ioc_guid));
+
+	data_config->ib_config.service_id = cpu_to_be64(sid);
+	data_config->ib_config.conn_data.path_id = jiffies; /* random */
+	data_config->ib_config.conn_data.vnic_instance = params->instance;
+	data_config->ib_config.conn_data.path_num = 0;
+
+	data_config->ib_config.retry_count = RETRY_COUNT;
+	data_config->ib_config.rnr_retry_count = RETRY_COUNT;
+	data_config->ib_config.min_rnr_timer = MIN_RNR_TIMER;
+
+	/*
+	 * NOTE: the num_recvs size assumes that the EIOC could
+	 * RDMA enough packets to fill all of the host recv
+	 * pool entries, plus send a kick message after each
+	 * packet, plus RDMA new buffers for the size of
+	 * the EIOC recv buffer pool, plus send kick messages
+	 * after each min_host_update_sz of new buffers, all
+	 * before the host can even pull the first completed
+	 * receive off the completion queue and repost it.
+	 * NOT LIKELY!
+	 */
+	data_config->ib_config.num_recvs = HOST_RECV_POOL_ENTRIES +
+	    (MAX_EIOC_POOL_SZ / MIN_HOST_UPDATE_SZ);
+
+	data_config->ib_config.num_sends = (2 * NOTIFY_BUNDLE_SZ) +
+	    (HOST_RECV_POOL_ENTRIES / MIN_EIOC_UPDATE_SZ) + 1;
+
+	data_config->ib_config.recv_scatter = 1; /* not configurable */
+	data_config->ib_config.send_gather = 2;	 /* not configurable */
+
+	data_config->num_recvs = data_config->ib_config.num_recvs;
+	data_config->path_id = data_config->ib_config.conn_data.path_id;
+
+	data_config->host_recv_pool_entries = HOST_RECV_POOL_ENTRIES;
+
+	data_config->host_min.size_recv_pool_entry =
+			cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + MIN_MTU));
+	data_config->host_max.size_recv_pool_entry =
+			cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + max_mtu));
+	data_config->eioc_min.size_recv_pool_entry =
+			cpu_to_be32(BUFFER_SIZE(VLAN_ETH_HLEN + MIN_MTU));
+	data_config->eioc_max.size_recv_pool_entry =
+			__constant_cpu_to_be32(MAX_PARAM_VALUE);
+
+	data_config->host_min.num_recv_pool_entries =
+				__constant_cpu_to_be32(MIN_HOST_POOL_SZ);
+	data_config->host_max.num_recv_pool_entries =
+				__constant_cpu_to_be32(MAX_PARAM_VALUE);
+	data_config->eioc_min.num_recv_pool_entries =
+				__constant_cpu_to_be32(MIN_EIOC_POOL_SZ);
+	data_config->eioc_max.num_recv_pool_entries =
+				__constant_cpu_to_be32(MAX_EIOC_POOL_SZ);
+
+	data_config->host_min.timeout_before_kick =
+			__constant_cpu_to_be32(MIN_HOST_KICK_TIMEOUT);
+	data_config->host_max.timeout_before_kick =
+			__constant_cpu_to_be32(MAX_HOST_KICK_TIMEOUT);
+	data_config->eioc_min.timeout_before_kick = 0;
+	data_config->eioc_max.timeout_before_kick =
+			__constant_cpu_to_be32(MAX_PARAM_VALUE);
+
+	data_config->host_min.num_recv_pool_entries_before_kick =
+			__constant_cpu_to_be32(MIN_HOST_KICK_ENTRIES);
+	data_config->host_max.num_recv_pool_entries_before_kick =
+			__constant_cpu_to_be32(MAX_HOST_KICK_ENTRIES);
+	data_config->eioc_min.num_recv_pool_entries_before_kick = 0;
+	data_config->eioc_max.num_recv_pool_entries_before_kick =
+				__constant_cpu_to_be32(MAX_PARAM_VALUE);
+
+	data_config->host_min.num_recv_pool_bytes_before_kick =
+			__constant_cpu_to_be32(MIN_HOST_KICK_BYTES);
+	data_config->host_max.num_recv_pool_bytes_before_kick =
+			__constant_cpu_to_be32(MAX_HOST_KICK_BYTES);
+	data_config->eioc_min.num_recv_pool_bytes_before_kick = 0;
+	data_config->eioc_max.num_recv_pool_bytes_before_kick =
+				__constant_cpu_to_be32(MAX_PARAM_VALUE);
+
+	data_config->host_min.free_recv_pool_entries_per_update =
+				__constant_cpu_to_be32(MIN_HOST_UPDATE_SZ);
+	data_config->host_max.free_recv_pool_entries_per_update =
+				__constant_cpu_to_be32(MAX_HOST_UPDATE_SZ);
+	data_config->eioc_min.free_recv_pool_entries_per_update =
+				__constant_cpu_to_be32(MIN_EIOC_UPDATE_SZ);
+	data_config->eioc_max.free_recv_pool_entries_per_update =
+				__constant_cpu_to_be32(MAX_EIOC_UPDATE_SZ);
+
+	data_config->notify_bundle = NOTIFY_BUNDLE_SZ;
+}
+
+static void config_path_info_defaults(struct viport_config *config,
+				      struct path_param *params)
+{
+	int i;
+
+	ib_get_cached_gid(config->ibdev, config->port, 0,
+			  &config->path_info.path.sgid);
+	for (i = 0; i < 16; i++)
+		config->path_info.path.dgid.raw[i] = params->dgid[i];
+	config->path_info.path.pkey = params->pkey;
+	config->path_info.path.numb_path = 1;
+	config->sa_path_rec_get_timeout = sa_path_rec_get_timeout;
+}
+
+static void config_viport_defaults(struct viport_config *config,
+				      struct path_param *params)
+{
+	config->ibdev = params->ibdev;
+	config->port = params->port;
+	config->ioc_guid = params->ioc_guid;
+	config->stats_interval = msecs_to_jiffies(VIPORT_STATS_INTERVAL);
+	config->hb_interval = msecs_to_jiffies(VIPORT_HEARTBEAT_INTERVAL);
+	config->hb_timeout = VIPORT_HEARTBEAT_TIMEOUT * 1000;
+				/* hb_timeout needs to be in usec */
+	config_path_info_defaults(config, params);
+
+	config_control_defaults(&config->control_config, params);
+	config_data_defaults(&config->data_config, params);
+}
+
+static void config_vnic_defaults(struct vnic_config *config)
+{
+	config->no_path_timeout = msecs_to_jiffies(default_no_path_timeout);
+	config->primary_connect_timeout =
+	    msecs_to_jiffies(DEFAULT_PRIMARY_CONNECT_TIMEOUT);
+	config->primary_reconnect_timeout =
+	    msecs_to_jiffies(default_primary_reconnect_timeout);
+	config->primary_switch_timeout =
+	    msecs_to_jiffies(default_primary_switch_timeout);
+	config->prefer_primary = default_prefer_primary;
+	config->use_rx_csum = use_rx_csum;
+	config->use_tx_csum = use_tx_csum;
+}
+
+struct viport_config *config_alloc_viport(struct path_param *params)
+{
+	struct viport_config *config;
+
+	config = kzalloc(sizeof *config, GFP_KERNEL);
+	if (!config) {
+		CONFIG_ERROR("could not allocate memory for"
+			     " struct viport_config\n");
+		return NULL;
+	}
+
+	config_viport_defaults(config, params);
+
+	return config;
+}
+
+struct vnic_config *config_alloc_vnic(void)
+{
+	struct vnic_config *config;
+
+	config = kzalloc(sizeof *config, GFP_KERNEL);
+	if (!config) {
+		CONFIG_ERROR("couldn't allocate memory for"
+			     " struct vnic_config\n");
+
+		return NULL;
+	}
+
+	config_vnic_defaults(config);
+	return config;
+}
+
+char *config_viport_name(struct viport_config *config)
+{
+	/* function only called by one thread, can return a static string */
+	static char str[64];
+
+	sprintf(str, "GUID %llx instance %d",
+		(unsigned long long) be64_to_cpu(config->ioc_guid),
+		config->control_config.vnic_instance);
+	return str;
+}
+
+int config_start(void)
+{
+	max_mtu = min_t(u16, max_mtu, MAX_MTU);
+	max_mtu = max_t(u16, max_mtu, MIN_MTU);
+
+	sa_path_rec_get_timeout = min_t(u32, sa_path_rec_get_timeout,
+					MAX_SA_TIMEOUT);
+	sa_path_rec_get_timeout = max_t(u32, sa_path_rec_get_timeout,
+					MIN_SA_TIMEOUT);
+
+	if (!default_no_path_timeout)
+		default_no_path_timeout = DEFAULT_NO_PATH_TIMEOUT;
+
+	if (!default_primary_reconnect_timeout)
+		default_primary_reconnect_timeout =
+					 DEFAULT_PRIMARY_RECONNECT_TIMEOUT;
+
+	if (!default_primary_switch_timeout)
+		default_primary_switch_timeout =
+					DEFAULT_PRIMARY_SWITCH_TIMEOUT;
+
+	return 0;
+}
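The service IDs built in config_control_defaults() and config_data_defaults()
above pack the agent prefix, the QLogic OUI, the path type and the IOC number
(taken from the IOC GUID) into a single 64-bit value.  A small worked sketch,
using a made-up IOC GUID purely for illustration:

#include <linux/types.h>

#define SST_AGN		0x10ULL
#define SST_OUI		0x00066AULL
#define IOC_NUMBER(guid)	(((guid) >> 32) & 0xFF)

/*
 * For a hypothetical IOC GUID of 0x00066ade00000001, IOC_NUMBER() is 0xde:
 *   control SID = (0x10 << 56) | (0x00066a << 32) | (0x0 << 8) | 0xde
 *               = 0x1000066a000000de
 *   data SID    = (0x10 << 56) | (0x00066a << 32) | (0x1 << 8) | 0xde
 *               = 0x1000066a000001de
 */
static u64 example_service_id(u64 ioc_guid, u64 path_id)
{
	return (SST_AGN << 56) | (SST_OUI << 32) | (path_id << 8) |
	       IOC_NUMBER(ioc_guid);
}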
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_config.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_config.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_CONFIG_H_INCLUDED
+#define VNIC_CONFIG_H_INCLUDED
+
+#include <rdma/ib_verbs.h>
+#include <linux/types.h>
+#include <linux/if.h>
+
+#include "vnic_control.h"
+#include "vnic_ib.h"
+
+enum {
+	VNIC_CLASS_SUBCLASS	= 0x2000066A,
+	VNIC_PROTOCOL		= 0,
+	VNIC_PROT_VERSION	= 1
+};
+
+enum {
+	MIN_MTU	= 1500,	/* minimum negotiated MTU size */
+	MAX_MTU	= 9500	/* jumbo frame */
+};
+
+/*
+ * TODO: tune the pool parameter values
+ */
+enum {
+	MIN_ADDRESS_ENTRIES = 16,
+	MAX_ADDRESS_ENTRIES = 64
+};
+
+enum {
+	HOST_RECV_POOL_ENTRIES	= 512,
+	MIN_HOST_POOL_SZ	= 64,
+	MIN_EIOC_POOL_SZ	= 64,
+	MAX_EIOC_POOL_SZ	= 256,
+	MIN_HOST_UPDATE_SZ	= 8,
+	MAX_HOST_UPDATE_SZ	= 32,
+	MIN_EIOC_UPDATE_SZ	= 8,
+	MAX_EIOC_UPDATE_SZ	= 32,
+	NOTIFY_BUNDLE_SZ	= 32
+};
+
+enum {
+	MIN_HOST_KICK_TIMEOUT = 10,	/* in usec */
+	MAX_HOST_KICK_TIMEOUT = 100	/* in usec */
+};
+
+enum {
+	MIN_HOST_KICK_ENTRIES = 1,
+	MAX_HOST_KICK_ENTRIES = 128
+};
+
+enum {
+	MIN_HOST_KICK_BYTES = 0,
+	MAX_HOST_KICK_BYTES = 5000
+};
+
+enum {
+	DEFAULT_NO_PATH_TIMEOUT			= 10000,
+	DEFAULT_PRIMARY_CONNECT_TIMEOUT		= 10000,
+	DEFAULT_PRIMARY_RECONNECT_TIMEOUT	= 10000,
+	DEFAULT_PRIMARY_SWITCH_TIMEOUT		= 10000
+};
+
+enum {
+	VIPORT_STATS_INTERVAL		= 500,	/* .5 sec */
+	VIPORT_HEARTBEAT_INTERVAL	= 1000,	/* 1 second */
+	VIPORT_HEARTBEAT_TIMEOUT	= 64000	/* 64 sec */
+};
+
+enum {
+	CONTROL_RSP_TIMEOUT		= 1000	/* 1 sec */
+};
+
+/* infiniband connection parameters */
+enum {
+	RETRY_COUNT		= 3,
+	MIN_RNR_TIMER		= 22,	/* 20 ms */
+	DEFAULT_PKEY		= 0	/* pkey table index */
+};
+
+enum {
+	SA_PATH_REC_GET_TIMEOUT	= 1000,	/* 1000 ms */
+	MIN_SA_TIMEOUT		= 100,	/* 100 ms */
+	MAX_SA_TIMEOUT		= 20000	/* 20s */
+};
+
+#define MAX_PARAM_VALUE                 0x40000000
+#define VNIC_USE_RX_CSUM		1
+#define VNIC_USE_TX_CSUM		1
+#define	DEFAULT_PREFER_PRIMARY		0
+#define	CONTROL_REQ_RETRY_COUNT		4
+
+struct path_param {
+	__be64			ioc_guid;
+	u8			port;
+	u8			instance;
+	struct ib_device	*ibdev;
+	struct vnic_ib_port	*ibport;
+	char			name[IFNAMSIZ];
+	u8			dgid[16];
+	__be16			pkey;
+	int			rx_csum;
+	int			tx_csum;
+	int			heartbeat;
+};
+
+struct vnic_ib_config {
+	__be64				service_id;
+	struct vnic_connection_data	conn_data;
+	u32				retry_count;
+	u32				rnr_retry_count;
+	u8				min_rnr_timer;
+	u32				num_sends;
+	u32				num_recvs;
+	u32				recv_scatter;	/* 1 */
+	u32				send_gather;	/* 1 or 2 */
+};
+
+struct control_config {
+	struct vnic_ib_config	ib_config;
+	u32			num_recvs;
+	u8			vnic_instance;
+	u16			max_address_entries;
+	u16			min_address_entries;
+	u32			rsp_timeout;
+	u8			req_retry_count;
+};
+
+struct data_config {
+	struct vnic_ib_config		ib_config;
+	u64				path_id;
+	u32				num_recvs;
+	u32				host_recv_pool_entries;
+	struct vnic_recv_pool_config	host_min;
+	struct vnic_recv_pool_config	host_max;
+	struct vnic_recv_pool_config	eioc_min;
+	struct vnic_recv_pool_config	eioc_max;
+	u32				notify_bundle;
+};
+
+struct viport_config {
+	struct viport			*viport;
+	struct control_config		control_config;
+	struct data_config		data_config;
+	struct vnic_ib_path_info	path_info;
+	u32				sa_path_rec_get_timeout;
+	struct ib_device		*ibdev;
+	u32				port;
+	u32				stats_interval;
+	u32				hb_interval;
+	u32				hb_timeout;
+	__be64				ioc_guid;
+	size_t				path_idx;
+};
+
+/*
+ * primary_connect_timeout   - if the secondary connects first,
+ *                             how long do we give the primary?
+ * primary_reconnect_timeout - same as above, but used when recovering
+ *                             from the case where both paths fail
+ * primary_switch_timeout    - how long do we wait before switching to
+ *                             the primary when it comes back?
+ */
+struct vnic_config {
+	struct vnic	*vnic;
+	char		name[IFNAMSIZ];
+	u32		no_path_timeout;
+	u32		primary_connect_timeout;
+	u32		primary_reconnect_timeout;
+	u32		primary_switch_timeout;
+	int		prefer_primary;
+	int		use_rx_csum;
+	int		use_tx_csum;
+};
+
+int config_start(void);
+struct viport_config *config_alloc_viport(struct path_param *params);
+struct vnic_config   *config_alloc_vnic(void);
+char *config_viport_name(struct viport_config *config);
+
+#endif	/* VNIC_CONFIG_H_INCLUDED */
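With the defaults above, the data-path queue depths computed in
config_data_defaults() come out to num_recvs = HOST_RECV_POOL_ENTRIES +
MAX_EIOC_POOL_SZ / MIN_HOST_UPDATE_SZ = 512 + 256/8 = 544 and num_sends =
2 * NOTIFY_BUNDLE_SZ + HOST_RECV_POOL_ENTRIES / MIN_EIOC_UPDATE_SZ + 1 =
64 + 64 + 1 = 129, while the control path keeps the fixed 5 receives and
1 send set in config_control_defaults().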
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_control.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_control.c
@@ -0,0 +1,1952 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+
+#include "vnic_util.h"
+#include "vnic_main.h"
+#include "vnic_viport.h"
+#include "vnic_control.h"
+#include "vnic_config.h"
+#include "vnic_control_pkt.h"
+#include "vnic_stats.h"
+
+static void control_log_control_packet(struct vnic_control_packet *pkt);
+
+static inline char *control_ifcfg_name(struct control *control)
+{
+	if (!control)
+		return "nctl";
+	if (!control->parent)
+		return "np";
+	if (!control->parent->parent)
+		return "npp";
+	if (!control->parent->parent->parent)
+		return "nppp";
+	if (!control->parent->parent->parent->config)
+		return "npppc";
+	return control->parent->parent->parent->config->name;
+}
+
+static void control_recv(struct control *control, struct recv_io *recv_io)
+{
+	if (vnic_ib_post_recv(&control->ib_conn, &recv_io->io))
+		viport_failure(control->parent);
+}
+
+static void control_recv_complete(struct io *io)
+{
+	struct recv_io			*recv_io = (struct recv_io *)io;
+	struct recv_io			*last_recv_io;
+	struct control			*control = &io->viport->control;
+	struct vnic_control_packet	*pkt = control_packet(recv_io);
+	struct vnic_control_header	*c_hdr = &pkt->hdr;
+	unsigned long			flags;
+	cycles_t			response_time;
+
+	CONTROL_FUNCTION("%s: control_recv_complete()\n",
+			 control_ifcfg_name(control));
+
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+	control_note_rsptime_stats(&response_time);
+	CONTROL_PACKET(pkt);
+	spin_lock_irqsave(&control->io_lock, flags);
+	if (c_hdr->pkt_type == TYPE_INFO) {
+		last_recv_io = control->info;
+		control->info = recv_io;
+		spin_unlock_irqrestore(&control->io_lock, flags);
+		viport_kick(control->parent);
+		if (last_recv_io)
+			control_recv(control, last_recv_io);
+	} else if (c_hdr->pkt_type == TYPE_RSP) {
+		if (control->rsp_expected
+		    && (c_hdr->pkt_seq_num == control->seq_num)) {
+			control->response = recv_io;
+			control->rsp_expected = 0;
+			spin_unlock_irqrestore(&control->io_lock, flags);
+			control_update_rsptime_stats(control,
+						     response_time);
+			viport_kick(control->parent);
+		} else {
+			spin_unlock_irqrestore(&control->io_lock, flags);
+			control_recv(control, recv_io);
+		}
+	} else {
+		list_add_tail(&recv_io->io.list_ptrs,
+			      &control->failure_list);
+		spin_unlock_irqrestore(&control->io_lock, flags);
+		viport_kick(control->parent);
+	}
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+}
+
+static void control_timeout(unsigned long data)
+{
+	struct control *control;
+
+	control = (struct control *)data;
+	CONTROL_FUNCTION("%s: control_timeout()\n",
+			 control_ifcfg_name(control));
+	control->timer_state = TIMER_EXPIRED;
+	control->rsp_expected = 0;
+	viport_kick(control->parent);
+}
+
+static void control_timer(struct control *control, int timeout)
+{
+	CONTROL_FUNCTION("%s: control_timer()\n",
+			 control_ifcfg_name(control));
+	if (control->timer_state == TIMER_ACTIVE)
+		mod_timer(&control->timer, jiffies + timeout);
+	else {
+		init_timer(&control->timer);
+		control->timer.expires = jiffies + timeout;
+		control->timer.data = (unsigned long)control;
+		control->timer.function = control_timeout;
+		control->timer_state = TIMER_ACTIVE;
+		add_timer(&control->timer);
+	}
+}
+
+static void control_timer_stop(struct control *control)
+{
+	CONTROL_FUNCTION("%s: control_timer_stop()\n",
+			 control_ifcfg_name(control));
+	if (control->timer_state == TIMER_ACTIVE)
+		del_timer_sync(&control->timer);
+
+	control->timer_state = TIMER_IDLE;
+}
+
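+/*
+ * The control channel allows a single outstanding request at a time:
+ * control_send() posts a TYPE_REQ packet and arms the response timer,
+ * control_recv_complete() matches the TYPE_RSP against control->seq_num,
+ * and control_get_rsp() handles timer expiry by retrying the request up
+ * to req_retry_count times before failing the viport.
+ */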
+static int control_send(struct control *control, struct send_io *send_io)
+{
+	CONTROL_FUNCTION("%s: control_send()\n",
+			 control_ifcfg_name(control));
+	if (control->req_outstanding) {
+		CONTROL_ERROR("%s: IB send never completed\n",
+			      control_ifcfg_name(control));
+		goto out;
+	}
+
+	control->req_outstanding = 1;
+	control_timer(control, control->config->rsp_timeout);
+	control_note_reqtime_stats(control);
+	if (vnic_ib_post_send(&control->ib_conn, &control->send_io.io)) {
+		CONTROL_ERROR("failed to post send\n");
+		control->req_outstanding = 0;
+		goto out;
+	}
+
+	return 0;
+out:
+	viport_failure(control->parent);
+	return -1;
+}
+
+static void control_send_complete(struct io *io)
+{
+	struct control *control = &io->viport->control;
+
+	CONTROL_FUNCTION("%s: control_send_complete()\n",
+			 control_ifcfg_name(control));
+	control->req_outstanding = 0;
+}
+
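+/*
+ * Process unsolicited packets queued by control_recv_complete(): TYPE_INFO
+ * link up/down status reports and anything placed on the failure list.
+ * Fatal or unexpected packets cause the viport to be failed.
+ */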
+void control_process_async(struct control *control)
+{
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+	unsigned long			flags;
+
+	CONTROL_FUNCTION("%s: control_process_async()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	spin_lock_irqsave(&control->io_lock, flags);
+	recv_io = control->info;
+	if (recv_io) {
+		CONTROL_INFO("%s: processing info packet\n",
+			     control_ifcfg_name(control));
+		control->info = NULL;
+		spin_unlock_irqrestore(&control->io_lock, flags);
+		pkt = control_packet(recv_io);
+		if (pkt->hdr.pkt_cmd == CMD_REPORT_STATUS) {
+			u32		status;
+			status =
+			  be32_to_cpu(pkt->cmd.report_status.status_number);
+			switch (status) {
+			case VNIC_STATUS_LINK_UP:
+				CONTROL_INFO("%s: link up\n",
+					     control_ifcfg_name(control));
+				vnic_link_up(control->parent->vnic,
+					     control->parent->parent);
+				break;
+			case VNIC_STATUS_LINK_DOWN:
+				CONTROL_INFO("%s: link down\n",
+					     control_ifcfg_name(control));
+				vnic_link_down(control->parent->vnic,
+					       control->parent->parent);
+				break;
+			default:
+				CONTROL_ERROR("%s: asynchronous status"
+					      " received from EIOC\n",
+					      control_ifcfg_name(control));
+				control_log_control_packet(pkt);
+				break;
+			}
+		}
+		if ((pkt->hdr.pkt_cmd != CMD_REPORT_STATUS) ||
+		     pkt->cmd.report_status.is_fatal) {
+			viport_failure(control->parent);
+		}
+		control_recv(control, recv_io);
+		spin_lock_irqsave(&control->io_lock, flags);
+	}
+
+	while (!list_empty(&control->failure_list)) {
+		CONTROL_INFO("%s: processing error packet\n",
+			     control_ifcfg_name(control));
+		recv_io = (struct recv_io *)
+		    list_entry(control->failure_list.next, struct io,
+			       list_ptrs);
+		list_del(&recv_io->io.list_ptrs);
+		spin_unlock_irqrestore(&control->io_lock, flags);
+		pkt = control_packet(recv_io);
+		CONTROL_ERROR("%s: asynchronous error received from EIOC\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		if ((pkt->hdr.pkt_type != TYPE_ERR)
+		    || (pkt->hdr.pkt_cmd != CMD_REPORT_STATUS)
+		    || pkt->cmd.report_status.is_fatal) {
+			viport_failure(control->parent);
+		}
+		control_recv(control, recv_io);
+		spin_lock_irqsave(&control->io_lock, flags);
+	}
+	spin_unlock_irqrestore(&control->io_lock, flags);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+
+	CONTROL_INFO("%s: done control_process_async\n",
+		     control_ifcfg_name(control));
+}
+
+static struct send_io *control_init_hdr(struct control *control, u8 cmd)
+{
+	struct control_config		*config;
+	struct vnic_control_packet	*pkt;
+	struct vnic_control_header	*hdr;
+
+	CONTROL_FUNCTION("%s: control_init_hdr()\n",
+			 control_ifcfg_name(control));
+	config = control->config;
+
+	pkt = control_packet(&control->send_io);
+	hdr = &pkt->hdr;
+
+	hdr->pkt_type = TYPE_REQ;
+	hdr->pkt_cmd = cmd;
+	control->seq_num++;
+	hdr->pkt_seq_num = control->seq_num;
+	control->req_retry_counter = 0;
+	hdr->pkt_retry_count = control->req_retry_counter;
+
+	return &control->send_io;
+}
+
+static struct recv_io *control_get_rsp(struct control *control)
+{
+	struct recv_io	*recv_io;
+	unsigned long	flags;
+
+	CONTROL_FUNCTION("%s: control_get_rsp()\n",
+			 control_ifcfg_name(control));
+	spin_lock_irqsave(&control->io_lock, flags);
+	recv_io = control->response;
+	if (recv_io) {
+		control_timer_stop(control);
+		control->response = NULL;
+		spin_unlock_irqrestore(&control->io_lock, flags);
+		return recv_io;
+	}
+	spin_unlock_irqrestore(&control->io_lock, flags);
+	if (control->timer_state == TIMER_EXPIRED) {
+		struct vnic_control_packet *pkt =
+		    control_packet(&control->send_io);
+		struct vnic_control_header *hdr = &pkt->hdr;
+
+		control->timer_state = TIMER_IDLE;
+		CONTROL_ERROR("%s: no response received from EIOC\n",
+			      control_ifcfg_name(control));
+		control_timeout_stats(control);
+		control->req_retry_counter++;
+		if (control->req_retry_counter >=
+		    control->config->req_retry_count) {
+			CONTROL_ERROR("%s: control packet retry exceeded\n",
+				      control_ifcfg_name(control));
+			viport_failure(control->parent);
+		} else {
+			hdr->pkt_retry_count =
+			    control->req_retry_counter;
+			control_send(control, &control->send_io);
+		}
+	}
+
+	return NULL;
+}
+
+int control_init_vnic_req(struct control *control)
+{
+	struct send_io			*send_io;
+	struct control_config		*config = control->config;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_init_vnic_req	*init_vnic_req;
+
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_INIT_VNIC);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	init_vnic_req = &pkt->cmd.init_vnic_req;
+	init_vnic_req->vnic_major_version =
+				 __constant_cpu_to_be16(VNIC_MAJORVERSION);
+	init_vnic_req->vnic_minor_version =
+				 __constant_cpu_to_be16(VNIC_MINORVERSION);
+	init_vnic_req->vnic_instance = config->vnic_instance;
+	init_vnic_req->num_data_paths = 1;
+	init_vnic_req->num_address_entries =
+				cpu_to_be16(config->max_address_entries);
+
+	CONTROL_PACKET(pkt);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+static int control_chk_vnic_rsp_values(struct control *control,
+				       u16 *num_addrs,
+				       u8 num_data_paths,
+				       u8 num_lan_switches)
+{
+	struct control_config		*config = control->config;
+
+	if ((control->maj_ver > VNIC_MAJORVERSION)
+	    || ((control->maj_ver == VNIC_MAJORVERSION)
+		&& (control->min_ver > VNIC_MINORVERSION))) {
+		CONTROL_ERROR("%s: unsupported version\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+	if (num_data_paths != 1) {
+		CONTROL_ERROR("%s: EIOC returned too many datapaths\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+	if (*num_addrs > config->max_address_entries) {
+		CONTROL_ERROR("%s: EIOC returned more address"
+			      " entries than requested\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+	if (*num_addrs < config->min_address_entries) {
+		CONTROL_ERROR("%s: not enough address entries\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+	if (num_lan_switches < 1) {
+		CONTROL_ERROR("%s: EIOC returned no LAN switches\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+	if (num_lan_switches > 1) {
+		CONTROL_ERROR("%s: EIOC returned multiple LAN switches\n",
+			      control_ifcfg_name(control));
+		goto failure;
+	}
+
+	return 0;
+failure:
+	return -1;
+}
+
+int control_init_vnic_rsp(struct control *control, u32 *features,
+			  u8 *mac_address, u16 *num_addrs, u16 *vlan)
+{
+	u8 num_data_paths;
+	u8 num_lan_switches;
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_init_vnic_rsp	*init_vnic_rsp;
+
+	CONTROL_FUNCTION("%s: control_init_vnic_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_INIT_VNIC) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	init_vnic_rsp = &pkt->cmd.init_vnic_rsp;
+	control->maj_ver = be16_to_cpu(init_vnic_rsp->vnic_major_version);
+	control->min_ver = be16_to_cpu(init_vnic_rsp->vnic_minor_version);
+	num_data_paths = init_vnic_rsp->num_data_paths;
+	num_lan_switches = init_vnic_rsp->num_lan_switches;
+	*features = be32_to_cpu(init_vnic_rsp->features_supported);
+	*num_addrs = be16_to_cpu(init_vnic_rsp->num_address_entries);
+
+	if (control_chk_vnic_rsp_values(control, num_addrs,
+					num_data_paths,
+					num_lan_switches))
+		goto failure;
+
+	control->lan_switch.lan_switch_num =
+			init_vnic_rsp->lan_switch[0].lan_switch_num;
+	control->lan_switch.num_enet_ports =
+			init_vnic_rsp->lan_switch[0].num_enet_ports;
+	control->lan_switch.default_vlan =
+			init_vnic_rsp->lan_switch[0].default_vlan;
+	*vlan = be16_to_cpu(control->lan_switch.default_vlan);
+	memcpy(control->lan_switch.hw_mac_address,
+	       init_vnic_rsp->lan_switch[0].hw_mac_address, ETH_ALEN);
+	memcpy(mac_address, init_vnic_rsp->lan_switch[0].hw_mac_address,
+	       ETH_ALEN);
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+static void copy_recv_pool_config(struct vnic_recv_pool_config *src,
+				  struct vnic_recv_pool_config *dst)
+{
+	dst->size_recv_pool_entry  = src->size_recv_pool_entry;
+	dst->num_recv_pool_entries = src->num_recv_pool_entries;
+	dst->timeout_before_kick   = src->timeout_before_kick;
+	dst->num_recv_pool_entries_before_kick =
+				src->num_recv_pool_entries_before_kick;
+	dst->num_recv_pool_bytes_before_kick =
+				src->num_recv_pool_bytes_before_kick;
+	dst->free_recv_pool_entries_per_update =
+				src->free_recv_pool_entries_per_update;
+}
+
+static int check_recv_pool_config_value(__be32 *src, __be32 *dst,
+					__be32 *max, __be32 *min,
+					char *name)
+{
+	u32 value;
+
+	value = be32_to_cpu(*src);
+	if (value > be32_to_cpu(*max)) {
+		CONTROL_ERROR("value %s too large\n", name);
+		return -1;
+	} else if (value < be32_to_cpu(*min)) {
+		CONTROL_ERROR("value %s too small\n", name);
+		return -1;
+	}
+
+	*dst = cpu_to_be32(value);
+	return 0;
+}
+
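+/*
+ * Validate each receive pool parameter returned by the EIOC against the
+ * configured min/max bounds and copy it into dst, then reject pool sizes
+ * that are not powers of two or that are internally inconsistent.
+ */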
+static int check_recv_pool_config(struct vnic_recv_pool_config *src,
+				  struct vnic_recv_pool_config *dst,
+				  struct vnic_recv_pool_config *max,
+				  struct vnic_recv_pool_config *min)
+{
+	if (check_recv_pool_config_value(&src->size_recv_pool_entry,
+				     &dst->size_recv_pool_entry,
+				     &max->size_recv_pool_entry,
+				     &min->size_recv_pool_entry,
+				     "size_recv_pool_entry")
+	    || check_recv_pool_config_value(&src->num_recv_pool_entries,
+				     &dst->num_recv_pool_entries,
+				     &max->num_recv_pool_entries,
+				     &min->num_recv_pool_entries,
+				     "num_recv_pool_entries")
+	    || check_recv_pool_config_value(&src->timeout_before_kick,
+				     &dst->timeout_before_kick,
+				     &max->timeout_before_kick,
+				     &min->timeout_before_kick,
+				     "timeout_before_kick")
+	    || check_recv_pool_config_value(&src->
+				     num_recv_pool_entries_before_kick,
+				     &dst->
+				     num_recv_pool_entries_before_kick,
+				     &max->
+				     num_recv_pool_entries_before_kick,
+				     &min->
+				     num_recv_pool_entries_before_kick,
+				     "num_recv_pool_entries_before_kick")
+	    || check_recv_pool_config_value(&src->
+				     num_recv_pool_bytes_before_kick,
+				     &dst->
+				     num_recv_pool_bytes_before_kick,
+				     &max->
+				     num_recv_pool_bytes_before_kick,
+				     &min->
+				     num_recv_pool_bytes_before_kick,
+				     "num_recv_pool_bytes_before_kick")
+	    || check_recv_pool_config_value(&src->
+				     free_recv_pool_entries_per_update,
+				     &dst->
+				     free_recv_pool_entries_per_update,
+				     &max->
+				     free_recv_pool_entries_per_update,
+				     &min->
+				     free_recv_pool_entries_per_update,
+				     "free_recv_pool_entries_per_update"))
+		goto failure;
+
+	if (!is_power_of2(be32_to_cpu(dst->num_recv_pool_entries))) {
+		CONTROL_ERROR("num_recv_pool_entries (%d)"
+			      " must be power of 2\n",
+			      be32_to_cpu(dst->num_recv_pool_entries));
+		goto failure;
+	}
+
+	if (!is_power_of2(be32_to_cpu(dst->
+				      free_recv_pool_entries_per_update))) {
+		CONTROL_ERROR("free_recv_pool_entries_per_update (%d)"
+			      " must be power of 2\n",
+			      be32_to_cpu(dst->
+					  free_recv_pool_entries_per_update));
+		goto failure;
+	}
+
+	if (be32_to_cpu(dst->free_recv_pool_entries_per_update) >=
+	    be32_to_cpu(dst->num_recv_pool_entries)) {
+		CONTROL_ERROR("free_recv_pool_entries_per_update (%d) must"
+			      " be less than num_recv_pool_entries (%d)\n",
+			      be32_to_cpu(dst->
+					  free_recv_pool_entries_per_update),
+			      be32_to_cpu(dst->num_recv_pool_entries));
+		goto failure;
+	}
+
+	if (be32_to_cpu(dst->num_recv_pool_entries_before_kick) >=
+	    be32_to_cpu(dst->num_recv_pool_entries)) {
+		CONTROL_ERROR("num_recv_pool_entries_before_kick (%d) must"
+			      " be less than num_recv_pool_entries (%d)\n",
+			      be32_to_cpu(dst->
+					  num_recv_pool_entries_before_kick),
+			      be32_to_cpu(dst->num_recv_pool_entries));
+		goto failure;
+	}
+
+	return 0;
+failure:
+	return -1;
+}
+
+int control_config_data_path_req(struct control *control, u64 path_id,
+				 struct vnic_recv_pool_config *host,
+				 struct vnic_recv_pool_config *eioc)
+{
+	struct send_io				*send_io;
+	struct vnic_control_packet		*pkt;
+	struct vnic_cmd_config_data_path	*config_data_path;
+
+	CONTROL_FUNCTION("%s: control_config_data_path_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_CONFIG_DATA_PATH);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	config_data_path = &pkt->cmd.config_data_path_req;
+	config_data_path->data_path = 0;
+	config_data_path->path_identifier = path_id;
+	copy_recv_pool_config(host,
+			      &config_data_path->host_recv_pool_config);
+	copy_recv_pool_config(eioc,
+			      &config_data_path->eioc_recv_pool_config);
+	CONTROL_PACKET(pkt);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_config_data_path_rsp(struct control *control,
+				 struct vnic_recv_pool_config *host,
+				 struct vnic_recv_pool_config *eioc,
+				 struct vnic_recv_pool_config *max_host,
+				 struct vnic_recv_pool_config *max_eioc,
+				 struct vnic_recv_pool_config *min_host,
+				 struct vnic_recv_pool_config *min_eioc)
+{
+	struct recv_io				*recv_io;
+	struct vnic_control_packet		*pkt;
+	struct vnic_cmd_config_data_path	*config_data_path;
+
+	CONTROL_FUNCTION("%s: control_config_data_path_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_CONFIG_DATA_PATH) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	config_data_path = &pkt->cmd.config_data_path_rsp;
+	if (config_data_path->data_path != 0) {
+		CONTROL_ERROR("%s: received CMD_CONFIG_DATA_PATH response"
+			      " for wrong data path: %u\n",
+			      control_ifcfg_name(control),
+			      config_data_path->data_path);
+		goto failure;
+	}
+
+	if (check_recv_pool_config(&config_data_path->
+				   host_recv_pool_config,
+				   host, max_host, min_host)
+	    || check_recv_pool_config(&config_data_path->
+				      eioc_recv_pool_config,
+				      eioc, max_eioc, min_eioc)) {
+		goto failure;
+	}
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+int control_exchange_pools_req(struct control *control, u64 addr, u32 rkey)
+{
+	struct send_io			*send_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_exchange_pools	*exchange_pools;
+
+	CONTROL_FUNCTION("%s: control_exchange_pools_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_EXCHANGE_POOLS);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	exchange_pools = &pkt->cmd.exchange_pools_req;
+	exchange_pools->data_path = 0;
+	exchange_pools->pool_rkey = cpu_to_be32(rkey);
+	exchange_pools->pool_addr = cpu_to_be64(addr);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_exchange_pools_rsp(struct control *control, u64 *addr,
+			       u32 *rkey)
+{
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_exchange_pools	*exchange_pools;
+
+	CONTROL_FUNCTION("%s: control_exchange_pools_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_EXCHANGE_POOLS) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	exchange_pools = &pkt->cmd.exchange_pools_rsp;
+	*rkey = be32_to_cpu(exchange_pools->pool_rkey);
+	*addr = be64_to_cpu(exchange_pools->pool_addr);
+
+	if (exchange_pools->data_path != 0) {
+		CONTROL_ERROR("%s: received CMD_EXCHANGE_POOLS response"
+			      " for wrong data path: %u\n",
+			      control_ifcfg_name(control),
+			      exchange_pools->data_path);
+		goto failure;
+	}
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+int control_config_link_req(struct control *control, u16 flags, u16 mtu)
+{
+	struct send_io			*send_io;
+	struct vnic_cmd_config_link	*config_link_req;
+	struct vnic_control_packet	*pkt;
+
+	CONTROL_FUNCTION("%s: control_config_link_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_CONFIG_LINK);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	config_link_req = &pkt->cmd.config_link_req;
+	config_link_req->lan_switch_num =
+				control->lan_switch.lan_switch_num;
+	config_link_req->cmd_flags = VNIC_FLAG_SET_MTU;
+	if (flags & IFF_UP)
+		config_link_req->cmd_flags |= VNIC_FLAG_ENABLE_NIC;
+	else
+		config_link_req->cmd_flags |= VNIC_FLAG_DISABLE_NIC;
+	if (flags & IFF_ALLMULTI)
+		config_link_req->cmd_flags |= VNIC_FLAG_ENABLE_MCAST_ALL;
+	else
+		config_link_req->cmd_flags |= VNIC_FLAG_DISABLE_MCAST_ALL;
+	if (flags & IFF_PROMISC) {
+		config_link_req->cmd_flags |= VNIC_FLAG_ENABLE_PROMISC;
+		/* The EIOU does not implement true promiscuous mode:
+		 * with only PROMISC set it still delivers just unicast
+		 * packets, so MCAST_ALL must also be set to get real
+		 * promiscuous behavior.
+		 */
+		config_link_req->cmd_flags &= ~VNIC_FLAG_DISABLE_MCAST_ALL;
+		config_link_req->cmd_flags |= VNIC_FLAG_ENABLE_MCAST_ALL;
+	} else
+		config_link_req->cmd_flags |= VNIC_FLAG_DISABLE_PROMISC;
+
+	config_link_req->mtu_size = cpu_to_be16(mtu);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_config_link_rsp(struct control *control, u16 *flags,
+			    u16 *mtu)
+{
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_config_link	*config_link_rsp;
+
+	CONTROL_FUNCTION("%s: control_config_link_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_CONFIG_LINK) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+	config_link_rsp = &pkt->cmd.config_link_rsp;
+	if (config_link_rsp->cmd_flags & VNIC_FLAG_ENABLE_NIC)
+		*flags |= IFF_UP;
+	if (config_link_rsp->cmd_flags & VNIC_FLAG_ENABLE_MCAST_ALL)
+		*flags |= IFF_ALLMULTI;
+	if (config_link_rsp->cmd_flags & VNIC_FLAG_ENABLE_PROMISC)
+		*flags |= IFF_PROMISC;
+
+	*mtu = be16_to_cpu(config_link_rsp->mtu_size);
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+/* control_config_addrs_req:
+ * return values:
+ *          -1: failure
+ *           0: incomplete (successful operation, but more address
+ *              table entries to be updated)
+ *           1: complete
+ */
+int control_config_addrs_req(struct control *control,
+			     struct vnic_address_op *addrs, u16 num)
+{
+	u16  i;
+	u8   j;
+	int  ret = 1;
+	struct send_io				*send_io;
+	struct vnic_control_packet		*pkt;
+	struct vnic_cmd_config_addresses	*config_addrs_req;
+
+	CONTROL_FUNCTION("%s: control_config_addrs_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_CONFIG_ADDRESSES);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	config_addrs_req = &pkt->cmd.config_addresses_req;
+	config_addrs_req->lan_switch_num =
+				control->lan_switch.lan_switch_num;
+	for (i = 0, j = 0; (i < num) && (j < 16); i++) {
+		if (!addrs[i].operation)
+			continue;
+		config_addrs_req->list_address_ops[j].index = cpu_to_be16(i);
+		config_addrs_req->list_address_ops[j].operation =
+							VNIC_OP_SET_ENTRY;
+		config_addrs_req->list_address_ops[j].valid = addrs[i].valid;
+		memcpy(config_addrs_req->list_address_ops[j].address,
+		       addrs[i].address, ETH_ALEN);
+		config_addrs_req->list_address_ops[j].vlan = addrs[i].vlan;
+		addrs[i].operation = 0;
+		j++;
+	}
+	for (; i < num; i++) {
+		if (addrs[i].operation) {
+			ret = 0;
+			break;
+		}
+	}
+	config_addrs_req->num_address_ops = j;
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+
+	if (control_send(control, send_io))
+		return -1;
+	return ret;
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_config_addrs_rsp(struct control *control)
+{
+	struct recv_io *recv_io;
+	struct vnic_control_packet *pkt;
+	struct vnic_cmd_config_addresses *config_addrs_rsp;
+
+	CONTROL_FUNCTION("%s: control_config_addrs_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_CONFIG_ADDRESSES) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+	config_addrs_rsp = &pkt->cmd.config_addresses_rsp;
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+int control_report_statistics_req(struct control *control)
+{
+	struct send_io				*send_io;
+	struct vnic_control_packet		*pkt;
+	struct vnic_cmd_report_stats_req	*report_statistics_req;
+
+	CONTROL_FUNCTION("%s: control_report_statistics_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_REPORT_STATISTICS);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	report_statistics_req = &pkt->cmd.report_statistics_req;
+	report_statistics_req->lan_switch_num =
+	    control->lan_switch.lan_switch_num;
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_report_statistics_rsp(struct control *control,
+				  struct vnic_cmd_report_stats_rsp *stats)
+{
+	struct recv_io				*recv_io;
+	struct vnic_control_packet		*pkt;
+	struct vnic_cmd_report_stats_rsp	*rep_stat_rsp;
+
+	CONTROL_FUNCTION("%s: control_report_statistics_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_REPORT_STATISTICS) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	rep_stat_rsp = &pkt->cmd.report_statistics_rsp;
+
+	stats->if_in_broadcast_pkts   = rep_stat_rsp->if_in_broadcast_pkts;
+	stats->if_in_multicast_pkts   = rep_stat_rsp->if_in_multicast_pkts;
+	stats->if_in_octets	      = rep_stat_rsp->if_in_octets;
+	stats->if_in_ucast_pkts       = rep_stat_rsp->if_in_ucast_pkts;
+	stats->if_in_nucast_pkts      = rep_stat_rsp->if_in_nucast_pkts;
+	stats->if_in_underrun	      = rep_stat_rsp->if_in_underrun;
+	stats->if_in_errors	      = rep_stat_rsp->if_in_errors;
+	stats->if_out_errors	      = rep_stat_rsp->if_out_errors;
+	stats->if_out_octets	      = rep_stat_rsp->if_out_octets;
+	stats->if_out_ucast_pkts      = rep_stat_rsp->if_out_ucast_pkts;
+	stats->if_out_multicast_pkts  = rep_stat_rsp->if_out_multicast_pkts;
+	stats->if_out_broadcast_pkts  = rep_stat_rsp->if_out_broadcast_pkts;
+	stats->if_out_nucast_pkts     = rep_stat_rsp->if_out_nucast_pkts;
+	stats->if_out_ok	      = rep_stat_rsp->if_out_ok;
+	stats->if_in_ok		      = rep_stat_rsp->if_in_ok;
+	stats->if_out_ucast_bytes     = rep_stat_rsp->if_out_ucast_bytes;
+	stats->if_out_multicast_bytes = rep_stat_rsp->if_out_multicast_bytes;
+	stats->if_out_broadcast_bytes = rep_stat_rsp->if_out_broadcast_bytes;
+	stats->if_in_ucast_bytes      = rep_stat_rsp->if_in_ucast_bytes;
+	stats->if_in_multicast_bytes  = rep_stat_rsp->if_in_multicast_bytes;
+	stats->if_in_broadcast_bytes  = rep_stat_rsp->if_in_broadcast_bytes;
+	stats->ethernet_status	      = rep_stat_rsp->ethernet_status;
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+int control_reset_req(struct control *control)
+{
+	struct send_io			*send_io;
+	struct vnic_control_packet	*pkt;
+
+	CONTROL_FUNCTION("%s: control_reset_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_RESET);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_reset_rsp(struct control *control)
+{
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+
+	CONTROL_FUNCTION("%s: control_reset_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_RESET) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
+int control_heartbeat_req(struct control *control, u32 hb_interval)
+{
+	struct send_io			*send_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_heartbeat	*heartbeat_req;
+
+	CONTROL_FUNCTION("%s: control_heartbeat_req()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->send_dma, control->send_len,
+				DMA_TO_DEVICE);
+
+	send_io = control_init_hdr(control, CMD_HEARTBEAT);
+	if (!send_io)
+		goto failure;
+
+	pkt = control_packet(send_io);
+	heartbeat_req = &pkt->cmd.heartbeat_req;
+	heartbeat_req->hb_interval = cpu_to_be32(hb_interval);
+
+	control->rsp_expected = pkt->hdr.pkt_cmd;
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return control_send(control, send_io);
+failure:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->send_dma, control->send_len,
+				   DMA_TO_DEVICE);
+	return -1;
+}
+
+int control_heartbeat_rsp(struct control *control)
+{
+	struct recv_io			*recv_io;
+	struct vnic_control_packet	*pkt;
+	struct vnic_cmd_heartbeat	*heartbeat_rsp;
+
+	CONTROL_FUNCTION("%s: control_heartbeat_rsp()\n",
+			 control_ifcfg_name(control));
+	dma_sync_single_for_cpu(control->parent->config->ibdev->dma_device,
+				control->recv_dma, control->recv_len,
+				DMA_FROM_DEVICE);
+
+	recv_io = control_get_rsp(control);
+	if (!recv_io)
+		goto out;
+
+	pkt = control_packet(recv_io);
+	if (pkt->hdr.pkt_cmd != CMD_HEARTBEAT) {
+		CONTROL_ERROR("%s: sent control request:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(control_last_req(control));
+		CONTROL_ERROR("%s: received control response:\n",
+			      control_ifcfg_name(control));
+		control_log_control_packet(pkt);
+		goto failure;
+	}
+
+	heartbeat_rsp = &pkt->cmd.heartbeat_rsp;
+
+	control_recv(control, recv_io);
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return 0;
+failure:
+	viport_failure(control->parent);
+out:
+	dma_sync_single_for_device(control->parent->config->ibdev->dma_device,
+				   control->recv_dma, control->recv_len,
+				   DMA_FROM_DEVICE);
+	return -1;
+}
+
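+/*
+ * Map the receive half of local_storage (config->num_recvs contiguous
+ * control packets) for DMA and post one receive work request per packet
+ * buffer on the control QP.
+ */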
+static int control_init_recv_ios(struct control *control,
+				 struct viport *viport,
+				 struct vnic_control_packet *pkt)
+{
+	struct io		*io;
+	struct ib_device	*ibdev = viport->config->ibdev;
+	struct control_config	*config = control->config;
+	dma_addr_t		recv_dma;
+	unsigned int		i;
+
+
+	control->recv_len = sizeof *pkt * config->num_recvs;
+	control->recv_dma = dma_map_single(ibdev->dma_device,
+					   pkt, control->recv_len,
+					   DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(control->recv_dma)) {
+		CONTROL_ERROR("control recv dma map error\n");
+		goto failure;
+	}
+
+	recv_dma = control->recv_dma;
+	for (i = 0; i < config->num_recvs; i++) {
+		io = &control->recv_ios[i].io;
+		io->viport = viport;
+		io->routine = control_recv_complete;
+		io->type = RECV;
+
+		control->recv_ios[i].virtual_addr = (u8 *)pkt;
+		control->recv_ios[i].list.addr = recv_dma;
+		control->recv_ios[i].list.length = sizeof *pkt;
+		control->recv_ios[i].list.lkey = control->mr->lkey;
+
+		recv_dma = recv_dma + sizeof *pkt;
+		pkt++;
+
+		io->rwr.wr_id = (u64)io;
+		io->rwr.sg_list = &control->recv_ios[i].list;
+		io->rwr.num_sge = 1;
+		if (vnic_ib_post_recv(&control->ib_conn, io))
+			goto unmap_recv;
+	}
+
+	return 0;
+unmap_recv:
+	dma_unmap_single(control->parent->config->ibdev->dma_device,
+			 control->recv_dma, control->recv_len,
+			 DMA_FROM_DEVICE);
+failure:
+	return -1;
+}
+
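+/*
+ * Map the single send buffer at the start of local_storage for DMA and
+ * pre-build the send work request; this one buffer is reused for every
+ * outgoing control packet.
+ */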
+static int control_init_send_ios(struct control *control,
+				 struct viport *viport,
+				 struct vnic_control_packet *pkt)
+{
+	struct io		*io;
+	struct ib_device	*ibdev = viport->config->ibdev;
+
+	control->send_io.virtual_addr = (u8 *)pkt;
+	control->send_len = sizeof *pkt;
+	control->send_dma = dma_map_single(ibdev->dma_device, pkt,
+					   control->send_len,
+					   DMA_TO_DEVICE);
+	if (dma_mapping_error(control->send_dma)) {
+		CONTROL_ERROR("control send dma map error\n");
+		goto failure;
+	}
+
+	io = &control->send_io.io;
+	io->viport = viport;
+	io->routine = control_send_complete;
+
+	control->send_io.list.addr = control->send_dma;
+	control->send_io.list.length = sizeof *pkt;
+	control->send_io.list.lkey = control->mr->lkey;
+
+	io->swr.wr_id = (u64)io;
+	io->swr.sg_list = &control->send_io.list;
+	io->swr.num_sge = 1;
+	io->swr.opcode = IB_WR_SEND;
+	io->swr.send_flags = IB_SEND_SIGNALED;
+	io->type = SEND;
+
+	return 0;
+failure:
+	return -1;
+}
+
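+/*
+ * Bring up the control channel for a viport: create the IB connection,
+ * register a DMA MR, create the CM ID, allocate the packet buffers and
+ * set up the send and receive work requests.
+ */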
+int control_init(struct control *control, struct viport *viport,
+		 struct control_config *config, struct ib_pd *pd)
+{
+	struct vnic_control_packet	*pkt;
+	unsigned int sz;
+
+	CONTROL_FUNCTION("%s: control_init()\n",
+			 control_ifcfg_name(control));
+	control->parent = viport;
+	control->config = config;
+	control->ib_conn.viport = viport;
+	control->ib_conn.ib_config = &config->ib_config;
+	control->ib_conn.state = IB_CONN_UNINITTED;
+	control->req_outstanding = 0;
+	control->seq_num = 0;
+	control->response = NULL;
+	control->info = NULL;
+	INIT_LIST_HEAD(&control->failure_list);
+	spin_lock_init(&control->io_lock);
+
+	if (vnic_ib_conn_init(&control->ib_conn, viport, pd,
+			      &config->ib_config)) {
+		CONTROL_ERROR("Control IB connection"
+			      " initialization failed\n");
+		goto failure;
+	}
+
+	control->mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(control->mr)) {
+		CONTROL_ERROR("%s: failed to register memory"
+			      " for control connection\n",
+			      control_ifcfg_name(control));
+		goto destroy_conn;
+	}
+
+	control->ib_conn.cm_id = ib_create_cm_id(viport->config->ibdev,
+						 vnic_ib_cm_handler,
+						 &control->ib_conn);
+	if (IS_ERR(control->ib_conn.cm_id)) {
+		CONTROL_ERROR("creating control CM ID failed\n");
+		goto destroy_conn;
+	}
+
+	sz = sizeof(struct recv_io) * config->num_recvs;
+	control->recv_ios = vmalloc(sz);
+	if (!control->recv_ios) {
+		CONTROL_ERROR("%s: failed allocating space for recv ios\n",
+			      control_ifcfg_name(control));
+		goto destroy_conn;
+	}
+	memset(control->recv_ios, 0, sz);
+
+	/* One send buffer and num_recvs recv buffers */
+	control->local_storage = kzalloc(sizeof *pkt *
+					 (config->num_recvs + 1),
+					 GFP_KERNEL);
+
+	if (!control->local_storage) {
+		CONTROL_ERROR("%s: failed allocating space"
+			      " for local storage\n",
+			      control_ifcfg_name(control));
+		goto free_storage;
+	}
+
+	pkt = control->local_storage;
+	if (control_init_send_ios(control, viport, pkt))
+		goto free_storage;
+
+	pkt++;
+	if (control_init_recv_ios(control, viport, pkt))
+		goto unmap_send;
+
+	return 0;
+
+unmap_send:
+	dma_unmap_single(control->parent->config->ibdev->dma_device,
+			 control->send_dma, control->send_len,
+			 DMA_TO_DEVICE);
+free_storage:
+	vfree(control->recv_ios);
+	kfree(control->local_storage);
+destroy_conn:
+	ib_destroy_qp(control->ib_conn.qp);
+	ib_destroy_cq(control->ib_conn.cq);
+failure:
+	return -1;
+}
+
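+/*
+ * Tear down the control channel: send a CM DREQ, stop the retry timer,
+ * destroy the CM ID, QP and CQ, then release the MR, DMA mappings and
+ * packet buffers set up by control_init().
+ */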
+void control_cleanup(struct control *control)
+{
+	CONTROL_FUNCTION("%s: control_cleanup()\n",
+			 control_ifcfg_name(control));
+
+	if (ib_send_cm_dreq(control->ib_conn.cm_id, NULL, 0))
+		printk(KERN_DEBUG "control CM DREQ sending failed\n");
+
+	control_timer_stop(control);
+	ib_destroy_cm_id(control->ib_conn.cm_id);
+	ib_destroy_qp(control->ib_conn.qp);
+	ib_destroy_cq(control->ib_conn.cq);
+	ib_dereg_mr(control->mr);
+	dma_unmap_single(control->parent->config->ibdev->dma_device,
+			 control->send_dma, control->send_len,
+			 DMA_TO_DEVICE);
+	dma_unmap_single(control->parent->config->ibdev->dma_device,
+			 control->recv_dma, control->recv_len,
+			 DMA_FROM_DEVICE);
+	vfree(control->recv_ios);
+	kfree(control->local_storage);
+
+}
+
+static void control_log_report_status_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_REPORT_STATUS\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO
+	       "               lan_switch_num = %u, is_fatal = %u\n",
+	       pkt->cmd.report_status.lan_switch_num,
+	       pkt->cmd.report_status.is_fatal);
+	printk(KERN_INFO
+	       "               status_number = %u, status_info = %u\n",
+	       be32_to_cpu(pkt->cmd.report_status.status_number),
+	       be32_to_cpu(pkt->cmd.report_status.status_info));
+	pkt->cmd.report_status.file_name[31] = '\0';
+	pkt->cmd.report_status.routine[31] = '\0';
+	printk(KERN_INFO "               filename = %s, routine = %s\n",
+	       pkt->cmd.report_status.file_name,
+	       pkt->cmd.report_status.routine);
+	printk(KERN_INFO
+	       "               line_num = %u, error_parameter = %u\n",
+	       be32_to_cpu(pkt->cmd.report_status.line_num),
+	       be32_to_cpu(pkt->cmd.report_status.error_parameter));
+	pkt->cmd.report_status.desc_text[127] = '\0';
+	printk(KERN_INFO "               desc_text = %s\n",
+	       pkt->cmd.report_status.desc_text);
+}
+
+static void control_log_report_stats_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_REPORT_STATISTICS\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO "               lan_switch_num = %u\n",
+	       pkt->cmd.report_statistics_req.lan_switch_num);
+	if (pkt->hdr.pkt_type == TYPE_REQ)
+		return;
+	printk(KERN_INFO "               if_in_broadcast_pkts = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_broadcast_pkts));
+	printk(" if_in_multicast_pkts = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_multicast_pkts));
+	printk(KERN_INFO "               if_in_octets = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_octets));
+	printk(" if_in_ucast_pkts = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_ucast_pkts));
+	printk(KERN_INFO "               if_in_nucast_pkts = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_nucast_pkts));
+	printk(" if_in_underrun = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_underrun));
+	printk(KERN_INFO "               if_in_errors = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_errors));
+	printk(" if_out_errors = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_errors));
+	printk(KERN_INFO "               if_out_octets = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_octets));
+	printk(" if_out_ucast_pkts = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_ucast_pkts));
+	printk(KERN_INFO "               if_out_multicast_pkts = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_multicast_pkts));
+	printk(" if_out_broadcast_pkts = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_broadcast_pkts));
+	printk(KERN_INFO "               if_out_nucast_pkts = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_nucast_pkts));
+	printk(" if_out_ok = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.if_out_ok));
+	printk(KERN_INFO "               if_in_ok = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.if_in_ok));
+	printk(" if_out_ucast_bytes = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_ucast_bytes));
+	printk(KERN_INFO "               if_out_multicast_bytes = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+		      if_out_multicast_bytes));
+	printk(" if_out_broadcast_bytes = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_out_broadcast_bytes));
+	printk(KERN_INFO "               if_in_ucast_bytes = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_ucast_bytes));
+	printk(" if_in_multicast_bytes = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_multicast_bytes));
+	printk(KERN_INFO "               if_in_broadcast_bytes = %llu",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   if_in_broadcast_bytes));
+	printk(" ethernet_status = %llu\n",
+	       be64_to_cpu(pkt->cmd.report_statistics_rsp.
+			   ethernet_status));
+}
+
+static void control_log_config_link_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_CONFIG_LINK\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO "               cmd_flags = %x\n",
+	       pkt->cmd.config_link_req.cmd_flags);
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_ENABLE_NIC)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_ENABLE_NIC\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_DISABLE_NIC)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_DISABLE_NIC\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_ENABLE_MCAST_ALL)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_ENABLE_MCAST_ALL\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_DISABLE_MCAST_ALL)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_DISABLE_MCAST_ALL\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_ENABLE_PROMISC)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_ENABLE_PROMISC\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_DISABLE_PROMISC)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_DISABLE_PROMISC\n");
+	if (pkt->cmd.config_link_req.cmd_flags & VNIC_FLAG_SET_MTU)
+		printk(KERN_INFO
+		       "                      VNIC_FLAG_SET_MTU\n");
+	printk(KERN_INFO
+	       "               lan_switch_num = %x, mtu_size = %d\n",
+	       pkt->cmd.config_link_req.lan_switch_num,
+	       be16_to_cpu(pkt->cmd.config_link_req.mtu_size));
+	if (pkt->hdr.pkt_type == TYPE_RSP) {
+		printk(KERN_INFO
+		       "               default_vlan = %u,"
+		       " hw_mac_address ="
+		       " %02x:%02x:%02x:%02x:%02x:%02x\n",
+		       be16_to_cpu(pkt->cmd.config_link_req.
+				   default_vlan),
+		       pkt->cmd.config_link_req.hw_mac_address[0],
+		       pkt->cmd.config_link_req.hw_mac_address[1],
+		       pkt->cmd.config_link_req.hw_mac_address[2],
+		       pkt->cmd.config_link_req.hw_mac_address[3],
+		       pkt->cmd.config_link_req.hw_mac_address[4],
+		       pkt->cmd.config_link_req.hw_mac_address[5]);
+	}
+}
+
+static void control_log_config_addrs_pkt(struct vnic_control_packet *pkt)
+{
+	int i;
+
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_CONFIG_ADDRESSES\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO
+	       "               num_address_ops = %x,"
+	       " lan_switch_num = %d\n",
+	       pkt->cmd.config_addresses_req.num_address_ops,
+	       pkt->cmd.config_addresses_req.lan_switch_num);
+	for (i = 0; (i < pkt->cmd.config_addresses_req.num_address_ops)
+	     && (i < 16); i++) {
+		printk(KERN_INFO
+		       "               list_address_ops[%u].index"
+		       " = %u\n",
+		       i,
+		       be16_to_cpu(pkt->cmd.config_addresses_req.
+			      list_address_ops[i].index));
+		switch (pkt->cmd.config_addresses_req.
+		        list_address_ops[i].operation) {
+		case VNIC_OP_GET_ENTRY:
+			printk(KERN_INFO
+			       "               list_address_ops[%u]."
+			       "operation = VNIC_OP_GET_ENTRY\n",
+			       i);
+			break;
+		case VNIC_OP_SET_ENTRY:
+			printk(KERN_INFO
+			       "               list_address_ops[%u]."
+			       "operation = VNIC_OP_SET_ENTRY\n",
+			       i);
+			break;
+		default:
+			printk(KERN_INFO
+			       "               list_address_ops[%u]."
+			       "operation = UNKNOWN(%d)\n",
+			       i,
+			       pkt->cmd.config_addresses_req.
+			       list_address_ops[i].operation);
+			break;
+		}
+		printk(KERN_INFO
+		       "               list_address_ops[%u].valid"
+		       " = %u\n",
+		       i,
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].valid);
+		printk(KERN_INFO
+		       "               list_address_ops[%u].address"
+		       " = %02x:%02x:%02x:%02x:%02x:%02x\n",
+		       i,
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[0],
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[1],
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[2],
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[3],
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[4],
+		       pkt->cmd.config_addresses_req.
+		       list_address_ops[i].address[5]);
+		printk(KERN_INFO
+		       "               list_address_ops[%u].vlan"
+		       " = %u\n",
+		       i,
+		       be16_to_cpu(pkt->cmd.config_addresses_req.
+			      list_address_ops[i].vlan));
+	}
+
+}
+
+static void control_log_exch_pools_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_EXCHANGE_POOLS\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO "               data_path = %u\n",
+	       pkt->cmd.exchange_pools_req.data_path);
+	printk(KERN_INFO "               pool_rkey = %08x"
+	       " pool_addr = %llx\n",
+	       be32_to_cpu(pkt->cmd.exchange_pools_req.pool_rkey),
+	       be64_to_cpu(pkt->cmd.exchange_pools_req.pool_addr));
+}
+
+static void control_log_data_path_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_CONFIG_DATA_PATH\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO "               path_identifier = %llx,"
+	       " data_path = %u\n",
+	       pkt->cmd.config_data_path_req.path_identifier,
+	       pkt->cmd.config_data_path_req.data_path);
+	printk(KERN_INFO
+	       "host config    size_recv_pool_entry = %u,"
+	       " num_recv_pool_entries = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.size_recv_pool_entry),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.num_recv_pool_entries));
+	printk(KERN_INFO
+	       "               timeout_before_kick = %u,"
+	       " num_recv_pool_entries_before_kick = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.timeout_before_kick),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.
+		      num_recv_pool_entries_before_kick));
+	printk(KERN_INFO
+	       "               num_recv_pool_bytes_before_kick = %u,"
+	       " free_recv_pool_entries_per_update = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.
+		      num_recv_pool_bytes_before_kick),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      host_recv_pool_config.
+		      free_recv_pool_entries_per_update));
+	printk(KERN_INFO
+	       "eioc config    size_recv_pool_entry = %u,"
+	       " num_recv_pool_entries = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.size_recv_pool_entry),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.num_recv_pool_entries));
+	printk(KERN_INFO
+	       "               timeout_before_kick = %u,"
+	       " num_recv_pool_entries_before_kick = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.timeout_before_kick),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.
+		      num_recv_pool_entries_before_kick));
+	printk(KERN_INFO
+	       "               num_recv_pool_bytes_before_kick = %u,"
+	       " free_recv_pool_entries_per_update = %u\n",
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.
+		      num_recv_pool_bytes_before_kick),
+	       be32_to_cpu(pkt->cmd.config_data_path_req.
+		      eioc_recv_pool_config.
+		      free_recv_pool_entries_per_update));
+}
+
+static void control_log_init_vnic_pkt(struct vnic_control_packet *pkt)
+{
+	printk(KERN_INFO
+	       "               pkt_cmd = CMD_INIT_VNIC\n");
+	printk(KERN_INFO
+	       "               pkt_seq_num = %u,"
+	       " pkt_retry_count = %u\n",
+	       pkt->hdr.pkt_seq_num,
+	       pkt->hdr.pkt_retry_count);
+	printk(KERN_INFO
+	       "               vnic_major_version = %u,"
+	       " vnic_minor_version = %u\n",
+	       be16_to_cpu(pkt->cmd.init_vnic_req.vnic_major_version),
+	       be16_to_cpu(pkt->cmd.init_vnic_req.vnic_minor_version));
+	if (pkt->hdr.pkt_type == TYPE_REQ) {
+		printk(KERN_INFO
+		       "               vnic_instance = %u,"
+		       " num_data_paths = %u\n",
+		       pkt->cmd.init_vnic_req.vnic_instance,
+		       pkt->cmd.init_vnic_req.num_data_paths);
+		printk(KERN_INFO
+		       "               num_address_entries = %u\n",
+		       be16_to_cpu(pkt->cmd.init_vnic_req.
+			      num_address_entries));
+	} else {
+		printk(KERN_INFO
+		       "               num_lan_switches = %u,"
+		       " num_data_paths = %u\n",
+		       pkt->cmd.init_vnic_rsp.num_lan_switches,
+		       pkt->cmd.init_vnic_rsp.num_data_paths);
+		printk(KERN_INFO
+		       "               num_address_entries = %u,"
+		       " features_supported = %08x\n",
+		       be16_to_cpu(pkt->cmd.init_vnic_rsp.
+			      num_address_entries),
+		       be32_to_cpu(pkt->cmd.init_vnic_rsp.
+			      features_supported));
+		if (pkt->cmd.init_vnic_rsp.num_lan_switches != 0) {
+			printk(KERN_INFO
+			       "lan_switch[0]  lan_switch_num = %u,"
+			       " num_enet_ports = %08x\n",
+			       pkt->cmd.init_vnic_rsp.
+			       lan_switch[0].lan_switch_num,
+			       pkt->cmd.init_vnic_rsp.
+			       lan_switch[0].num_enet_ports);
+			printk(KERN_INFO
+			       "               default_vlan = %u,"
+			       " hw_mac_address ="
+			       " %02x:%02x:%02x:%02x:%02x:%02x\n",
+			       be16_to_cpu(pkt->cmd.init_vnic_rsp.
+				      lan_switch[0].default_vlan),
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[0],
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[1],
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[2],
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[3],
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[4],
+			       pkt->cmd.init_vnic_rsp.lan_switch[0].
+			       hw_mac_address[5]);
+		}
+	}
+}
+
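+/* Log a control packet: print its type, then dispatch on pkt_cmd to the
+ * command-specific helpers above.
+ */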
+static void control_log_control_packet(struct vnic_control_packet *pkt)
+{
+	switch (pkt->hdr.pkt_type) {
+	case TYPE_INFO:
+		printk(KERN_INFO "control_packet: pkt_type = TYPE_INFO\n");
+		break;
+	case TYPE_REQ:
+		printk(KERN_INFO "control_packet: pkt_type = TYPE_REQ\n");
+		break;
+	case TYPE_RSP:
+		printk(KERN_INFO "control_packet: pkt_type = TYPE_RSP\n");
+		break;
+	case TYPE_ERR:
+		printk(KERN_INFO "control_packet: pkt_type = TYPE_ERR\n");
+		break;
+	default:
+		printk(KERN_INFO "control_packet: pkt_type = UNKNOWN\n");
+	}
+
+	switch (pkt->hdr.pkt_cmd) {
+	case CMD_INIT_VNIC:
+		control_log_init_vnic_pkt(pkt);
+		break;
+	case CMD_CONFIG_DATA_PATH:
+		control_log_data_path_pkt(pkt);
+		break;
+	case CMD_EXCHANGE_POOLS:
+		control_log_exch_pools_pkt(pkt);
+		break;
+	case CMD_CONFIG_ADDRESSES:
+		control_log_config_addrs_pkt(pkt);
+		break;
+	case CMD_CONFIG_LINK:
+		control_log_config_link_pkt(pkt);
+		break;
+	case CMD_REPORT_STATISTICS:
+		control_log_report_stats_pkt(pkt);
+		break;
+	case CMD_CLEAR_STATISTICS:
+		printk(KERN_INFO
+		       "               pkt_cmd = CMD_CLEAR_STATISTICS\n");
+		printk(KERN_INFO
+		       "               pkt_seq_num = %u,"
+		       " pkt_retry_count = %u\n",
+		       pkt->hdr.pkt_seq_num,
+		       pkt->hdr.pkt_retry_count);
+		break;
+	case CMD_REPORT_STATUS:
+		control_log_report_status_pkt(pkt);
+
+		break;
+	case CMD_RESET:
+		printk(KERN_INFO
+		       "               pkt_cmd = CMD_RESET\n");
+		printk(KERN_INFO
+		       "               pkt_seq_num = %u,"
+		       " pkt_retry_count = %u\n",
+		       pkt->hdr.pkt_seq_num,
+		       pkt->hdr.pkt_retry_count);
+		break;
+	case CMD_HEARTBEAT:
+		printk(KERN_INFO
+		       "               pkt_cmd = CMD_HEARTBEAT\n");
+		printk(KERN_INFO
+		       "               pkt_seq_num = %u,"
+		       " pkt_retry_count = %u\n",
+		       pkt->hdr.pkt_seq_num,
+		       pkt->hdr.pkt_retry_count);
+		printk(KERN_INFO "               hb_interval = %d\n",
+		       be32_to_cpu(pkt->cmd.heartbeat_req.hb_interval));
+		break;
+	default:
+		printk(KERN_INFO
+		       "               pkt_cmd = UNKNOWN (%u)\n",
+		       pkt->hdr.pkt_cmd);
+		printk(KERN_INFO
+		       "               pkt_seq_num = %u,"
+		       " pkt_retry_count = %u\n",
+		       pkt->hdr.pkt_seq_num,
+		       pkt->hdr.pkt_retry_count);
+		break;
+	}
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_control.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_control.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_CONTROL_H_INCLUDED
+#define VNIC_CONTROL_H_INCLUDED
+
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+#include <linux/timex.h>
+#include <linux/completion.h>
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+
+#include "vnic_ib.h"
+#include "vnic_control_pkt.h"
+
+enum control_timer_state {
+	TIMER_IDLE	= 0,
+	TIMER_ACTIVE	= 1,
+	TIMER_EXPIRED	= 2
+};
+
+struct control {
+	struct viport			*parent;
+	struct control_config		*config;
+	struct ib_mr			*mr;
+	struct vnic_ib_conn		ib_conn;
+	struct vnic_control_packet	*local_storage;
+	int				send_len;
+	int				recv_len;
+	u16				maj_ver;
+	u16				min_ver;
+	struct vnic_lan_switch_attribs	lan_switch;
+	struct send_io			send_io;
+	struct recv_io			*recv_ios;
+	dma_addr_t			send_dma;
+	dma_addr_t			recv_dma;
+	enum control_timer_state	timer_state;
+	struct timer_list		timer;
+	u8				req_retry_counter;
+	u8				req_outstanding;
+	u8				seq_num;
+	u8				rsp_expected;
+	struct recv_io			*response;
+	struct recv_io			*info;
+	struct list_head		failure_list;
+	spinlock_t			io_lock;
+	struct completion		done;
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+	struct {
+		cycles_t	request_time;	/* intermediate value */
+		cycles_t	response_time;
+		u32		response_num;
+		cycles_t	response_max;
+		cycles_t	response_min;
+		u32		timeout_num;
+	} statistics;
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+};
+
+int control_init(struct control *control, struct viport *viport,
+		 struct control_config *config, struct ib_pd *pd);
+
+void control_cleanup(struct control *control);
+
+void control_process_async(struct control *control);
+
+int control_init_vnic_req(struct control *control);
+int control_init_vnic_rsp(struct control *control, u32 *features,
+			  u8 *mac_address, u16 *num_addrs, u16 *vlan);
+
+int control_config_data_path_req(struct control *control, u64 path_id,
+				 struct vnic_recv_pool_config *host,
+				 struct vnic_recv_pool_config *eioc);
+int control_config_data_path_rsp(struct control *control,
+				 struct vnic_recv_pool_config *host,
+				 struct vnic_recv_pool_config *eioc,
+				 struct vnic_recv_pool_config *max_host,
+				 struct vnic_recv_pool_config *max_eioc,
+				 struct vnic_recv_pool_config *min_host,
+				 struct vnic_recv_pool_config *min_eioc);
+
+int control_exchange_pools_req(struct control *control,
+			       u64 addr, u32 rkey);
+int control_exchange_pools_rsp(struct control *control,
+			       u64 *addr, u32 *rkey);
+
+int control_config_link_req(struct control *control,
+			    u16 flags, u16 mtu);
+int control_config_link_rsp(struct control *control,
+			    u16 *flags, u16 *mtu);
+
+int control_config_addrs_req(struct control *control,
+			     struct vnic_address_op *addrs, u16 num);
+int control_config_addrs_rsp(struct control *control);
+
+int control_report_statistics_req(struct control *control);
+int control_report_statistics_rsp(struct control *control,
+				  struct vnic_cmd_report_stats_rsp *stats);
+
+int control_heartbeat_req(struct control *control, u32 hb_interval);
+int control_heartbeat_rsp(struct control *control);
+
+int control_reset_req(struct control *control);
+int control_reset_rsp(struct control *control);
+
+
+#define control_packet(io)					\
+	((struct vnic_control_packet *)(io)->virtual_addr)
+#define control_is_connected(control)				\
+	(vnic_ib_conn_connected(&((control)->ib_conn)))
+
+#define control_last_req(control)	control_packet(&(control)->send_io)
+#define control_features(control)	(control)->features_supported
+
+#define control_get_mac_address(control, addr)			\
+	memcpy(addr, (control)->lan_switch.hw_mac_address, ETH_ALEN)
+
+#endif	/* VNIC_CONTROL_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_control_pkt.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_control_pkt.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_CONTROL_PKT_H_INCLUDED
+#define VNIC_CONTROL_PKT_H_INCLUDED
+
+#include <linux/utsname.h>
+
+#define VNIC_MAX_NODENAME_LEN	64
+
+struct vnic_connection_data {
+	u64	path_id;
+	u8	vnic_instance;
+	u8	path_num;
+	u8	nodename[VNIC_MAX_NODENAME_LEN + 1];
+};
+
+struct vnic_control_header {
+	u8	pkt_type;
+	u8	pkt_cmd;
+	u8	pkt_seq_num;
+	u8	pkt_retry_count;
+	u32	reserved;	/* for 64-bit alignment */
+};
+
+/* pkt_type values */
+enum {
+	TYPE_INFO	= 0,
+	TYPE_REQ	= 1,
+	TYPE_RSP	= 2,
+	TYPE_ERR	= 3
+};
+
+/* pkt_cmd values */
+enum {
+	CMD_INIT_VNIC		= 1,
+	CMD_CONFIG_DATA_PATH	= 2,
+	CMD_EXCHANGE_POOLS	= 3,
+	CMD_CONFIG_ADDRESSES	= 4,
+	CMD_CONFIG_LINK		= 5,
+	CMD_REPORT_STATISTICS	= 6,
+	CMD_CLEAR_STATISTICS	= 7,
+	CMD_REPORT_STATUS	= 8,
+	CMD_RESET		= 9,
+	CMD_HEARTBEAT		= 10
+};
+
+/* pkt_cmd CMD_INIT_VNIC, pkt_type TYPE_REQ data format */
+struct vnic_cmd_init_vnic_req {
+	__be16	vnic_major_version;
+	__be16	vnic_minor_version;
+	u8	vnic_instance;
+	u8	num_data_paths;
+	__be16	num_address_entries;
+};
+
+/* pkt_cmd CMD_INIT_VNIC, pkt_type TYPE_RSP subdata format */
+struct vnic_lan_switch_attribs {
+	u8	lan_switch_num;
+	u8	num_enet_ports;
+	__be16	default_vlan;
+	u8	hw_mac_address[ETH_ALEN];
+};
+
+/* pkt_cmd CMD_INIT_VNIC, pkt_type TYPE_RSP data format */
+struct vnic_cmd_init_vnic_rsp {
+	__be16				vnic_major_version;
+	__be16				vnic_minor_version;
+	u8				num_lan_switches;
+	u8				num_data_paths;
+	__be16				num_address_entries;
+	__be32				features_supported;
+	struct vnic_lan_switch_attribs	lan_switch[1];
+};
+
+/* features_supported values */
+enum {
+	VNIC_FEAT_IPV4_HEADERS		= 0x0001,
+	VNIC_FEAT_IPV6_HEADERS		= 0x0002,
+	VNIC_FEAT_IPV4_CSUM_RX		= 0x0004,
+	VNIC_FEAT_IPV4_CSUM_TX		= 0x0008,
+	VNIC_FEAT_TCP_CSUM_RX		= 0x0010,
+	VNIC_FEAT_TCP_CSUM_TX		= 0x0020,
+	VNIC_FEAT_UDP_CSUM_RX		= 0x0040,
+	VNIC_FEAT_UDP_CSUM_TX		= 0x0080,
+	VNIC_FEAT_TCP_SEGMENT		= 0x0100,
+	VNIC_FEAT_IPV4_IPSEC_OFFLOAD	= 0x0200,
+	VNIC_FEAT_IPV6_IPSEC_OFFLOAD	= 0x0400,
+	VNIC_FEAT_FCS_PROPAGATE		= 0x0800,
+	VNIC_FEAT_PF_KICK		= 0x1000,
+	VNIC_FEAT_PF_FORCE_ROUTE	= 0x2000,
+	VNIC_FEAT_CHASH_OFFLOAD		= 0x4000
+};
+
+/* pkt_cmd CMD_CONFIG_DATA_PATH subdata format */
+struct vnic_recv_pool_config {
+	__be32	size_recv_pool_entry;
+	__be32	num_recv_pool_entries;
+	__be32	timeout_before_kick;
+	__be32	num_recv_pool_entries_before_kick;
+	__be32	num_recv_pool_bytes_before_kick;
+	__be32	free_recv_pool_entries_per_update;
+};
+
+/* pkt_cmd CMD_CONFIG_DATA_PATH data format */
+struct vnic_cmd_config_data_path {
+	u64				path_identifier;
+	u8				data_path;
+	u8				reserved[3];
+	struct vnic_recv_pool_config	host_recv_pool_config;
+	struct vnic_recv_pool_config	eioc_recv_pool_config;
+};
+
+/* pkt_cmd CMD_EXCHANGE_POOLS data format */
+struct vnic_cmd_exchange_pools {
+	u8	data_path;
+	u8	reserved[3];
+	__be32	pool_rkey;
+	__be64	pool_addr;
+};
+
+/* pkt_cmd CMD_CONFIG_ADDRESSES subdata format */
+struct vnic_address_op {
+	__be16	index;
+	u8	operation;
+	u8	valid;
+	u8	address[6];
+	__be16	vlan;
+};
+
+/* operation values */
+enum {
+	VNIC_OP_SET_ENTRY = 0x01,
+	VNIC_OP_GET_ENTRY = 0x02
+};
+
+/* pkt_cmd CMD_CONFIG_ADDRESSES data format */
+struct vnic_cmd_config_addresses {
+	u8			num_address_ops;
+	u8			lan_switch_num;
+	struct vnic_address_op	list_address_ops[1];
+};
+
+/* CMD_CONFIG_LINK data format */
+struct vnic_cmd_config_link {
+	u8	cmd_flags;
+	u8	lan_switch_num;
+	__be16	mtu_size;
+	__be16	default_vlan;
+	u8	hw_mac_address[6];
+};
+
+/* cmd_flags values */
+enum {
+	VNIC_FLAG_ENABLE_NIC		= 0x01,
+	VNIC_FLAG_DISABLE_NIC		= 0x02,
+	VNIC_FLAG_ENABLE_MCAST_ALL	= 0x04,
+	VNIC_FLAG_DISABLE_MCAST_ALL	= 0x08,
+	VNIC_FLAG_ENABLE_PROMISC	= 0x10,
+	VNIC_FLAG_DISABLE_PROMISC	= 0x20,
+	VNIC_FLAG_SET_MTU		= 0x40
+};
+
+/* pkt_cmd CMD_REPORT_STATISTICS, pkt_type TYPE_REQ data format */
+struct vnic_cmd_report_stats_req {
+	u8	lan_switch_num;
+};
+
+/* pkt_cmd CMD_REPORT_STATISTICS, pkt_type TYPE_RSP data format */
+struct vnic_cmd_report_stats_rsp {
+	u8	lan_switch_num;
+	u8	reserved[7];		/* for 64-bit alignment */
+	__be64	if_in_broadcast_pkts;
+	__be64	if_in_multicast_pkts;
+	__be64	if_in_octets;
+	__be64	if_in_ucast_pkts;
+	__be64	if_in_nucast_pkts;	/* if_in_broadcast_pkts
+					 + if_in_multicast_pkts */
+	__be64	if_in_underrun;		/* (OID_GEN_RCV_NO_BUFFER) */
+	__be64	if_in_errors;		/* (OID_GEN_RCV_ERROR) */
+	__be64	if_out_errors;		/* (OID_GEN_XMIT_ERROR) */
+	__be64	if_out_octets;
+	__be64	if_out_ucast_pkts;
+	__be64	if_out_multicast_pkts;
+	__be64	if_out_broadcast_pkts;
+	__be64	if_out_nucast_pkts;	/* if_out_broadcast_pkts
+					 + if_out_multicast_pkts */
+	__be64	if_out_ok;		/* if_out_nucast_pkts
+					 + if_out_ucast_pkts(OID_GEN_XMIT_OK) */
+	__be64	if_in_ok;		/* if_in_nucast_pkts
+					 + if_in_ucast_pkts(OID_GEN_RCV_OK) */
+	__be64	if_out_ucast_bytes;	/* (OID_GEN_DIRECTED_BYTES_XMT) */
+	__be64	if_out_multicast_bytes;	/* (OID_GEN_MULTICAST_BYTES_XMT) */
+	__be64	if_out_broadcast_bytes;	/* (OID_GEN_BROADCAST_BYTES_XMT) */
+	__be64	if_in_ucast_bytes;	/* (OID_GEN_DIRECTED_BYTES_RCV) */
+	__be64	if_in_multicast_bytes;	/* (OID_GEN_MULTICAST_BYTES_RCV) */
+	__be64	if_in_broadcast_bytes;	/* (OID_GEN_BROADCAST_BYTES_RCV) */
+	__be64	ethernet_status;	/* (OID_GEN_MEDIA_CONNECT_STATUS) */
+};
+
+/* pkt_cmd CMD_CLEAR_STATISTICS data format */
+struct vnic_cmd_clear_statistics {
+	u8	lan_switch_num;
+};
+
+/* pkt_cmd CMD_REPORT_STATUS data format */
+struct vnic_cmd_report_status {
+	u8	lan_switch_num;
+	u8	is_fatal;
+	u8	reserved[2];		/* for 32-bit alignment */
+	__be32	status_number;
+	__be32	status_info;
+	u8	file_name[32];
+	u8	routine[32];
+	__be32	line_num;
+	__be32	error_parameter;
+	u8	desc_text[128];
+};
+
+/* pkt_cmd CMD_HEARTBEAT data format */
+struct vnic_cmd_heartbeat {
+	__be32	hb_interval;
+};
+
+enum {
+	VNIC_STATUS_LINK_UP			= 1,
+	VNIC_STATUS_LINK_DOWN			= 2,
+	VNIC_STATUS_ENET_AGGREGATION_CHANGE	= 3,
+	VNIC_STATUS_EIOC_SHUTDOWN		= 4,
+	VNIC_STATUS_CONTROL_ERROR		= 5,
+	VNIC_STATUS_EIOC_ERROR			= 6
+};
+
+#define VNIC_MAX_CONTROLPKTSZ		256
+#define VNIC_MAX_CONTROLDATASZ						\
+	(VNIC_MAX_CONTROLPKTSZ - sizeof(struct vnic_control_header))
+
+struct vnic_control_packet {
+	struct vnic_control_header	hdr;
+	union {
+		struct vnic_cmd_init_vnic_req		init_vnic_req;
+		struct vnic_cmd_init_vnic_rsp		init_vnic_rsp;
+		struct vnic_cmd_config_data_path	config_data_path_req;
+		struct vnic_cmd_config_data_path	config_data_path_rsp;
+		struct vnic_cmd_exchange_pools		exchange_pools_req;
+		struct vnic_cmd_exchange_pools		exchange_pools_rsp;
+		struct vnic_cmd_config_addresses	config_addresses_req;
+		struct vnic_cmd_config_addresses	config_addresses_rsp;
+		struct vnic_cmd_config_link		config_link_req;
+		struct vnic_cmd_config_link		config_link_rsp;
+		struct vnic_cmd_report_stats_req	report_statistics_req;
+		struct vnic_cmd_report_stats_rsp	report_statistics_rsp;
+		struct vnic_cmd_clear_statistics	clear_statistics_req;
+		struct vnic_cmd_clear_statistics	clear_statistics_rsp;
+		struct vnic_cmd_report_status		report_status;
+		struct vnic_cmd_heartbeat		heartbeat_req;
+		struct vnic_cmd_heartbeat		heartbeat_rsp;
+
+		char   cmd_data[VNIC_MAX_CONTROLDATASZ];
+	} cmd;
+};
+
+#endif	/* VNIC_CONTROL_PKT_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_data.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_data.c
@@ -0,0 +1,1112 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/inet_sock.h>
+#include <linux/ip.h>
+#include <linux/if_ether.h>
+#include <linux/vmalloc.h>
+
+#include "vnic_util.h"
+#include "vnic_viport.h"
+#include "vnic_main.h"
+#include "vnic_config.h"
+#include "vnic_data.h"
+#include "vnic_trailer.h"
+#include "vnic_stats.h"
+
+static void data_received_kick(struct io *io);
+static void data_xmit_complete(struct io *io);
+
+u32 min_rcv_skb = 60;
+module_param(min_rcv_skb, uint, 0444);
+MODULE_PARM_DESC(min_rcv_skb, "Packets of size (in bytes) less than"
+		 " or equal to this value will be copied during receive."
+		 " Default 60");
+
+u32 min_xmt_skb = 60;
+module_param(min_xmt_skb, uint, 0444);
+MODULE_PARM_DESC(min_xmt_skb, "Packets of size (in bytes) less than"
+		 " or equal to this value will be copied during transmit."
+		 " Default 60");
+
+int data_init(struct data *data, struct viport *viport,
+	      struct data_config *config, struct ib_pd *pd)
+{
+	DATA_FUNCTION("data_init()\n");
+
+	data->parent = viport;
+	data->config = config;
+	data->ib_conn.viport = viport;
+	data->ib_conn.ib_config = &config->ib_config;
+	data->ib_conn.state = IB_CONN_UNINITTED;
+
+	if ((min_xmt_skb < 60) || (min_xmt_skb > 9000)) {
+		DATA_ERROR("min_xmt_skb (%d) must be between 60 and 9000\n",
+			   min_xmt_skb);
+		goto failure;
+	}
+	if (vnic_ib_conn_init(&data->ib_conn, viport, pd,
+			      &config->ib_config)) {
+		DATA_ERROR("Data IB connection initialization failed\n");
+		goto failure;
+	}
+	data->mr = ib_get_dma_mr(pd,
+				 IB_ACCESS_LOCAL_WRITE |
+				 IB_ACCESS_REMOTE_READ |
+				 IB_ACCESS_REMOTE_WRITE);
+	if (IS_ERR(data->mr)) {
+		DATA_ERROR("failed to register memory for"
+			   " data connection\n");
+		goto destroy_conn;
+	}
+
+	data->ib_conn.cm_id = ib_create_cm_id(viport->config->ibdev,
+					      vnic_ib_cm_handler,
+					      &data->ib_conn);
+
+	if (IS_ERR(data->ib_conn.cm_id)) {
+		DATA_ERROR("creating data CM ID failed\n");
+		goto destroy_conn;
+	}
+
+	return 0;
+
+destroy_conn:
+	ib_destroy_qp(data->ib_conn.qp);
+	ib_destroy_cq(data->ib_conn.cq);
+failure:
+	return -1;
+}
+
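+/* Repost every receive I/O currently parked on the recv_ios free list. */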
+static void data_post_recvs(struct data *data)
+{
+	unsigned long flags;
+
+	DATA_FUNCTION("data_post_recvs()\n");
+	spin_lock_irqsave(&data->recv_ios_lock, flags);
+	while (!list_empty(&data->recv_ios)) {
+		struct io *io = list_entry(data->recv_ios.next,
+					   struct io, list_ptrs);
+		struct recv_io *recv_io = (struct recv_io *)io;
+
+		list_del(&recv_io->io.list_ptrs);
+		spin_unlock_irqrestore(&data->recv_ios_lock, flags);
+		if (vnic_ib_post_recv(&data->ib_conn, &recv_io->io)) {
+			viport_failure(data->parent);
+			return;
+		}
+		spin_lock_irqsave(&data->recv_ios_lock, flags);
+	}
+	spin_unlock_irqrestore(&data->recv_ios_lock, flags);
+}
+
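+/*
+ * Wire up the preallocated work requests: each recv_io gets a
+ * one-entry scatter list pointing at region_data, and each xmit buffer
+ * gets a two-entry RDMA write descriptor whose data and trailer
+ * regions are carved out of the contiguous xmit_data area.
+ */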
+static void data_init_pool_work_reqs(struct data * data,
+				      struct recv_io * recv_io)
+{
+	struct recv_pool	*recv_pool = &data->recv_pool;
+	struct xmit_pool	*xmit_pool = &data->xmit_pool;
+	struct rdma_io		*rdma_io;
+	struct rdma_dest	*rdma_dest;
+	dma_addr_t		xmit_dma;
+	u8			*xmit_data;
+	unsigned int		i;
+
+	INIT_LIST_HEAD(&data->recv_ios);
+	spin_lock_init(&data->recv_ios_lock);
+	spin_lock_init(&data->xmit_buf_lock);
+	for (i = 0; i < data->config->num_recvs; i++) {
+		recv_io[i].io.viport = data->parent;
+		recv_io[i].io.routine = data_received_kick;
+		recv_io[i].list.addr = data->region_data_dma;
+		recv_io[i].list.length = 4;
+		recv_io[i].list.lkey = data->mr->lkey;
+
+		recv_io[i].io.rwr.wr_id = (u64)&recv_io[i].io;
+		recv_io[i].io.rwr.sg_list = &recv_io[i].list;
+		recv_io[i].io.rwr.num_sge = 1;
+
+		list_add(&recv_io[i].io.list_ptrs, &data->recv_ios);
+	}
+
+	INIT_LIST_HEAD(&recv_pool->avail_recv_bufs);
+	for (i = 0; i < recv_pool->pool_sz; i++) {
+		rdma_dest = &recv_pool->recv_bufs[i];
+		list_add(&rdma_dest->list_ptrs,
+			 &recv_pool->avail_recv_bufs);
+	}
+
+	xmit_dma = xmit_pool->xmitdata_dma;
+	xmit_data = xmit_pool->xmit_data;
+
+	for (i = 0; i < xmit_pool->num_xmit_bufs; i++) {
+		rdma_io = &xmit_pool->xmit_bufs[i];
+		rdma_io->index = i;
+		rdma_io->io.viport = data->parent;
+		rdma_io->io.routine = data_xmit_complete;
+
+		rdma_io->list[0].lkey = data->mr->lkey;
+		rdma_io->list[1].lkey = data->mr->lkey;
+		rdma_io->io.swr.wr_id = (u64)rdma_io;
+		rdma_io->io.swr.sg_list = rdma_io->list;
+		rdma_io->io.swr.num_sge = 2;
+		rdma_io->io.swr.opcode = IB_WR_RDMA_WRITE;
+		rdma_io->io.swr.send_flags = IB_SEND_SIGNALED;
+		rdma_io->io.type = RDMA;
+
+		rdma_io->data = xmit_data;
+		rdma_io->data_dma = xmit_dma;
+
+		xmit_data += ALIGN(min_xmt_skb, VIPORT_TRAILER_ALIGNMENT);
+		xmit_dma += ALIGN(min_xmt_skb, VIPORT_TRAILER_ALIGNMENT);
+		rdma_io->trailer = (struct viport_trailer *)xmit_data;
+		rdma_io->trailer_dma = xmit_dma;
+		xmit_data += sizeof(struct viport_trailer);
+		xmit_dma += sizeof(struct viport_trailer);
+	}
+
+	xmit_pool->rdma_rkey = data->mr->rkey;
+	xmit_pool->rdma_addr = xmit_pool->buf_pool_dma;
+}
+
+static void data_init_free_bufs_swrs(struct data * data)
+{
+	struct rdma_io		*rdma_io;
+	struct send_io		*send_io;
+
+	rdma_io = &data->free_bufs_io;
+	rdma_io->io.viport = data->parent;
+	rdma_io->io.routine = NULL;
+
+	rdma_io->list[0].lkey = data->mr->lkey;
+
+	rdma_io->io.swr.wr_id = (u64)rdma_io;
+	rdma_io->io.swr.sg_list = rdma_io->list;
+	rdma_io->io.swr.num_sge = 1;
+	rdma_io->io.swr.opcode = IB_WR_RDMA_WRITE;
+	rdma_io->io.swr.send_flags = IB_SEND_SIGNALED;
+	rdma_io->io.type = RDMA;
+
+	send_io = &data->kick_io;
+	send_io->io.viport = data->parent;
+	send_io->io.routine = NULL;
+
+	send_io->list.addr = data->region_data_dma;
+	send_io->list.length = 0;
+	send_io->list.lkey = data->mr->lkey;
+
+	send_io->io.swr.wr_id = (u64)send_io;
+	send_io->io.swr.sg_list = &send_io->list;
+	send_io->io.swr.num_sge = 1;
+	send_io->io.swr.opcode = IB_WR_SEND;
+	send_io->io.swr.send_flags = IB_SEND_SIGNALED;
+	send_io->io.type = SEND;
+}
+
+static int data_init_buf_pools(struct data * data)
+{
+	struct recv_pool	*recv_pool = &data->recv_pool;
+	struct xmit_pool	*xmit_pool = &data->xmit_pool;
+	struct viport		*viport = data->parent;
+
+	recv_pool->buf_pool_len =
+	    sizeof(struct buff_pool_entry) * recv_pool->eioc_pool_sz;
+
+	recv_pool->buf_pool = kzalloc(recv_pool->buf_pool_len, GFP_KERNEL);
+
+	if (!recv_pool->buf_pool) {
+		DATA_ERROR("failed allocating %d bytes"
+			   " for recv pool bufpool\n",
+			   recv_pool->buf_pool_len);
+		goto failure;
+	}
+
+	recv_pool->buf_pool_dma =
+	    dma_map_single(viport->config->ibdev->dma_device,
+			   recv_pool->buf_pool, recv_pool->buf_pool_len,
+			   DMA_TO_DEVICE);
+
+	if (dma_mapping_error(recv_pool->buf_pool_dma)) {
+		DATA_ERROR("recv buf_pool dma map error\n");
+		goto free_recv_pool;
+	}
+
+	xmit_pool->buf_pool_len =
+	    sizeof(struct buff_pool_entry) * xmit_pool->pool_sz;
+	xmit_pool->buf_pool = kzalloc(xmit_pool->buf_pool_len, GFP_KERNEL);
+
+	if (!xmit_pool->buf_pool) {
+		DATA_ERROR("failed allocating %d bytes"
+			   " for xmit pool bufpool\n",
+			   xmit_pool->buf_pool_len);
+		goto unmap_recv_pool;
+	}
+
+	xmit_pool->buf_pool_dma =
+	    dma_map_single(viport->config->ibdev->dma_device,
+			   xmit_pool->buf_pool, xmit_pool->buf_pool_len,
+			   DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(xmit_pool->buf_pool_dma)) {
+		DATA_ERROR("xmit buf_pool dma map error\n");
+		goto free_xmit_pool;
+	}
+
+	xmit_pool->xmit_data = kzalloc(xmit_pool->xmitdata_len, GFP_KERNEL);
+
+	if (!xmit_pool->xmit_data) {
+		DATA_ERROR("failed allocating %d bytes for xmit data\n",
+			   xmit_pool->xmitdata_len);
+		goto unmap_xmit_pool;
+	}
+
+	xmit_pool->xmitdata_dma =
+	    dma_map_single(viport->config->ibdev->dma_device,
+			   xmit_pool->xmit_data, xmit_pool->xmitdata_len,
+			   DMA_TO_DEVICE);
+
+	if (dma_mapping_error(xmit_pool->xmitdata_dma)) {
+		DATA_ERROR("xmit data dma map error\n");
+		goto free_xmit_data;
+	}
+
+	return 0;
+
+free_xmit_data:
+	kfree(xmit_pool->xmit_data);
+unmap_xmit_pool:
+	dma_unmap_single(data->parent->config->ibdev->dma_device,
+			 xmit_pool->buf_pool_dma,
+			 xmit_pool->buf_pool_len, DMA_FROM_DEVICE);
+free_xmit_pool:
+	kfree(xmit_pool->buf_pool);
+unmap_recv_pool:
+	dma_unmap_single(data->parent->config->ibdev->dma_device,
+			 recv_pool->buf_pool_dma,
+			 recv_pool->buf_pool_len, DMA_TO_DEVICE);
+free_recv_pool:
+	kfree(recv_pool->buf_pool);
+failure:
+	return -1;
+}
+
+static void data_init_xmit_pool(struct data * data)
+{
+	struct xmit_pool	*xmit_pool = &data->xmit_pool;
+
+	xmit_pool->pool_sz =
+		be32_to_cpu(data->eioc_pool_parms.num_recv_pool_entries);
+	xmit_pool->buffer_sz =
+		be32_to_cpu(data->eioc_pool_parms.size_recv_pool_entry);
+
+	xmit_pool->notify_count = 0;
+	xmit_pool->notify_bundle = data->config->notify_bundle;
+	xmit_pool->next_xmit_pool = 0;
+	xmit_pool->num_xmit_bufs = xmit_pool->notify_bundle * 2;
+	xmit_pool->next_xmit_buf = 0;
+	xmit_pool->last_comp_buf = xmit_pool->num_xmit_bufs - 1;
+
+	xmit_pool->kick_count = 0;
+	xmit_pool->kick_byte_count = 0;
+
+	xmit_pool->send_kicks =
+	  be32_to_cpu(data->
+		      eioc_pool_parms.num_recv_pool_entries_before_kick)
+	  || be32_to_cpu(data->
+		      eioc_pool_parms.num_recv_pool_bytes_before_kick);
+	xmit_pool->kick_bundle =
+	    be32_to_cpu(data->
+		        eioc_pool_parms.num_recv_pool_entries_before_kick);
+	xmit_pool->kick_byte_bundle =
+	    be32_to_cpu(data->
+			eioc_pool_parms.num_recv_pool_bytes_before_kick);
+
+	xmit_pool->need_buffers = 1;
+
+	xmit_pool->xmitdata_len =
+	    BUFFER_SIZE(min_xmt_skb) * xmit_pool->num_xmit_bufs;
+}
+
+static void data_init_recv_pool(struct data * data)
+{
+	struct recv_pool	*recv_pool = &data->recv_pool;
+
+	recv_pool->pool_sz = data->config->host_recv_pool_entries;
+	recv_pool->eioc_pool_sz =
+		be32_to_cpu(data->host_pool_parms.num_recv_pool_entries);
+	if (recv_pool->pool_sz > recv_pool->eioc_pool_sz)
+		recv_pool->pool_sz =
+		    be32_to_cpu(data->host_pool_parms.num_recv_pool_entries);
+
+	recv_pool->buffer_sz =
+		    be32_to_cpu(data->host_pool_parms.size_recv_pool_entry);
+
+	recv_pool->sz_free_bundle =
+		be32_to_cpu(data->
+			host_pool_parms.free_recv_pool_entries_per_update);
+	recv_pool->num_free_bufs = 0;
+	recv_pool->num_posted_bufs = 0;
+
+	recv_pool->next_full_buf = 0;
+	recv_pool->next_free_buf = 0;
+	recv_pool->kick_on_free  = 0;
+}
+
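+/*
+ * data_connect() sizes the receive and transmit pools from the
+ * negotiated pool parameters, allocates one vmalloc'ed block that is
+ * carved into recv_bufs, recv_ios and xmit_bufs, maps the small
+ * region_data scratch area, builds the buffer pools, posts the initial
+ * receives and then initiates the CM connection for the data QP.
+ */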
+int data_connect(struct data * data)
+{
+	struct xmit_pool	*xmit_pool = &data->xmit_pool;
+	struct recv_pool	*recv_pool = &data->recv_pool;
+	struct recv_io		* recv_io;
+	unsigned int		sz;
+	struct viport		*viport = data->parent;
+
+	DATA_FUNCTION("data_connect()\n");
+
+	data_init_recv_pool(data);
+	data_init_xmit_pool(data);
+
+	sz = sizeof(struct rdma_dest) * recv_pool->pool_sz    +
+	     sizeof(struct recv_io) * data->config->num_recvs +
+	     sizeof(struct rdma_io) * xmit_pool->num_xmit_bufs;
+
+	data->local_storage = vmalloc(sz);
+
+	if (!data->local_storage) {
+		DATA_ERROR("failed allocating %d bytes"
+			   " local storage\n", sz);
+		goto out;
+	}
+
+	memset(data->local_storage, 0, sz);
+
+	recv_pool->recv_bufs = (struct rdma_dest *)data->local_storage;
+	sz = sizeof(struct rdma_dest) * recv_pool->pool_sz;
+
+	recv_io = (struct recv_io *)(data->local_storage + sz);
+	sz += sizeof(struct recv_io) * data->config->num_recvs;
+
+	xmit_pool->xmit_bufs = (struct rdma_io *)(data->local_storage + sz);
+	data->region_data = kzalloc(4, GFP_KERNEL);
+
+	if (!data->region_data) {
+		DATA_ERROR("failed to alloc memory for region data\n");
+		goto free_local_storage;
+	}
+
+	data->region_data_dma =
+	    dma_map_single(viport->config->ibdev->dma_device,
+			   data->region_data, 4, DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(data->region_data_dma)) {
+		DATA_ERROR("region data dma map error\n");
+		goto free_region_data;
+	}
+
+	if (data_init_buf_pools(data))
+		goto unmap_region_data;
+
+	data_init_free_bufs_swrs(data);
+	data_init_pool_work_reqs(data, recv_io);
+
+	data_post_recvs(data);
+
+	if (vnic_ib_cm_connect(&data->ib_conn))
+		goto unmap_region_data;
+
+	return 0;
+
+unmap_region_data:
+	dma_unmap_single(data->parent->config->ibdev->dma_device,
+			 data->region_data_dma, 4, DMA_BIDIRECTIONAL);
+free_region_data:
+	kfree(data->region_data);
+free_local_storage:
+	vfree(data->local_storage);
+out:
+	return -1;
+}
+
+static void data_add_free_buffer(struct data *data, int index,
+				 struct rdma_dest *rdma_dest)
+{
+	struct recv_pool *pool = &data->recv_pool;
+	struct buff_pool_entry *bpe;
+
+	DATA_FUNCTION("data_add_free_buffer()\n");
+	rdma_dest->trailer->connection_hash_and_valid = 0;
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				pool->buf_pool_dma, pool->buf_pool_len,
+				DMA_TO_DEVICE);
+
+	bpe = &pool->buf_pool[index];
+	bpe->rkey = cpu_to_be32(data->mr->rkey);
+
+	bpe->remote_addr = cpu_to_be64((unsigned long long)
+					virt_to_phys(rdma_dest->data));
+	bpe->valid = (u32) (rdma_dest - &pool->recv_bufs[0]) + 1;
+	++pool->num_free_bufs;
+
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   pool->buf_pool_dma, pool->buf_pool_len,
+				   DMA_TO_DEVICE);
+}
+
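+/*
+ * Attach socket buffers to the available rdma_dest entries and publish
+ * each one as a free buffer in the pool shared with the EIOC.
+ */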
+/* NOTE: this routine is not reentrant */
+static void data_alloc_buffers(struct data *data, int initial_allocation)
+{
+	struct recv_pool *pool = &data->recv_pool;
+	struct rdma_dest *rdma_dest;
+	struct sk_buff *skb;
+	int index;
+
+	DATA_FUNCTION("data_alloc_buffers()\n");
+	index = ADD(pool->next_free_buf, pool->num_free_bufs,
+		    pool->eioc_pool_sz);
+
+	while (!list_empty(&pool->avail_recv_bufs)) {
+		rdma_dest =
+		    list_entry(pool->avail_recv_bufs.next,
+			       struct rdma_dest, list_ptrs);
+		if (!rdma_dest->skb) {
+			if (initial_allocation)
+				skb = alloc_skb(pool->buffer_sz + 2,
+						GFP_KERNEL);
+			else
+				skb = dev_alloc_skb(pool->buffer_sz + 2);
+			if (!skb) {
+				DATA_ERROR("failed to alloc skb\n");
+				break;
+			}
+			skb_reserve(skb, 2);
+			skb_put(skb, pool->buffer_sz);
+			rdma_dest->skb = skb;
+			rdma_dest->data = skb->data;
+			rdma_dest->trailer =
+			  (struct viport_trailer *)(rdma_dest->data +
+						    pool->buffer_sz -
+						    sizeof(struct
+							   viport_trailer));
+		}
+		rdma_dest->trailer->connection_hash_and_valid = 0;
+
+		list_del_init(&rdma_dest->list_ptrs);
+
+		data_add_free_buffer(data, index, rdma_dest);
+		index = NEXT(index, pool->eioc_pool_sz);
+	}
+}
+
+static void data_send_kick_message(struct data *data)
+{
+	struct xmit_pool *pool = &data->xmit_pool;
+	DATA_FUNCTION("data_send_kick_message()\n");
+	/* stop timer for bundle_timeout */
+	if (data->kick_timer_on) {
+		del_timer(&data->kick_timer);
+		data->kick_timer_on = 0;
+	}
+	pool->kick_count = 0;
+	pool->kick_byte_count = 0;
+
+	/* TODO: keep track of when kick is outstanding, and
+	 * don't reuse until complete
+	 */
+	if (vnic_ib_post_send(&data->ib_conn, &data->free_bufs_io.io)) {
+		DATA_ERROR("failed to post send\n");
+		viport_failure(data->parent);
+	}
+}
+
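+/*
+ * RDMA-write batches of buff_pool_entry descriptors to the EIOC's
+ * receive pool.  Entries are sent in multiples of sz_free_bundle, and
+ * consecutive bundles that do not wrap the ring are coalesced into a
+ * single post.
+ */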
+static void data_send_free_recv_buffers(struct data *data)
+{
+	struct recv_pool *pool = &data->recv_pool;
+	struct ib_send_wr *swr = &data->free_bufs_io.io.swr;
+
+	int bufs_sent = 0;
+	u64 rdma_addr;
+	u32 offset;
+	u32 sz;
+	unsigned int num_to_send, next_increment;
+
+	DATA_FUNCTION("data_send_free_recv_buffers()\n");
+
+	for (num_to_send = pool->sz_free_bundle;
+	     num_to_send <= pool->num_free_bufs;
+	     num_to_send += pool->sz_free_bundle) {
+		/* handle multiple bundles as one when possible. */
+		next_increment = num_to_send + pool->sz_free_bundle;
+		if ((next_increment <= pool->num_free_bufs)
+		    && (pool->next_free_buf + next_increment <=
+			pool->eioc_pool_sz)) {
+			continue;
+		}
+
+		offset = pool->next_free_buf *
+				sizeof(struct buff_pool_entry);
+		sz = num_to_send * sizeof(struct buff_pool_entry);
+		rdma_addr = pool->eioc_rdma_addr + offset;
+		swr->sg_list->length = sz;
+		swr->sg_list->addr = pool->buf_pool_dma + offset;
+		swr->wr.rdma.remote_addr = rdma_addr;
+
+		if (vnic_ib_post_send(&data->ib_conn,
+		    &data->free_bufs_io.io)) {
+			DATA_ERROR("failed to post send\n");
+			viport_failure(data->parent);
+			break;
+		}
+		INC(pool->next_free_buf, num_to_send, pool->eioc_pool_sz);
+		pool->num_free_bufs -= num_to_send;
+		pool->num_posted_bufs += num_to_send;
+		bufs_sent = 1;
+	}
+
+	if (bufs_sent) {
+		if (pool->kick_on_free)
+			data_send_kick_message(data);
+	}
+	if (pool->num_posted_bufs == 0) {
+		DATA_ERROR("%s: unable to allocate receive buffers\n",
+			   config_viport_name(data->parent->config));
+		viport_failure(data->parent);
+	}
+}
+
+void data_connected(struct data *data)
+{
+	DATA_FUNCTION("data_connected()\n");
+	data->free_bufs_io.io.swr.wr.rdma.rkey =
+				data->recv_pool.eioc_rdma_rkey;
+	data_alloc_buffers(data, 1);
+	data_send_free_recv_buffers(data);
+	data->connected = 1;
+}
+
+void data_disconnect(struct data *data)
+{
+	struct xmit_pool *xmit_pool = &data->xmit_pool;
+	struct recv_pool *recv_pool = &data->recv_pool;
+	unsigned int i;
+
+	DATA_FUNCTION("data_disconnect()\n");
+
+	data->connected = 0;
+	if (data->kick_timer_on) {
+		del_timer_sync(&data->kick_timer);
+		data->kick_timer_on = 0;
+	}
+
+	for (i = 0; i < xmit_pool->num_xmit_bufs; i++) {
+		if (xmit_pool->xmit_bufs[i].skb)
+			dev_kfree_skb(xmit_pool->xmit_bufs[i].skb);
+		xmit_pool->xmit_bufs[i].skb = NULL;
+
+	}
+	for (i = 0; i < recv_pool->pool_sz; i++) {
+		if (data->recv_pool.recv_bufs[i].skb)
+			dev_kfree_skb(recv_pool->recv_bufs[i].skb);
+		recv_pool->recv_bufs[i].skb = NULL;
+	}
+	vfree(data->local_storage);
+	if (data->region_data) {
+		dma_unmap_single(data->parent->config->ibdev->dma_device,
+				 data->region_data_dma, 4,
+				 DMA_BIDIRECTIONAL);
+		kfree(data->region_data);
+	}
+
+	if (recv_pool->buf_pool) {
+		dma_unmap_single(data->parent->config->ibdev->dma_device,
+				 recv_pool->buf_pool_dma,
+				 recv_pool->buf_pool_len, DMA_TO_DEVICE);
+		kfree(recv_pool->buf_pool);
+	}
+
+	if (xmit_pool->buf_pool) {
+		dma_unmap_single(data->parent->config->ibdev->dma_device,
+				 xmit_pool->buf_pool_dma,
+				 xmit_pool->buf_pool_len, DMA_FROM_DEVICE);
+		kfree(xmit_pool->buf_pool);
+	}
+
+	if (xmit_pool->xmit_data) {
+		dma_unmap_single(data->parent->config->ibdev->dma_device,
+				 xmit_pool->xmitdata_dma,
+				 xmit_pool->xmitdata_len, DMA_TO_DEVICE);
+		kfree(xmit_pool->xmit_data);
+	}
+}
+
+void data_cleanup(struct data *data)
+{
+	if (ib_send_cm_dreq(data->ib_conn.cm_id, NULL, 0))
+		printk(KERN_DEBUG "data CM DREQ sending failed\n");
+
+	ib_destroy_cm_id(data->ib_conn.cm_id);
+	ib_destroy_qp(data->ib_conn.qp);
+	ib_destroy_cq(data->ib_conn.cq);
+	ib_dereg_mr(data->mr);
+}
+
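+/*
+ * Claim the next transmit buffer and its EIOC pool entry.  Transmission
+ * is stopped when the last EIOC receive buffer or the last local xmit
+ * buffer is consumed; *last tells the caller that a kick should be
+ * requested with this frame.
+ */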
+static int data_alloc_xmit_buffer(struct data *data, struct sk_buff *skb,
+				  struct buff_pool_entry **pp_bpe,
+				  struct rdma_io **pp_rdma_io,
+				  int *last)
+{
+	struct xmit_pool	*pool = &data->xmit_pool;
+	unsigned long		flags;
+	int			ret;
+
+	DATA_FUNCTION("data_alloc_xmit_buffer()\n");
+
+	spin_lock_irqsave(&data->xmit_buf_lock, flags);
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				pool->buf_pool_dma, pool->buf_pool_len,
+				DMA_TO_DEVICE);
+	*last = 0;
+	*pp_rdma_io = &pool->xmit_bufs[pool->next_xmit_buf];
+	*pp_bpe = &pool->buf_pool[pool->next_xmit_pool];
+
+	if ((*pp_bpe)->valid && pool->next_xmit_buf !=
+	     pool->last_comp_buf) {
+		INC(pool->next_xmit_buf, 1, pool->num_xmit_bufs);
+		INC(pool->next_xmit_pool, 1, pool->pool_sz);
+		if (!pool->buf_pool[pool->next_xmit_pool].valid) {
+			DATA_INFO("just used the last EIOU"
+				  " receive buffer\n");
+			*last = 1;
+			pool->need_buffers = 1;
+			vnic_stop_xmit(data->parent->vnic,
+				       data->parent->parent);
+			data_kickreq_stats(data);
+		} else if (pool->next_xmit_buf == pool->last_comp_buf) {
+			DATA_INFO("just used our last xmit buffer\n");
+			pool->need_buffers = 1;
+			vnic_stop_xmit(data->parent->vnic,
+				       data->parent->parent);
+		}
+		(*pp_rdma_io)->skb = skb;
+		(*pp_bpe)->valid = 0;
+		ret = 0;
+	} else {
+		data_no_xmitbuf_stats(data);
+		DATA_ERROR("Out of xmit buffers\n");
+		vnic_stop_xmit(data->parent->vnic,
+			       data->parent->parent);
+		ret = -1;
+	}
+
+	dma_sync_single_for_device(data->parent->config->ibdev->
+				   dma_device, pool->buf_pool_dma,
+				   pool->buf_pool_len, DMA_TO_DEVICE);
+	spin_unlock_irqrestore(&data->xmit_buf_lock, flags);
+	return ret;
+}
+
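+/*
+ * Build and post the RDMA write for one outbound frame.  Frames no
+ * larger than min_xmt_skb are copied into the preregistered bounce
+ * buffer; larger frames are DMA-mapped in place.  The frame is written
+ * to the end of the remote buffer so the viport_trailer lands at its
+ * expected offset, and only every notify_bundle-th write is signaled.
+ */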
+static void data_rdma_packet(struct data *data, struct buff_pool_entry *bpe,
+			     struct rdma_io *rdma_io)
+{
+	struct ib_send_wr	*swr;
+	struct sk_buff		*skb;
+	dma_addr_t		trailer_data_dma;
+	dma_addr_t		skb_data_dma;
+	struct xmit_pool	*xmit_pool = &data->xmit_pool;
+	struct viport		*viport = data->parent;
+	u8			*d;
+	int			len;
+	int			fill_len;
+
+	DATA_FUNCTION("data_rdma_packet()\n");
+	swr = &rdma_io->io.swr;
+	skb = rdma_io->skb;
+	len = ALIGN(rdma_io->len, VIPORT_TRAILER_ALIGNMENT);
+	fill_len = len - skb->len;
+
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				xmit_pool->xmitdata_dma,
+				xmit_pool->xmitdata_len, DMA_TO_DEVICE);
+
+	d = (u8 *) rdma_io->trailer - fill_len;
+	trailer_data_dma = rdma_io->trailer_dma - fill_len;
+	memset(d, 0, fill_len);
+
+	swr->sg_list[0].length = skb->len;
+	if (skb->len <= min_xmt_skb) {
+		memcpy(rdma_io->data, skb->data, skb->len);
+		swr->sg_list[0].lkey = data->mr->lkey;
+		swr->sg_list[0].addr = rdma_io->data_dma;
+		dev_kfree_skb_any(skb);
+		rdma_io->skb = NULL;
+	} else {
+		swr->sg_list[0].lkey = data->mr->lkey;
+
+		skb_data_dma = dma_map_single(viport->config->ibdev->dma_device,
+					      skb->data, skb->len,
+					      DMA_TO_DEVICE);
+
+		if (dma_mapping_error(skb_data_dma)) {
+			DATA_ERROR("skb data dma map error\n");
+			goto failure;
+		}
+
+		rdma_io->skb_data_dma = skb_data_dma;
+
+		swr->sg_list[0].addr = skb_data_dma;
+		skb_orphan(skb);
+	}
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				xmit_pool->buf_pool_dma,
+				xmit_pool->buf_pool_len, DMA_TO_DEVICE);
+
+	swr->sg_list[1].addr = trailer_data_dma;
+	swr->sg_list[1].length = fill_len + sizeof(struct viport_trailer);
+	swr->sg_list[0].lkey = data->mr->lkey;
+	swr->wr.rdma.remote_addr = be64_to_cpu(bpe->remote_addr);
+	swr->wr.rdma.remote_addr += data->xmit_pool.buffer_sz;
+	swr->wr.rdma.remote_addr -= (sizeof(struct viport_trailer) + len);
+	swr->wr.rdma.rkey = be32_to_cpu(bpe->rkey);
+
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   xmit_pool->buf_pool_dma,
+				   xmit_pool->buf_pool_len, DMA_TO_DEVICE);
+
+	data->xmit_pool.notify_count++;
+	if (data->xmit_pool.notify_count >= data->xmit_pool.notify_bundle) {
+		data->xmit_pool.notify_count = 0;
+		swr->send_flags = IB_SEND_SIGNALED;
+	} else {
+		swr->send_flags = 0;
+	}
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   xmit_pool->xmitdata_dma,
+				   xmit_pool->xmitdata_len, DMA_TO_DEVICE);
+	if (vnic_ib_post_send(&data->ib_conn, &rdma_io->io)) {
+		DATA_ERROR("failed to post send for data RDMA write\n");
+		viport_failure(data->parent);
+		goto failure;
+	}
+
+	data_xmits_stats(data);
+failure:
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   xmit_pool->xmitdata_dma,
+				   xmit_pool->xmitdata_len, DMA_TO_DEVICE);
+}
+
+static void data_kick_timeout_handler(unsigned long arg)
+{
+	struct data *data = (struct data *)arg;
+
+	DATA_FUNCTION("data_kick_timeout_handler()\n");
+	data->kick_timer_on = 0;
+	data_send_kick_message(data);
+}
+
+int data_xmit_packet(struct data *data, struct sk_buff *skb)
+{
+	struct xmit_pool	*pool = &data->xmit_pool;
+	struct rdma_io		*rdma_io;
+	struct buff_pool_entry	*bpe;
+	struct viport_trailer	*trailer;
+	unsigned int		sz = skb->len;
+	int			last;
+
+	DATA_FUNCTION("data_xmit_packet()\n");
+	if (sz > pool->buffer_sz) {
+		DATA_ERROR("outbound packet too large, size = %d\n", sz);
+		return -1;
+	}
+
+	if (data_alloc_xmit_buffer(data, skb, &bpe, &rdma_io, &last)) {
+		DATA_ERROR("error in allocating data xmit buffer\n");
+		return -1;
+	}
+
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				pool->xmitdata_dma, pool->xmitdata_len,
+				DMA_TO_DEVICE);
+	trailer = rdma_io->trailer;
+
+	memset(trailer, 0, sizeof *trailer);
+	memcpy(trailer->dest_mac_addr, skb->data, ETH_ALEN);
+
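+	/* connection hash: bit 0x40 plus the low 6 bits of the sum of
+	 * the TCP/UDP source and destination ports
+	 */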
+	if (skb->sk)
+		trailer->connection_hash_and_valid = 0x40 |
+			 ((be16_to_cpu(inet_sk(skb->sk)->sport) +
+			   be16_to_cpu(inet_sk(skb->sk)->dport)) & 0x3f);
+
+	trailer->connection_hash_and_valid |= CHV_VALID;
+
+	if ((sz > 16) && (*(__be16 *) (skb->data + 12) ==
+			   __constant_cpu_to_be16(ETH_P_8021Q))) {
+		trailer->vlan = *(__be16 *) (skb->data + 14);
+		memmove(skb->data + 4, skb->data, 12);
+		skb_pull(skb, 4);
+		trailer->pkt_flags |= PF_VLAN_INSERT;
+	}
+	if (last)
+		trailer->pkt_flags |= PF_KICK;
+	if (sz < ETH_ZLEN) {
+		/* EIOU requires all packets to be
+		 * of ethernet minimum packet size.
+		 */
+		trailer->data_length = __constant_cpu_to_be16(ETH_ZLEN);
+		rdma_io->len = ETH_ZLEN;
+	} else {
+		trailer->data_length = cpu_to_be16(sz);
+		rdma_io->len = sz;
+	}
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		trailer->tx_chksum_flags = TX_CHKSUM_FLAGS_CHECKSUM_V4
+		    | TX_CHKSUM_FLAGS_IP_CHECKSUM
+		    | TX_CHKSUM_FLAGS_TCP_CHECKSUM
+		    | TX_CHKSUM_FLAGS_UDP_CHECKSUM;
+	}
+
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   pool->xmitdata_dma, pool->xmitdata_len,
+				   DMA_TO_DEVICE);
+
+	data_rdma_packet(data, bpe, rdma_io);
+
+	if (pool->send_kicks) {
+		/* EIOC needs kicks to inform it of sent packets */
+		pool->kick_count++;
+		pool->kick_byte_count += sz;
+		if ((pool->kick_count >= pool->kick_bundle)
+		    || (pool->kick_byte_count >= pool->kick_byte_bundle)) {
+			data_send_kick_message(data);
+		} else if (pool->kick_count == 1) {
+			init_timer(&data->kick_timer);
+			/* timeout_before_kick is in usec */
+			data->kick_timer.expires =
+			   msecs_to_jiffies(be32_to_cpu(data->
+				eioc_pool_parms.timeout_before_kick) * 1000)
+				+ jiffies;
+			data->kick_timer.data = (unsigned long)data;
+			data->kick_timer.function = data_kick_timeout_handler;
+			add_timer(&data->kick_timer);
+			data->kick_timer_on = 1;
+		}
+	}
+	return 0;
+}
+
+void data_check_xmit_buffers(struct data *data)
+{
+	struct xmit_pool *pool = &data->xmit_pool;
+	unsigned long flags;
+
+	DATA_FUNCTION("data_check_xmit_buffers()\n");
+	spin_lock_irqsave(&data->xmit_buf_lock, flags);
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				pool->buf_pool_dma, pool->buf_pool_len,
+				DMA_TO_DEVICE);
+
+	if (data->xmit_pool.need_buffers
+	    && pool->buf_pool[pool->next_xmit_pool].valid
+	    && pool->next_xmit_buf != pool->last_comp_buf) {
+		data->xmit_pool.need_buffers = 0;
+		vnic_restart_xmit(data->parent->vnic,
+				  data->parent->parent);
+		DATA_INFO("there are free xmit buffers\n");
+	}
+	dma_sync_single_for_device(data->parent->config->ibdev->dma_device,
+				   pool->buf_pool_dma, pool->buf_pool_len,
+				   DMA_TO_DEVICE);
+
+	spin_unlock_irqrestore(&data->xmit_buf_lock, flags);
+}
+
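+/*
+ * Convert a completed receive buffer into an skb.  Small frames are
+ * copied into a freshly allocated skb so the posted buffer can be
+ * reused; larger frames hand the posted skb up the stack directly.
+ * Checksum offload results and any VLAN tag from the trailer are
+ * applied before returning.
+ */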
+static struct sk_buff *data_recv_to_skbuff(struct data *data,
+					   struct rdma_dest *rdma_dest)
+{
+	struct viport_trailer *trailer;
+	struct sk_buff *skb = NULL;
+	int start;
+	unsigned int len;
+	u8 rx_chksum_flags;
+
+	DATA_FUNCTION("data_recv_to_skbuff()\n");
+	trailer = rdma_dest->trailer;
+	start = data_offset(data, trailer);
+	len = data_len(data, trailer);
+
+	/* for small packets, allocate a fresh skb and copy into it;
+	 * leave room for the VLAN header and alignment
+	 */
+	if (len <= min_rcv_skb)
+		skb = dev_alloc_skb(len + VLAN_HLEN + 2);
+	if (skb) {
+		skb_reserve(skb, VLAN_HLEN + 2);
+		memcpy(skb->data, rdma_dest->data + start, len);
+		skb_put(skb, len);
+	} else {
+		skb = rdma_dest->skb;
+		rdma_dest->skb = NULL;
+		rdma_dest->trailer = NULL;
+		rdma_dest->data = NULL;
+		skb_pull(skb, start);
+		skb_trim(skb, len);
+	}
+
+	rx_chksum_flags = trailer->rx_chksum_flags;
+	DATA_INFO("rx_chksum_flags = %d, LOOP = %c, IP = %c,"
+	     " TCP = %c, UDP = %c\n",
+	     rx_chksum_flags,
+	     (rx_chksum_flags & RX_CHKSUM_FLAGS_LOOPBACK) ? 'Y' : 'N',
+	     (rx_chksum_flags & RX_CHKSUM_FLAGS_IP_CHECKSUM_SUCCEEDED) ? 'Y'
+	     : (rx_chksum_flags & RX_CHKSUM_FLAGS_IP_CHECKSUM_FAILED) ? 'N' :
+	     '-',
+	     (rx_chksum_flags & RX_CHKSUM_FLAGS_TCP_CHECKSUM_SUCCEEDED) ? 'Y'
+	     : (rx_chksum_flags & RX_CHKSUM_FLAGS_TCP_CHECKSUM_FAILED) ? 'N' :
+	     '-',
+	     (rx_chksum_flags & RX_CHKSUM_FLAGS_UDP_CHECKSUM_SUCCEEDED) ? 'Y'
+	     : (rx_chksum_flags & RX_CHKSUM_FLAGS_UDP_CHECKSUM_FAILED) ? 'N' :
+	     '-');
+
+	if ((rx_chksum_flags & RX_CHKSUM_FLAGS_LOOPBACK)
+	    || ((rx_chksum_flags & RX_CHKSUM_FLAGS_IP_CHECKSUM_SUCCEEDED)
+		&& ((rx_chksum_flags & RX_CHKSUM_FLAGS_TCP_CHECKSUM_SUCCEEDED)
+		    || (rx_chksum_flags &
+			RX_CHKSUM_FLAGS_UDP_CHECKSUM_SUCCEEDED))))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+
+	if (trailer->pkt_flags & PF_VLAN_INSERT) {
+		u8 *rv;
+
+		rv = skb_push(skb, 4);
+		memmove(rv, rv + 4, 12);
+		*(__be16 *) (rv + 12) = __constant_cpu_to_be16(ETH_P_8021Q);
+		if (trailer->pkt_flags & PF_PVID_OVERRIDDEN)
+			*(__be16 *) (rv + 14) = trailer->vlan &
+					__constant_cpu_to_be16(0xF000);
+		else
+			*(__be16 *) (rv + 14) = trailer->vlan;
+	}
+
+	return skb;
+}
+
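+/*
+ * Consume the next full receive buffer, if any.  The buffer is handed
+ * to the network stack as an skb, its pool entry is invalidated, and
+ * the ring index is advanced.  Returns non-zero when no completed
+ * buffer is available.
+ */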
+static int data_incoming_recv(struct data *data)
+{
+	struct recv_pool *pool = &data->recv_pool;
+	struct rdma_dest *rdma_dest;
+	struct viport_trailer *trailer;
+	struct buff_pool_entry *bpe;
+	struct sk_buff *skb;
+
+	DATA_FUNCTION("data_incoming_recv()\n");
+	if (pool->next_full_buf == pool->next_free_buf)
+		return -1;
+	bpe = &pool->buf_pool[pool->next_full_buf];
+	rdma_dest = &pool->recv_bufs[bpe->valid - 1];
+	trailer = rdma_dest->trailer;
+
+	if (!trailer
+	    || !(trailer->connection_hash_and_valid & CHV_VALID))
+		return -1;
+
+	/* received a packet */
+	if (trailer->pkt_flags & PF_KICK)
+		pool->kick_on_free = 1;
+
+	skb = data_recv_to_skbuff(data, rdma_dest);
+
+	if (skb) {
+		vnic_recv_packet(data->parent->vnic,
+				 data->parent->parent, skb);
+		list_add(&rdma_dest->list_ptrs, &pool->avail_recv_bufs);
+	}
+
+	dma_sync_single_for_cpu(data->parent->config->ibdev->dma_device,
+				pool->buf_pool_dma, pool->buf_pool_len,
+				DMA_TO_DEVICE);
+
+	bpe->valid = 0;
+	dma_sync_single_for_device(data->parent->config->ibdev->
+				   dma_device, pool->buf_pool_dma,
+				   pool->buf_pool_len, DMA_TO_DEVICE);
+
+	INC(pool->next_full_buf, 1, pool->eioc_pool_sz);
+	pool->num_posted_bufs--;
+	data_recvs_stats(data);
+	return 0;
+}
+
+static void data_received_kick(struct io *io)
+{
+	struct data *data = &io->viport->data;
+	unsigned long flags;
+
+	DATA_FUNCTION("data_received_kick()\n");
+	data_note_kickrcv_time();
+	spin_lock_irqsave(&data->recv_ios_lock, flags);
+	list_add(&io->list_ptrs, &data->recv_ios);
+	spin_unlock_irqrestore(&data->recv_ios_lock, flags);
+	data_post_recvs(data);
+	data_rcvkicks_stats(data);
+	data_check_xmit_buffers(data);
+
+	/* drain all completed receive buffers */
+	while (!data_incoming_recv(data))
+		;
+
+	if (data->connected) {
+		data_alloc_buffers(data, 0);
+		data_send_free_recv_buffers(data);
+	}
+}
+
+static void data_xmit_complete(struct io *io)
+{
+	struct rdma_io *rdma_io = (struct rdma_io *)io;
+	struct data *data = &io->viport->data;
+	struct xmit_pool *pool = &data->xmit_pool;
+	struct sk_buff *skb;
+
+	DATA_FUNCTION("data_xmit_complete()\n");
+
+	if (rdma_io->skb)
+		dma_unmap_single(data->parent->config->ibdev->dma_device,
+				 rdma_io->skb_data_dma, rdma_io->skb->len,
+				 DMA_TO_DEVICE);
+
+	while (pool->last_comp_buf != rdma_io->index) {
+		INC(pool->last_comp_buf, 1, pool->num_xmit_bufs);
+		skb = pool->xmit_bufs[pool->last_comp_buf].skb;
+		if (skb)
+			dev_kfree_skb_any(skb);
+		pool->xmit_bufs[pool->last_comp_buf].skb = NULL;
+	}
+
+	data_check_xmit_buffers(data);
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_data.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_data.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_DATA_H_INCLUDED
+#define VNIC_DATA_H_INCLUDED
+
+#include <linux/if_vlan.h>
+
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+#include <linux/timex.h>
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+
+#include "vnic_ib.h"
+#include "vnic_control_pkt.h"
+#include "vnic_trailer.h"
+
+struct rdma_dest {
+	struct list_head	list_ptrs;
+	struct sk_buff		*skb;
+	u8			*data;
+	struct viport_trailer	*trailer;
+};
+
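+/*
+ * Buffer descriptor exchanged with the EIOC over RDMA: the remote
+ * address and rkey of a receive buffer.  In the host receive pool,
+ * 'valid' doubles as a 1-based index into recv_bufs; in the transmit
+ * pool it is non-zero while the corresponding EIOC buffer is available.
+ */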
+struct buff_pool_entry {
+	__be64	remote_addr;
+	__be32	rkey;
+	u32	valid;
+};
+
+struct recv_pool {
+	u32			buffer_sz;
+	u32			pool_sz;
+	u32			eioc_pool_sz;
+	u32	 		eioc_rdma_rkey;
+	u64 			eioc_rdma_addr;
+	u32 			next_full_buf;
+	u32 			next_free_buf;
+	u32 			num_free_bufs;
+	u32 			num_posted_bufs;
+	u32 			sz_free_bundle;
+	int			kick_on_free;
+	struct buff_pool_entry	*buf_pool;
+	dma_addr_t		buf_pool_dma;
+	int			buf_pool_len;
+	struct rdma_dest	*recv_bufs;
+	struct list_head	avail_recv_bufs;
+};
+
+struct xmit_pool {
+	u32			buffer_sz;
+	u32 			pool_sz;
+	u32 			notify_count;
+	u32 			notify_bundle;
+	u32 			next_xmit_buf;
+	u32 			last_comp_buf;
+	u32 			num_xmit_bufs;
+	u32 			next_xmit_pool;
+	u32 			kick_count;
+	u32 			kick_byte_count;
+	u32 			kick_bundle;
+	u32 			kick_byte_bundle;
+	int			need_buffers;
+	int			send_kicks;
+	u32			rdma_rkey;
+	u64 			rdma_addr;
+	struct buff_pool_entry	*buf_pool;
+	dma_addr_t		buf_pool_dma;
+	int			buf_pool_len;
+	struct rdma_io		*xmit_bufs;
+	u8			*xmit_data;
+	dma_addr_t		xmitdata_dma;
+	int			xmitdata_len;
+};
+
+struct data {
+	struct viport			*parent;
+	struct data_config		*config;
+	struct ib_mr			*mr;
+	struct vnic_ib_conn		ib_conn;
+	u8				*local_storage;
+	struct vnic_recv_pool_config	host_pool_parms;
+	struct vnic_recv_pool_config	eioc_pool_parms;
+	struct recv_pool		recv_pool;
+	struct xmit_pool		xmit_pool;
+	u8				*region_data;
+	dma_addr_t			region_data_dma;
+	struct rdma_io			free_bufs_io;
+	struct send_io			kick_io;
+	struct list_head		recv_ios;
+	spinlock_t			recv_ios_lock;
+	spinlock_t			xmit_buf_lock;
+	int				kick_timer_on;
+	int				connected;
+	struct timer_list		kick_timer;
+	struct completion		done;
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+	struct {
+		u32		xmit_num;
+		u32		recv_num;
+		u32		free_buf_sends;
+		u32		free_buf_num;
+		u32		free_buf_min;
+		u32		kick_recvs;
+		u32		kick_reqs;
+		u32		no_xmit_bufs;
+		cycles_t	no_xmit_buf_time;
+	} statistics;
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+};
+
+int data_init(struct data *data, struct viport *viport,
+	      struct data_config *config, struct ib_pd *pd);
+
+int  data_connect(struct data *data);
+void data_connected(struct data *data);
+void data_disconnect(struct data *data);
+
+int data_xmit_packet(struct data *data, struct sk_buff *skb);
+
+void data_cleanup(struct data *data);
+
+#define data_is_connected(data)		\
+	(vnic_ib_conn_connected(&((data)->ib_conn)))
+#define data_path_id(data)		(data)->config->path_id
+#define data_eioc_pool(data)		&(data)->eioc_pool_parms
+#define data_host_pool(data)		&(data)->host_pool_parms
+#define data_eioc_pool_min(data)	&(data)->config->eioc_min
+#define data_host_pool_min(data)	&(data)->config->host_min
+#define data_eioc_pool_max(data)	&(data)->config->eioc_max
+#define data_host_pool_max(data)	&(data)->config->host_max
+#define data_local_pool_addr(data)	(data)->xmit_pool.rdma_addr
+#define data_local_pool_rkey(data)	(data)->xmit_pool.rdma_rkey
+#define data_remote_pool_addr(data)	&(data)->recv_pool.eioc_rdma_addr
+#define data_remote_pool_rkey(data)	&(data)->recv_pool.eioc_rdma_rkey
+
+#define data_max_mtu(data)				\
+	(MAX_PAYLOAD(min((data)->recv_pool.buffer_sz,	\
+	(data)->xmit_pool.buffer_sz)) - VLAN_ETH_HLEN)
+
+#define data_len(data, trailer)		be16_to_cpu((trailer)->data_length)
+#define data_offset(data, trailer)					\
+	((data)->recv_pool.buffer_sz - sizeof(struct viport_trailer)	\
+	 - ALIGN(data_len(data, trailer), VIPORT_TRAILER_ALIGNMENT)	\
+	 + (trailer)->data_alignment_offset)
+
+/* the following macros manipulate ring buffer indexes.
+ * the ring buffer size must be a power of 2.
+ */
+#define ADD(index, increment, size)	(((index) + (increment))&((size) - 1))
+#define NEXT(index, size)		ADD(index, 1, size)
+#define INC(index, increment, size)	(index) = ADD(index, increment, size)
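+/* example: with size 8, ADD(6, 3, 8) == 1 and NEXT(7, 8) == 0 */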
+
+#endif	/* VNIC_DATA_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_ib.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_ib.c
@@ -0,0 +1,693 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <rdma/ib_cache.h>
+
+#include "vnic_util.h"
+#include "vnic_config.h"
+#include "vnic_ib.h"
+#include "vnic_viport.h"
+#include "vnic_sys.h"
+#include "vnic_main.h"
+#include "vnic_stats.h"
+
+static int vnic_ib_inited;
+
+static void vnic_add_one(struct ib_device *device);
+static void vnic_remove_one(struct ib_device *device);
+
+static struct ib_client vnic_client = {
+	.name = "vnic",
+	.add = vnic_add_one,
+	.remove = vnic_remove_one
+};
+
+static struct ib_sa_client vnic_sa_client;
+
+static CLASS_DEVICE_ATTR(create_primary, S_IWUSR, NULL,
+			 vnic_create_primary);
+static CLASS_DEVICE_ATTR(create_secondary, S_IWUSR, NULL,
+			 vnic_create_secondary);
+
+static CLASS_DEVICE_ATTR(delete_vnic, S_IWUSR, NULL, vnic_delete);
+
+static struct vnic_ib_port *vnic_add_port(struct vnic_ib_device *device,
+					  u8 port_num)
+{
+	struct vnic_ib_port *port;
+
+	port = kzalloc(sizeof *port, GFP_KERNEL);
+	if (!port)
+		return NULL;
+
+	init_completion(&port->cdev_info.released);
+	port->dev = device;
+	port->port_num = port_num;
+
+	port->cdev_info.class_dev.class = &vnic_class;
+	port->cdev_info.class_dev.dev = device->dev->dma_device;
+	snprintf(port->cdev_info.class_dev.class_id, BUS_ID_SIZE,
+		 "vnic-%s-%d", device->dev->name, port_num);
+
+	if (class_device_register(&port->cdev_info.class_dev))
+		goto free_port;
+
+	if (class_device_create_file(&port->cdev_info.class_dev,
+				     &class_device_attr_create_primary))
+		goto err_class;
+	if (class_device_create_file(&port->cdev_info.class_dev,
+				     &class_device_attr_create_secondary))
+		goto err_class;
+
+	return port;
+err_class:
+	class_device_unregister(&port->cdev_info.class_dev);
+free_port:
+	kfree(port);
+
+	return NULL;
+}
+
+static void vnic_add_one(struct ib_device *device)
+{
+	struct vnic_ib_device *vnic_dev;
+	struct vnic_ib_port *port;
+	int s, e, p;
+
+	vnic_dev = kmalloc(sizeof *vnic_dev, GFP_KERNEL);
+	if (!vnic_dev)
+		return;
+
+	vnic_dev->dev = device;
+	INIT_LIST_HEAD(&vnic_dev->port_list);
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
+		s = 0;
+		e = 0;
+	} else {
+		s = 1;
+		e = device->phys_port_cnt;
+	}
+
+	for (p = s; p <= e; p++) {
+		port = vnic_add_port(vnic_dev, p);
+		if (port)
+			list_add_tail(&port->list, &vnic_dev->port_list);
+	}
+
+	ib_set_client_data(device, &vnic_client, vnic_dev);
+
+}
+
+static void vnic_remove_one(struct ib_device *device)
+{
+	struct vnic_ib_device *vnic_dev;
+	struct vnic_ib_port *port, *tmp_port;
+
+	vnic_dev = ib_get_client_data(device, &vnic_client);
+	list_for_each_entry_safe(port, tmp_port,
+				 &vnic_dev->port_list, list) {
+		class_device_unregister(&port->cdev_info.class_dev);
+		/*
+		 * wait for sysfs entries to go away, so that no new vnics
+		 * are created
+		 */
+		wait_for_completion(&port->cdev_info.released);
+		kfree(port);
+
+	}
+	kfree(vnic_dev);
+}
+
+int vnic_ib_init(void)
+{
+	int ret = -1;
+
+	IB_FUNCTION("vnic_ib_init()\n");
+
+	/* class has to be registered before
+	 * calling ib_register_client() because, that call
+	 * will trigger vnic_add_port() which will register
+	 * class_device for the port with the parent class
+	 * as vnic_class
+	 */
+	ret = class_register(&vnic_class);
+	if (ret) {
+		printk(KERN_ERR PFX "couldn't register class"
+		       " infiniband_vnic; error %d", ret);
+		goto out;
+	}
+
+	ib_sa_register_client(&vnic_sa_client);
+	ret = ib_register_client(&vnic_client);
+	if (ret) {
+		printk(KERN_ERR PFX "couldn't register IB client;"
+		       " error %d", ret);
+		goto err_ib_reg;
+	}
+
+	interface_cdev.class_dev.class = &vnic_class;
+	snprintf(interface_cdev.class_dev.class_id,
+		 BUS_ID_SIZE, "interfaces");
+	init_completion(&interface_cdev.released);
+	ret = class_device_register(&interface_cdev.class_dev);
+	if (ret) {
+		printk(KERN_ERR PFX "couldn't register class interfaces;"
+		       " error %d", ret);
+		goto err_class_dev;
+	}
+	ret = class_device_create_file(&interface_cdev.class_dev,
+				       &class_device_attr_delete_vnic);
+	if (ret) {
+		printk(KERN_ERR PFX "couldn't create class file"
+		       " 'delete_vnic'; error %d", ret);
+		goto err_class_file;
+	}
+
+	vnic_ib_inited = 1;
+
+	return ret;
+err_class_file:
+	class_device_unregister(&interface_cdev.class_dev);
+err_class_dev:
+	ib_unregister_client(&vnic_client);
+err_ib_reg:
+	ib_sa_unregister_client(&vnic_sa_client);
+	class_unregister(&vnic_class);
+out:
+	return ret;
+}
+
+void vnic_ib_cleanup(void)
+{
+	IB_FUNCTION("vnic_ib_cleanup()\n");
+
+	if (!vnic_ib_inited)
+		return;
+
+	class_device_unregister(&interface_cdev.class_dev);
+	wait_for_completion(&interface_cdev.released);
+
+	ib_unregister_client(&vnic_client);
+	ib_sa_unregister_client(&vnic_sa_client);
+	class_unregister(&vnic_class);
+}
+
+static void vnic_path_rec_completion(int status,
+				     struct ib_sa_path_rec *pathrec,
+				     void *context)
+{
+	struct vnic_ib_path_info *p = context;
+	p->status = status;
+	if (!status)
+		p->path = *pathrec;
+
+	complete(&p->done);
+}
+
+int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic)
+{
+	struct viport_config *config = netpath->viport->config;
+	int ret = 0;
+
+	init_completion(&config->path_info.done);
+	IB_INFO("Using SA path rec get time out value of %d\n",
+	       config->sa_path_rec_get_timeout);
+	config->path_info.path_query_id =
+			 ib_sa_path_rec_get(&vnic_sa_client,
+					    config->ibdev,
+					    config->port,
+					    &config->path_info.path,
+					    IB_SA_PATH_REC_DGID      |
+					    IB_SA_PATH_REC_SGID      |
+					    IB_SA_PATH_REC_NUMB_PATH |
+					    IB_SA_PATH_REC_PKEY,
+					    config->sa_path_rec_get_timeout,
+					    GFP_KERNEL,
+					    vnic_path_rec_completion,
+					    &config->path_info,
+					    &config->path_info.path_query);
+
+	if (config->path_info.path_query_id < 0) {
+		IB_ERROR("SA path record query failed; error %d\n",
+			 config->path_info.path_query_id);
+		ret = config->path_info.path_query_id;
+		goto out;
+	}
+
+	wait_for_completion(&config->path_info.done);
+
+	if (config->path_info.status < 0) {
+		printk(KERN_WARNING PFX "path record query failed for dgid "
+		       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[0]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[2]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[4]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[6]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[8]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[10]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[12]),
+		       (int)be16_to_cpu(*(__be16 *) &config->path_info.path.
+					dgid.raw[14]));
+
+		if (config->path_info.status == -ETIMEDOUT)
+			printk(KERN_WARNING PFX
+			       "reason: path record query timed out\n");
+		else if (config->path_info.status == -EIO)
+			printk(KERN_WARNING PFX
+			       "reason: error in sending path record query\n");
+		else
+			printk(KERN_WARNING PFX "reason: error %d in sending"
+			       " path record query\n",
+			       config->path_info.status);
+
+		ret = config->path_info.status;
+	}
+out:
+	if (ret)
+		netpath_timer(netpath, vnic->config->no_path_timeout);
+
+	return ret;
+}
+
+static void ib_qp_event(struct ib_event *event, void *context)
+{
+	IB_ERROR("QP event %d\n", event->event);
+}
+
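+/*
+ * Completion handler for the shared send/receive CQ: re-arm the CQ,
+ * then poll and dispatch every available work completion to its io
+ * routine.
+ */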
+static void vnic_ib_completion(struct ib_cq *cq, void *ptr)
+{
+	struct ib_wc wc;
+	struct io *io;
+	struct vnic_ib_conn *ib_conn = ptr;
+	cycles_t           comp_time;
+	u32              comp_num = 0;
+
+	vnic_ib_note_comptime_stats(&comp_time);
+	vnic_ib_callback_stats(ib_conn);
+
+	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	while (ib_poll_cq(cq, 1, &wc) > 0) {
+		io = (struct io *)(wc.wr_id);
+		vnic_ib_comp_stats(ib_conn, &comp_num);
+		if (wc.status) {
+#if 0
+			IB_ERROR("completion error  wc.status %d"
+				 " wc.opcode %d vendor err 0x%x\n",
+				 wc.status, wc.opcode, wc.vendor_err);
+#endif
+		} else if (io) {
+			vnic_ib_io_stats(io, ib_conn, comp_time);
+			if (io->routine)
+				(*io->routine) (io);
+		}
+	}
+	vnic_ib_maxio_stats(ib_conn, comp_num);
+}
+
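+/*
+ * Move the data QP through RTR and RTS using the attributes supplied
+ * by the CM, then send the RTU to complete connection establishment.
+ */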
+static int vnic_ib_mod_qp_to_rts(struct ib_cm_id * cm_id,
+			     struct vnic_ib_conn * ib_conn)
+{
+	int attr_mask = 0;
+	int ret;
+	struct ib_qp_attr *qp_attr = NULL;
+
+	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+	if (!qp_attr)
+		return -ENOMEM;
+
+	qp_attr->qp_state = IB_QPS_RTR;
+
+	if ((ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask)))
+		goto out;
+
+	if ((ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask)))
+		goto out;
+
+	IB_INFO("QP RTR\n");
+
+	qp_attr->qp_state = IB_QPS_RTS;
+
+	if ((ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask)))
+		goto out;
+
+	if ((ret = ib_modify_qp(ib_conn->qp, qp_attr, attr_mask)))
+		goto out;
+
+	IB_INFO("QP RTS\n");
+
+	if ((ret = ib_send_cm_rtu(cm_id, NULL, 0)))
+		goto out;
+out:
+	kfree(qp_attr);
+	return ret;
+}
+
+int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+	struct vnic_ib_conn *ib_conn = cm_id->context;
+	struct viport *viport = ib_conn->viport;
+	int err = 0;
+	int disconn = 0;
+
+	switch (event->event) {
+	case IB_CM_REQ_ERROR:
+		IB_ERROR("sending CM REQ failed\n");
+		err = 1;
+		disconn = 1;
+		break;
+	case IB_CM_REP_RECEIVED:
+		IB_INFO("CM REP recvd\n");
+		if (vnic_ib_mod_qp_to_rts(cm_id, ib_conn))
+			err = 1;
+		else {
+			ib_conn->state = IB_CONN_CONNECTED;
+			vnic_ib_connected_time_stats(ib_conn);
+			IB_INFO("RTU SENT\n");
+		}
+		break;
+	case IB_CM_REJ_RECEIVED:
+		printk(KERN_ERR PFX "CM rejected connection\n");
+		if (event->param.rej_rcvd.reason ==
+		    IB_CM_REJ_INVALID_SERVICE_ID)
+			printk(KERN_ERR "reason: invalid service ID. "
+			       "IOCGUID value specified may be incorrect\n");
+		else
+			printk(KERN_ERR "reason code: 0x%x\n",
+			       event->param.rej_rcvd.reason);
+
+		err = 1;
+		disconn = 1;
+		break;
+	case IB_CM_MRA_RECEIVED:
+		IB_INFO("CM MRA received\n");
+		break;
+
+	case IB_CM_DREP_RECEIVED:
+		IB_INFO("CM DREP recvd\n");
+		ib_conn->state = IB_CONN_DISCONNECTED;
+		break;
+
+	case IB_CM_TIMEWAIT_EXIT:
+		IB_ERROR("CM timewait exit\n");
+		err = 1;
+		break;
+
+	default:
+		IB_INFO("unhandled CM event %d\n", event->event);
+		break;
+
+	}
+
+	if (disconn)
+		viport->disconnect = 1;
+
+	if (err) {
+		ib_conn->state = IB_CONN_DISCONNECTED;
+		viport_failure(viport);
+	}
+
+	viport_kick(viport);
+	return 0;
+}
+
+
+int vnic_ib_cm_connect(struct vnic_ib_conn *ib_conn)
+{
+	struct ib_cm_req_param	*req = NULL;
+	struct viport		*viport;
+	int 			ret = -1;
+
+	if (!vnic_ib_conn_initted(ib_conn)) {
+		IB_ERROR("IB Connection out of state for CM connect (%d)\n",
+			 ib_conn->state);
+		return -EINVAL;
+	}
+
+	vnic_ib_conntime_stats(ib_conn);
+	req = kzalloc(sizeof *req, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	viport	= ib_conn->viport;
+
+	req->primary_path	= &viport->config->path_info.path;
+	req->alternate_path	= NULL;
+	req->qp_num		= ib_conn->qp->qp_num;
+	req->qp_type		= ib_conn->qp->qp_type;
+	req->service_id 	= ib_conn->ib_config->service_id;
+	req->private_data	= &ib_conn->ib_config->conn_data;
+	req->private_data_len	= sizeof(struct vnic_connection_data);
+	req->flow_control	= 1;
+
+	get_random_bytes(&req->starting_psn, 4);
+	req->starting_psn &= 0xffffff;
+
+	/*
+	 * Both responder_resources and initiator_depth are set to zero
+	 * as we do not need RDMA read.
+	 *
+	 * They also must be set to zero, otherwise data connections
+	 * are rejected by VEx.
+	 */
+	req->responder_resources 	= 0;
+	req->initiator_depth		= 0;
+	req->remote_cm_response_timeout = 20;
+	req->local_cm_response_timeout  = 20;
+	req->retry_count		= ib_conn->ib_config->retry_count;
+	req->rnr_retry_count		= ib_conn->ib_config->rnr_retry_count;
+	req->max_cm_retries		= 15;
+
+	ib_conn->state = IB_CONN_CONNECTING;
+
+	ret = ib_send_cm_req(ib_conn->cm_id, req);
+
+	kfree(req);
+
+	if (ret) {
+		IB_ERROR("CM REQ sending failed; error %d\n", ret);
+		ib_conn->state = IB_CONN_DISCONNECTED;
+	}
+
+	return ret;
+}
+
+static int vnic_ib_init_qp(struct vnic_ib_conn * ib_conn,
+			   struct vnic_ib_config *config,
+			   struct ib_pd	*pd,
+			   struct viport_config * viport_config)
+{
+	struct ib_qp_init_attr	*init_attr;
+	struct ib_qp_attr	*attr;
+	int			ret;
+
+	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
+	if (!init_attr)
+		return -ENOMEM;
+
+	init_attr->event_handler	= ib_qp_event;
+	init_attr->cap.max_send_wr	= config->num_sends;
+	init_attr->cap.max_recv_wr	= config->num_recvs;
+	init_attr->cap.max_recv_sge	= config->recv_scatter;
+	init_attr->cap.max_send_sge	= config->send_gather;
+	init_attr->sq_sig_type		= IB_SIGNAL_ALL_WR;
+	init_attr->qp_type		= IB_QPT_RC;
+	init_attr->send_cq		= ib_conn->cq;
+	init_attr->recv_cq		= ib_conn->cq;
+
+	ib_conn->qp = ib_create_qp(pd, init_attr);
+
+	if (IS_ERR(ib_conn->qp)) {
+		ret = -1;
+		IB_ERROR("could not create QP\n");
+		goto free_init_attr;
+	}
+
+	attr = kmalloc(sizeof *attr, GFP_KERNEL);
+	if (!attr) {
+		ret = -ENOMEM;
+		goto destroy_qp;
+	}
+
+	ret = ib_find_cached_pkey(viport_config->ibdev,
+				  viport_config->port,
+				  be16_to_cpu(viport_config->path_info.path.
+					      pkey),
+				  &attr->pkey_index);
+	if (ret) {
+		printk(KERN_WARNING PFX "ib_find_cached_pkey() failed; "
+		       "error %d\n", ret);
+		goto freeattr;
+	}
+
+	attr->qp_state		= IB_QPS_INIT;
+	attr->qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
+	attr->port_num		= viport_config->port;
+
+	ret = ib_modify_qp(ib_conn->qp, attr,
+			   IB_QP_STATE |
+			   IB_QP_PKEY_INDEX |
+			   IB_QP_ACCESS_FLAGS | IB_QP_PORT);
+	if (ret) {
+		printk(KERN_WARNING PFX "could not modify QP; error %d\n",
+		       ret);
+		goto freeattr;
+	}
+
+	kfree(attr);
+	kfree(init_attr);
+	return ret;
+
+freeattr:
+	kfree(attr);
+destroy_qp:
+	ib_destroy_qp(ib_conn->qp);
+free_init_attr:
+	kfree(init_attr);
+	return ret;
+}
+
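+/*
+ * Create the completion queue (sized for all sends and receives) and
+ * the RC queue pair for this connection, and move the QP to the INIT
+ * state with remote write access enabled.
+ */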
+int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport,
+		      struct ib_pd *pd, struct vnic_ib_config *config)
+{
+	struct viport_config	*viport_config = viport->config;
+	int		ret = -1;
+	unsigned int	cq_size = config->num_sends + config->num_recvs;
+
+
+	if (!vnic_ib_conn_uninitted(ib_conn)) {
+		IB_ERROR("IB Connection out of state for init (%d)\n",
+			 ib_conn->state);
+		return -EINVAL;
+	}
+
+	ib_conn->cq = ib_create_cq(viport_config->ibdev, vnic_ib_completion,
+				   NULL, ib_conn, cq_size);
+	if (IS_ERR(ib_conn->cq)) {
+		IB_ERROR("could not create CQ\n");
+		goto out;
+	}
+
+	ib_req_notify_cq(ib_conn->cq, IB_CQ_NEXT_COMP);
+
+	ret = vnic_ib_init_qp(ib_conn, config, pd, viport_config);
+
+	if(ret)
+		goto destroy_cq;
+
+	spin_lock_init(&ib_conn->conn_lock);
+	ib_conn->state = IB_CONN_INITTED;
+
+	return ret;
+
+destroy_cq:
+	ib_destroy_cq(ib_conn->cq);
+out:
+	return ret;
+}
+
+int vnic_ib_post_recv(struct vnic_ib_conn * ib_conn, struct io * io)
+{
+	cycles_t		post_time;
+	struct ib_recv_wr	*bad_wr;
+	int			ret = -1;
+	unsigned long		flags;
+
+	IB_FUNCTION("vnic_ib_post_recv()\n");
+
+	spin_lock_irqsave(&ib_conn->conn_lock, flags);
+
+	if (!vnic_ib_conn_initted(ib_conn) &&
+	    !vnic_ib_conn_connected(ib_conn)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vnic_ib_pre_rcvpost_stats(ib_conn, io, &post_time);
+	io->type = RECV;
+	ret = ib_post_recv(ib_conn->qp, &io->rwr, &bad_wr);
+	if (ret) {
+		IB_ERROR("error in posting rcv wr; error %d\n", ret);
+		goto out;
+	}
+
+	vnic_ib_post_rcvpost_stats(ib_conn, post_time);
+out:
+	spin_unlock_irqrestore(&ib_conn->conn_lock, flags);
+	return ret;
+
+}
+
+int vnic_ib_post_send(struct vnic_ib_conn * ib_conn, struct io * io)
+{
+	cycles_t		post_time;
+	unsigned long		flags;
+	struct ib_send_wr	*bad_wr;
+	int			ret = -1;
+
+	IB_FUNCTION("vnic_ib_post_send()\n");
+
+	spin_lock_irqsave(&ib_conn->conn_lock, flags);
+	if (!vnic_ib_conn_connected(ib_conn)) {
+		IB_ERROR("IB Connection out of state for"
+			 " posting sends (%d)\n", ib_conn->state);
+		goto out;
+	}
+
+	vnic_ib_pre_sendpost_stats(io, &post_time);
+	if (io->swr.opcode == IB_WR_RDMA_WRITE)
+		io->type = RDMA;
+	else
+		io->type = SEND;
+
+	ret = ib_post_send(ib_conn->qp, &io->swr, &bad_wr);
+	if (ret) {
+		IB_ERROR("error in posting send wr; error %d\n", ret);
+		goto out;
+	}
+
+	vnic_ib_post_sendpost_stats(ib_conn, io, post_time);
+out:
+	spin_unlock_irqrestore(&ib_conn->conn_lock, flags);
+	return ret;
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_ib.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_ib.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_IB_H_INCLUDED
+#define VNIC_IB_H_INCLUDED
+
+#include <linux/timex.h>
+#include <linux/completion.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cm.h>
+
+#include "vnic_sys.h"
+#include "vnic_netpath.h"
+#define PFX	"ib_vnic: "
+
+struct io;
+typedef void (comp_routine_t) (struct io * io);
+
+enum vnic_ib_conn_state {
+	IB_CONN_UNINITTED	= 0,
+	IB_CONN_INITTED		= 1,
+	IB_CONN_CONNECTING	= 2,
+	IB_CONN_CONNECTED	= 3,
+	IB_CONN_DISCONNECTED	= 4
+};
+
+struct vnic_ib_conn {
+	struct viport		*viport;
+	struct vnic_ib_config	*ib_config;
+	spinlock_t		conn_lock;
+	enum vnic_ib_conn_state	state;
+	struct ib_qp		*qp;
+	struct ib_cq		*cq;
+	struct ib_cm_id		*cm_id;
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+	struct {
+		cycles_t	connection_time;
+		cycles_t	rdma_post_time;
+		u32		rdma_post_ios;
+		cycles_t	rdma_comp_time;
+		u32		rdma_comp_ios;
+		cycles_t	send_post_time;
+		u32		send_post_ios;
+		cycles_t	send_comp_time;
+		u32		send_comp_ios;
+		cycles_t	recv_post_time;
+		u32		recv_post_ios;
+		cycles_t	recv_comp_time;
+		u32		recv_comp_ios;
+		u32		num_ios;
+		u32		num_callbacks;
+		u32		max_ios;
+	} statistics;
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+};
+
+struct vnic_ib_path_info {
+	struct ib_sa_path_rec	path;
+	struct ib_sa_query	*path_query;
+	int			path_query_id;
+	int			status;
+	struct			completion done;
+};
+
+struct vnic_ib_device {
+	struct ib_device	*dev;
+	struct list_head	port_list;
+};
+
+struct vnic_ib_port {
+	struct vnic_ib_device	*dev;
+	u8			port_num;
+	struct class_dev_info	cdev_info;
+	struct list_head	list;
+};
+
+struct io {
+	struct list_head	list_ptrs;
+	struct viport		*viport;
+	comp_routine_t		*routine;
+	struct ib_recv_wr	rwr;
+	struct ib_send_wr	swr;
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+	cycles_t		time;
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+	enum {RECV, RDMA, SEND}	type;
+};
+
+struct rdma_io {
+	struct io		io;
+	struct ib_sge		list[2];
+	u16			index;
+	u16			len;
+	u8			*data;
+	dma_addr_t		data_dma;
+	struct sk_buff		*skb;
+	dma_addr_t		skb_data_dma;
+	struct viport_trailer 	*trailer;
+	dma_addr_t 		trailer_dma;
+};
+
+struct send_io {
+	struct io	io;
+	struct ib_sge	list;
+	u8		*virtual_addr;
+};
+
+struct recv_io {
+	struct io	io;
+	struct ib_sge	list;
+	u8		*virtual_addr;
+};
+
+int	vnic_ib_init(void);
+void	vnic_ib_cleanup(void);
+
+struct vnic;
+int vnic_ib_get_path(struct netpath *netpath, struct vnic * vnic);
+int vnic_ib_conn_init(struct vnic_ib_conn *ib_conn, struct viport *viport,
+		      struct ib_pd *pd, struct vnic_ib_config *config);
+
+int vnic_ib_post_recv(struct vnic_ib_conn *ib_conn, struct io *io);
+int vnic_ib_post_send(struct vnic_ib_conn *ib_conn, struct io *io);
+int vnic_ib_cm_connect(struct vnic_ib_conn *ib_conn);
+int vnic_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+
+#define	vnic_ib_conn_uninitted(ib_conn)			\
+	((ib_conn)->state == IB_CONN_UNINITTED)
+#define	vnic_ib_conn_initted(ib_conn)			\
+	((ib_conn)->state == IB_CONN_INITTED)
+#define	vnic_ib_conn_connecting(ib_conn)		\
+	((ib_conn)->state == IB_CONN_CONNECTING)
+#define	vnic_ib_conn_connected(ib_conn)			\
+	((ib_conn)->state == IB_CONN_CONNECTED)
+#define	vnic_ib_conn_disconnected(ib_conn)		\
+	((ib_conn)->state == IB_CONN_DISCONNECTED)
+
+#endif	/* VNIC_IB_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_main.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_main.c
@@ -0,0 +1,1030 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/completion.h>
+
+#include <rdma/ib_cache.h>
+
+#include "vnic_util.h"
+#include "vnic_main.h"
+#include "vnic_netpath.h"
+#include "vnic_viport.h"
+#include "vnic_ib.h"
+#include "vnic_stats.h"
+
+#define MODULEVERSION "0.1"
+#define MODULEDETAILS "Virtual NIC driver version " MODULEVERSION
+
+MODULE_AUTHOR("Ramachandra K");
+MODULE_DESCRIPTION(MODULEDETAILS);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_SUPPORTED_DEVICE("QLogic Ethernet Virtual I/O Controller");
+
+u32 vnic_debug = 0;
+
+module_param(vnic_debug, uint, 0444);
+MODULE_PARM_DESC(vnic_debug, "Enable debug tracing if > 0");
+
+LIST_HEAD(vnic_list);
+
+const char driver[] = "vnic";
+
+DECLARE_WAIT_QUEUE_HEAD(vnic_npevent_queue);
+LIST_HEAD(vnic_npevent_list);
+DECLARE_COMPLETION(vnic_npevent_thread_exit);
+spinlock_t vnic_npevent_list_lock = SPIN_LOCK_UNLOCKED;
+int vnic_npevent_thread = -1;
+int vnic_npevent_thread_end = 0;
+
+
+void vnic_connected(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_connected()\n");
+	if (netpath->second_bias)
+		vnic_npevent_queue_evt(netpath, VNIC_SECNP_CONNECTED);
+	else
+		vnic_npevent_queue_evt(netpath, VNIC_PRINP_CONNECTED);
+
+	vnic_connected_stats(vnic);
+}
+
+void vnic_disconnected(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_disconnected()\n");
+	if (netpath->second_bias)
+		vnic_npevent_queue_evt(netpath, VNIC_SECNP_DISCONNECTED);
+	else
+		vnic_npevent_queue_evt(netpath, VNIC_PRINP_DISCONNECTED);
+}
+
+void vnic_link_up(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_link_up()\n");
+	if (netpath->second_bias)
+		vnic_npevent_queue_evt(netpath, VNIC_SECNP_LINKUP);
+	else
+		vnic_npevent_queue_evt(netpath, VNIC_PRINP_LINKUP);
+}
+
+void vnic_link_down(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_link_down()\n");
+	if (netpath->second_bias)
+		vnic_npevent_queue_evt(netpath, VNIC_SECNP_LINKDOWN);
+	else
+		vnic_npevent_queue_evt(netpath, VNIC_PRINP_LINKDOWN);
+}
+
+void vnic_stop_xmit(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_stop_xmit()\n");
+	if (netpath == vnic->current_path) {
+		if (vnic->xmit_started) {
+			netif_stop_queue(&vnic->netdevice);
+			vnic->xmit_started = 0;
+		}
+
+		vnic_stop_xmit_stats(vnic);
+	}
+}
+
+void vnic_restart_xmit(struct vnic *vnic, struct netpath *netpath)
+{
+	VNIC_FUNCTION("vnic_restart_xmit()\n");
+	if (netpath == vnic->current_path) {
+		if (!vnic->xmit_started) {
+			netif_wake_queue(&vnic->netdevice);
+			vnic->xmit_started = 1;
+		}
+
+		vnic_restart_xmit_stats(vnic);
+	}
+}
+
+void vnic_recv_packet(struct vnic *vnic, struct netpath *netpath,
+		      struct sk_buff *skb)
+{
+	VNIC_FUNCTION("vnic_recv_packet()\n");
+	if ((netpath != vnic->current_path) || !vnic->open) {
+		VNIC_INFO("tossing packet\n");
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	vnic->netdevice.last_rx = jiffies;
+	skb->dev = &vnic->netdevice;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+	if (!vnic->config->use_rx_csum)
+		skb->ip_summed = CHECKSUM_NONE;
+	netif_rx(skb);
+	vnic_recv_pkt_stats(vnic);
+}
+
+static struct net_device_stats *vnic_get_stats(struct net_device *device)
+{
+	struct vnic *vnic;
+	struct netpath *np;
+
+	VNIC_FUNCTION("vnic_get_stats()\n");
+	vnic = (struct vnic *)device->priv;
+
+	np = vnic->current_path;
+	if (np && np->viport)
+		viport_get_stats(np->viport, &vnic->stats);
+	return &vnic->stats;
+}
+
+static int vnic_open(struct net_device *device)
+{
+	struct vnic *vnic;
+
+	VNIC_FUNCTION("vnic_open()\n");
+	vnic = (struct vnic *)device->priv;
+
+	vnic->open++;
+	vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK);
+	vnic->xmit_started = 1;
+	netif_start_queue(&vnic->netdevice);
+
+	return 0;
+}
+
+static int vnic_stop(struct net_device *device)
+{
+	struct vnic *vnic;
+	int ret = 0;
+
+	VNIC_FUNCTION("vnic_stop()\n");
+	vnic = (struct vnic *)device->priv;
+	netif_stop_queue(device);
+	vnic->xmit_started = 0;
+	vnic->open--;
+	vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK);
+
+	return ret;
+}
+
+static int vnic_hard_start_xmit(struct sk_buff *skb,
+				struct net_device *device)
+{
+	struct vnic *vnic;
+	struct netpath *np;
+	cycles_t xmit_time;
+	int	 ret = -1;
+
+	VNIC_FUNCTION("vnic_hard_start_xmit()\n");
+	vnic = (struct vnic *)device->priv;
+	np = vnic->current_path;
+
+	vnic_pre_pkt_xmit_stats(&xmit_time);
+
+	if (np && np->viport)
+		ret = viport_xmit_packet(np->viport, skb);
+
+	if (ret) {
+		vnic_xmit_fail_stats(vnic);
+		dev_kfree_skb_any(skb);
+		vnic->stats.tx_dropped++;
+		goto out;
+	}
+
+	device->trans_start = jiffies;
+	vnic_post_pkt_xmit_stats(vnic, xmit_time);
+out:
+	return 0;
+}
+
+static void vnic_tx_timeout(struct net_device *device)
+{
+	struct vnic *vnic;
+
+	VNIC_FUNCTION("vnic_tx_timeout()\n");
+	vnic = (struct vnic *)device->priv;
+	device->trans_start = jiffies;
+
+	if (vnic->current_path->viport)
+		viport_failure(vnic->current_path->viport);
+
+	VNIC_ERROR("vnic_tx_timeout\n");
+}
+
+static void vnic_set_multicast_list(struct net_device *device)
+{
+	struct vnic *vnic;
+	unsigned long flags;
+
+	VNIC_FUNCTION("vnic_set_multicast_list()\n");
+	vnic = (struct vnic *)device->priv;
+
+	spin_lock_irqsave(&vnic->lock, flags);
+	/* The vnic_link_evt thread also needs access to the multicast
+	 * list, but the netdevice's mc_list may only safely be read
+	 * from within this call, so keep a local copy in the vnic.
+	 * The netdevice keeps a linked list; the copy is an array in
+	 * which each element's next pointer points at the following
+	 * element.  The array is sized with 10 spare entries whenever
+	 * it is reallocated, so it does not have to grow on every
+	 * change, and it is only freed once the list becomes empty.
+	 */
+	if (device->mc_count == 0) {
+		if (vnic->mc_list_len) {
+			vnic->mc_list_len = vnic->mc_count = 0;
+			kfree(vnic->mc_list);
+		}
+	} else {
+		struct dev_mc_list *mc_list = device->mc_list;
+		int i;
+
+		if (device->mc_count > vnic->mc_list_len) {
+			if (vnic->mc_list_len)
+				kfree(vnic->mc_list);
+			vnic->mc_list_len = device->mc_count + 10;
+			vnic->mc_list = kmalloc(vnic->mc_list_len *
+						sizeof *mc_list, GFP_ATOMIC);
+			if (!vnic->mc_list) {
+				vnic->mc_list_len = vnic->mc_count = 0;
+				VNIC_ERROR("failed allocating mc_list\n");
+				goto failure;
+			}
+		}
+		vnic->mc_count = device->mc_count;
+		for (i = 0; i < device->mc_count; i++) {
+			vnic->mc_list[i] = *mc_list;
+			vnic->mc_list[i].next = &vnic->mc_list[i + 1];
+			mc_list = mc_list->next;
+		}
+	}
+	spin_unlock_irqrestore(&vnic->lock, flags);
+
+	if (vnic->primary_path.viport)
+		viport_set_multicast(vnic->primary_path.viport,
+				     vnic->mc_list, vnic->mc_count);
+
+	if (vnic->secondary_path.viport)
+		viport_set_multicast(vnic->secondary_path.viport,
+				     vnic->mc_list, vnic->mc_count);
+
+	vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_SETLINK);
+	return;
+failure:
+	spin_unlock_irqrestore(&vnic->lock, flags);
+}
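
The multicast path above amounts to snapshotting the netdevice's linked mc_list into a single over-sized array each time it changes.  A minimal user-space sketch of that copy strategy follows; the struct, names and the NULL termination are invented for the example (the driver instead passes mc_count alongside the array):

/*
 * Illustrative only -- not part of the patch.  Copy a linked list into
 * one contiguous allocation with some headroom, re-linking the copies
 * so list-style walkers still work.
 */
#include <stdio.h>
#include <stdlib.h>

struct mc_entry {
	int		id;
	struct mc_entry	*next;
};

static struct mc_entry *snapshot_list(const struct mc_entry *src,
				      int count, int headroom)
{
	struct mc_entry *copy;
	int i;

	copy = malloc((count + headroom) * sizeof(*copy));
	if (!copy)
		return NULL;
	for (i = 0; i < count && src; i++, src = src->next) {
		copy[i] = *src;
		copy[i].next = &copy[i + 1];
	}
	if (i)
		copy[i - 1].next = NULL;  /* terminated here; the driver bounds walks by count */
	return copy;
}

int main(void)
{
	struct mc_entry c = { 3, NULL };
	struct mc_entry b = { 2, &c };
	struct mc_entry a = { 1, &b };
	struct mc_entry *copy, *p;

	copy = snapshot_list(&a, 3, 10);
	for (p = copy; p; p = p->next)
		printf("%d\n", p->id);
	free(copy);
	return 0;
}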
+
+static int vnic_set_mac_address(struct net_device *device, void *addr)
+{
+	struct vnic	*vnic;
+	struct sockaddr	*sockaddr = addr;
+	u8		*address;
+	int		ret = -1;
+
+	VNIC_FUNCTION("vnic_set_mac_address()\n");
+	vnic = (struct vnic *)device->priv;
+
+	if (!is_valid_ether_addr(sockaddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (netif_running(device))
+		return -EBUSY;
+
+	memcpy(device->dev_addr, sockaddr->sa_data, ETH_ALEN);
+	address = sockaddr->sa_data;
+
+	if (vnic->primary_path.viport)
+		ret = viport_set_unicast(vnic->primary_path.viport,
+					 address);
+
+	if (ret)
+		return ret;
+
+	/* Ignore the result of set unicast for the secondary path
+	 * viport.  Consider the operation a success if we are able
+	 * to at least set the primary path viport address.
+	 */
+	if (vnic->secondary_path.viport)
+		viport_set_unicast(vnic->secondary_path.viport, address);
+
+	vnic->mac_set = 1;
+	/* This is assumed to work even if nothing is connected at the
+	 * moment.  Note that it may return before the address has
+	 * actually been changed.
+	 */
+	return 0;
+}
+
+static int vnic_change_mtu(struct net_device *device, int mtu)
+{
+	struct vnic	*vnic;
+	int		ret = 0;
+	int		pri_max_mtu;
+	int		sec_max_mtu;
+
+	VNIC_FUNCTION("vnic_change_mtu()\n");
+	vnic = (struct vnic *)device->priv;
+
+	if (vnic->primary_path.viport)
+		pri_max_mtu = viport_max_mtu(vnic->primary_path.viport);
+	else
+		pri_max_mtu = MAX_PARAM_VALUE;
+
+	if (vnic->secondary_path.viport)
+		sec_max_mtu = viport_max_mtu(vnic->secondary_path.viport);
+	else
+		sec_max_mtu = MAX_PARAM_VALUE;
+
+	if ((mtu < pri_max_mtu) && (mtu < sec_max_mtu)) {
+		device->mtu = mtu;
+		vnic_npevent_queue_evt(&vnic->primary_path,
+				       VNIC_NP_SETLINK);
+	}
+
+	return ret;
+}
+
+static int vnic_npevent_register(struct vnic *vnic, struct netpath *netpath)
+{
+	u8	*address;
+	int	ret;
+
+	if (!vnic->mac_set) {
+		/* if netpath == secondary_path, then the primary path isn't
+		 * connected.  MAC address will be set when the primary
+		 * connects.
+		 */
+		netpath_get_hw_addr(netpath, vnic->netdevice.dev_addr);
+		address = vnic->netdevice.dev_addr;
+
+		if (vnic->secondary_path.viport)
+			viport_set_unicast(vnic->secondary_path.viport,
+					   address);
+
+		vnic->mac_set = 1;
+	}
+
+	ret = register_netdev(&vnic->netdevice);
+	if (ret) {
+		printk(KERN_WARNING PFX "failed registering netdev "
+		       "error %d\n", ret);
+		return ret;
+	}
+
+	vnic->state = VNIC_REGISTERED;
+	vnic->carrier = 2; /*special value to force netif_carrier_(on|off)*/
+	return 0;
+}
+
+static void vnic_npevent_dequeue_all(struct vnic *vnic)
+{
+	unsigned long flags;
+	struct vnic_npevent *npevt, *tmp;
+
+	spin_lock_irqsave(&vnic_npevent_list_lock, flags);
+	if (list_empty(&vnic_npevent_list))
+		goto out;
+	list_for_each_entry_safe(npevt, tmp, &vnic_npevent_list,
+				 list_ptrs) {
+		if (npevt->vnic == vnic) {
+			list_del(&npevt->list_ptrs);
+			kfree(npevt);
+		}
+	}
+out:
+	spin_unlock_irqrestore(&vnic_npevent_list_lock, flags);
+}
+
+
+static const char *const vnic_npevent_str[] = {
+	"PRIMARY CONNECTED",
+	"PRIMARY DISCONNECTED",
+	"PRIMARY CARRIER",
+	"PRIMARY NO CARRIER",
+	"PRIMARY TIMER EXPIRED",
+	"SECONDARY CONNECTED",
+	"SECONDARY DISCONNECTED",
+	"SECONDARY CARRIER",
+	"SECONDARY NO CARRIER",
+	"SECONDARY TIMER EXPIRED",
+	"SETLINK",
+	"FREE VNIC",
+};
+
+static void update_path_and_reconnect(struct netpath *netpath,
+				      struct vnic *vnic)
+{
+	struct viport_config *config = netpath->viport->config;
+	int delay = 1;
+
+	if (vnic_ib_get_path(netpath, vnic))
+		return;
+	/*
+	 * Tell viport_connect to wait for default_no_path_timeout
+	 * before connecting if we are retrying the same path index
+	 * within default_no_path_timeout.  This prevents flooding a
+	 * path (or set of paths) that is failing to connect with
+	 * repeated connect requests.
+	 */
+	if (jiffies > netpath->connect_time +
+		      vnic->config->no_path_timeout) {
+		netpath->path_idx = config->path_idx;
+		netpath->connect_time = jiffies;
+		delay = 0;
+	} else if (config->path_idx != netpath->path_idx)
+		delay = 0;
+
+	viport_connect(netpath->viport, delay);
+}
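
The reconnect throttling implemented above can be summarized as: retries of the same path index are delayed until a per-path timeout has elapsed, while a switch to a different path index goes out immediately.  A small standalone sketch under those assumptions (time(2) stands in for jiffies, and all names are invented for the example):

/* Illustrative only -- not part of the patch. */
#include <stdio.h>
#include <time.h>

struct path_state {
	int	path_idx;	/* index we last tried */
	time_t	connect_time;	/* when we last (re)started trying it */
};

/* Return 1 if the caller should delay before reconnecting. */
static int should_delay(struct path_state *st, int new_idx, time_t timeout)
{
	time_t now = time(NULL);

	if (now > st->connect_time + timeout) {
		/* Same or different path, but the timeout has passed: reset. */
		st->path_idx = new_idx;
		st->connect_time = now;
		return 0;
	}
	if (new_idx != st->path_idx)
		return 0;	/* different path: try right away */
	return 1;		/* same path, too soon: back off */
}

int main(void)
{
	struct path_state st = { 0, time(NULL) };

	printf("retry same path now:  delay=%d\n", should_delay(&st, 0, 60));
	printf("retry other path now: delay=%d\n", should_delay(&st, 1, 60));
	return 0;
}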
+
+static void vnic_set_uni_multicast(struct vnic * vnic,
+				   struct netpath * netpath)
+{
+	unsigned long	flags;
+	u8		*address;
+
+	if (vnic->mac_set) {
+		address = vnic->netdevice.dev_addr;
+
+		if (netpath->viport)
+			viport_set_unicast(netpath->viport, address);
+	}
+	spin_lock_irqsave(&vnic->lock, flags);
+
+	if (vnic->mc_list && netpath->viport)
+		viport_set_multicast(netpath->viport, vnic->mc_list,
+				     vnic->mc_count);
+
+	spin_unlock_irqrestore(&vnic->lock, flags);
+	if (vnic->state == VNIC_REGISTERED) {
+		if (!netpath->viport)
+			return;
+		viport_set_link(netpath->viport,
+				vnic->netdevice.flags & ~IFF_UP,
+				vnic->netdevice.mtu);
+	}
+}
+
+static void vnic_set_netpath_timers(struct vnic *vnic,
+				    struct netpath *netpath)
+{
+	switch (netpath->timer_state) {
+	case NETPATH_TS_IDLE:
+		netpath->timer_state = NETPATH_TS_ACTIVE;
+		if (vnic->state == VNIC_UNINITIALIZED)
+			netpath_timer(netpath,
+				      vnic->config->
+				      primary_connect_timeout);
+		else
+			netpath_timer(netpath,
+				      vnic->config->
+				      primary_reconnect_timeout);
+		break;
+	case NETPATH_TS_ACTIVE:
+		/*nothing to do*/
+		break;
+	case NETPATH_TS_EXPIRED:
+		if (vnic->state == VNIC_UNINITIALIZED) {
+			vnic_npevent_register(vnic, netpath);
+		}
+		break;
+	}
+}
+
+static void vnic_check_primary_path_timer(struct vnic * vnic)
+{
+	switch (vnic->primary_path.timer_state) {
+	case NETPATH_TS_ACTIVE:
+		/* nothing to do. just wait */
+		break;
+	case NETPATH_TS_IDLE:
+		netpath_timer(&vnic->primary_path,
+			      vnic->config->
+			      primary_switch_timeout);
+		break;
+	case NETPATH_TS_EXPIRED:
+		printk(KERN_INFO PFX
+		       "%s: switching to primary path\n",
+		       vnic->config->name);
+
+		vnic->current_path = &vnic->primary_path;
+		if (vnic->config->use_tx_csum &&
+		    netpath_can_tx_csum(vnic->current_path))
+			vnic->netdevice.features |= NETIF_F_IP_CSUM;
+		break;
+	}
+}
+
+static void vnic_carrier_loss(struct vnic * vnic,
+			      struct netpath *last_path)
+{
+	if (vnic->primary_path.carrier) {
+		vnic->carrier = 1;
+		vnic->current_path = &vnic->primary_path;
+
+		if (last_path && last_path != vnic->current_path)
+			printk(KERN_INFO PFX
+			       "%s: failing over to primary path\n",
+			       vnic->config->name);
+		else if (!last_path)
+			printk(KERN_INFO PFX "%s: using primary path\n",
+			       vnic->config->name);
+
+		if (vnic->config->use_tx_csum &&
+		    netpath_can_tx_csum(vnic->current_path))
+			vnic->netdevice.features |= NETIF_F_IP_CSUM;
+
+	} else if ((vnic->secondary_path.carrier) &&
+		   (vnic->secondary_path.timer_state != NETPATH_TS_ACTIVE)) {
+		vnic->carrier = 1;
+		vnic->current_path = &vnic->secondary_path;
+
+		if (last_path && last_path != vnic->current_path)
+			printk(KERN_INFO PFX
+			       "%s: failing over to secondary path\n",
+			       vnic->config->name);
+		else if (!last_path)
+			printk(KERN_INFO PFX "%s: using secondary path\n",
+			       vnic->config->name);
+
+		if (vnic->config->use_tx_csum &&
+		    netpath_can_tx_csum(vnic->current_path))
+			vnic->netdevice.features |= NETIF_F_IP_CSUM;
+	}
+}
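
The carrier-loss handling above effectively picks a path in a fixed order: the primary whenever it has carrier, otherwise the secondary provided its timer is not still running.  A condensed sketch of just that selection (struct and names invented for the example):

/* Illustrative only -- not part of the patch. */
#include <stdio.h>

struct path {
	const char	*name;
	int		carrier;	/* link is up */
	int		timer_active;	/* still waiting before use */
};

static const struct path *pick_path(const struct path *pri,
				    const struct path *sec)
{
	if (pri->carrier)
		return pri;
	if (sec->carrier && !sec->timer_active)
		return sec;
	return NULL;			/* no usable path: carrier stays off */
}

int main(void)
{
	struct path pri = { "primary", 0, 0 };
	struct path sec = { "secondary", 1, 0 };
	const struct path *p = pick_path(&pri, &sec);

	printf("using %s\n", p ? p->name : "no path");
	return 0;
}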
+
+static void vnic_handle_path_change(struct vnic * vnic,
+				    struct netpath **path)
+{
+	struct netpath * last_path = *path;
+
+	if (!last_path) {
+		if (vnic->current_path == &vnic->primary_path)
+			last_path = &vnic->secondary_path;
+		else
+			last_path = &vnic->primary_path;
+
+	}
+
+	if (vnic->current_path && vnic->current_path->viport)
+		viport_set_link(vnic->current_path->viport,
+				vnic->netdevice.flags,
+				vnic->netdevice.mtu);
+
+	if (last_path->viport)
+		viport_set_link(last_path->viport,
+				 vnic->netdevice.flags &
+				 ~IFF_UP, vnic->netdevice.mtu);
+
+	vnic_restart_xmit(vnic, vnic->current_path);
+}
+
+static void vnic_report_path_change(struct vnic * vnic,
+				    struct netpath *last_path,
+				    int other_path_ok)
+{
+	if (!vnic->current_path) {
+		if (last_path == &vnic->primary_path)
+			printk(KERN_INFO PFX "%s: primary path lost, "
+			       "no failover path available\n",
+			       vnic->config->name);
+		else
+			printk(KERN_INFO PFX "%s: secondary path lost, "
+			       "no failover path available\n",
+			       vnic->config->name);
+		return;
+	}
+
+	if (last_path != vnic->current_path)
+		return;
+
+	if (vnic->current_path == &vnic->secondary_path) {
+		if (other_path_ok != vnic->primary_path.carrier) {
+			if (other_path_ok)
+				printk(KERN_INFO PFX "%s: primary path no"
+				       " longer available for failover\n",
+				       vnic->config->name);
+			else
+				printk(KERN_INFO PFX "%s: primary path now"
+				       " available for failover\n",
+				       vnic->config->name);
+		}
+	} else {
+		if (other_path_ok != vnic->secondary_path.carrier) {
+			if (other_path_ok)
+				printk(KERN_INFO PFX "%s: secondary path no"
+				       " longer available for failover\n",
+				       vnic->config->name);
+			else
+				printk(KERN_INFO PFX "%s: secondary path now"
+				       " available for failover\n",
+				       vnic->config->name);
+		}
+	}
+}
+
+static void vnic_handle_free_vnic_evt(struct vnic * vnic)
+{
+	netpath_timer_stop(&vnic->primary_path);
+	netpath_timer_stop(&vnic->secondary_path);
+	vnic->current_path = NULL;
+	netpath_free(&vnic->primary_path);
+	netpath_free(&vnic->secondary_path);
+	if (vnic->state == VNIC_REGISTERED)
+		unregister_netdev(&vnic->netdevice);
+	vnic_npevent_dequeue_all(vnic);
+	kfree(vnic->config);
+	if (vnic->mc_list_len) {
+		vnic->mc_list_len = vnic->mc_count = 0;
+		kfree(vnic->mc_list);
+	}
+
+	sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj,
+			   &vnic_dev_attr_group);
+	vnic_cleanup_stats_files(vnic);
+	class_device_unregister(&vnic->class_dev_info.class_dev);
+	wait_for_completion(&vnic->class_dev_info.released);
+}
+
+static struct vnic * vnic_handle_npevent(struct vnic *vnic,
+					 enum vnic_npevent_type npevt_type)
+{
+	struct netpath	*netpath;
+
+	VNIC_INFO("%s: processing %s, netpath=%s, carrier=%d\n",
+		  vnic->config->name, vnic_npevent_str[npevt_type],
+		  netpath_to_string(vnic, vnic->current_path),
+		  vnic->carrier);
+
+	switch (npevt_type) {
+	case VNIC_PRINP_CONNECTED:
+		netpath = &vnic->primary_path;
+		if (vnic->state == VNIC_UNINITIALIZED) {
+			if (vnic_npevent_register(vnic, netpath))
+				break;
+		}
+		vnic_set_uni_multicast(vnic, netpath);
+		break;
+	case VNIC_SECNP_CONNECTED:
+		vnic_set_uni_multicast(vnic, &vnic->secondary_path);
+		break;
+	case VNIC_PRINP_TIMEREXPIRED:
+		netpath = &vnic->primary_path;
+		netpath->timer_state = NETPATH_TS_EXPIRED;
+		if (!netpath->carrier)
+			update_path_and_reconnect(netpath, vnic);
+		break;
+	case VNIC_SECNP_TIMEREXPIRED:
+		netpath = &vnic->secondary_path;
+		netpath->timer_state = NETPATH_TS_EXPIRED;
+		if (!netpath->carrier)
+			update_path_and_reconnect(netpath, vnic);
+		else {
+			if (vnic->state == VNIC_UNINITIALIZED)
+				vnic_npevent_register(vnic, netpath);
+		}
+		break;
+	case VNIC_PRINP_LINKUP:
+		vnic->primary_path.carrier = 1;
+		break;
+	case VNIC_SECNP_LINKUP:
+		netpath = &vnic->secondary_path;
+		netpath->carrier = 1;
+		if (!vnic->carrier)
+			vnic_set_netpath_timers(vnic, netpath);
+		break;
+	case VNIC_PRINP_LINKDOWN:
+		vnic->primary_path.carrier = 0;
+		break;
+	case VNIC_SECNP_LINKDOWN:
+		if (vnic->state == VNIC_UNINITIALIZED)
+			netpath_timer_stop(&vnic->secondary_path);
+		vnic->secondary_path.carrier = 0;
+		break;
+	case VNIC_PRINP_DISCONNECTED:
+		netpath = &vnic->primary_path;
+		netpath_timer_stop(netpath);
+		netpath->carrier = 0;
+		update_path_and_reconnect(netpath, vnic);
+		break;
+	case VNIC_SECNP_DISCONNECTED:
+		netpath = &vnic->secondary_path;
+		netpath_timer_stop(netpath);
+		netpath->carrier = 0;
+		update_path_and_reconnect(netpath, vnic);
+		break;
+	case VNIC_NP_FREEVNIC:
+		vnic_handle_free_vnic_evt(vnic);
+		kfree(vnic);
+		vnic = NULL;
+		break;
+	case VNIC_NP_SETLINK:
+		netpath = vnic->current_path;
+		if (!netpath || !netpath->viport)
+			break;
+		viport_set_link(netpath->viport,
+				vnic->netdevice.flags,
+				vnic->netdevice.mtu);
+		break;
+	}
+	return vnic;
+}
+
+static int vnic_npevent_statemachine(void *context)
+{
+	struct vnic_npevent	*vnic_link_evt;
+	enum vnic_npevent_type	npevt_type;
+	struct vnic		*vnic;
+	int			last_carrier;
+	int			other_path_ok = 0;
+	struct netpath		*last_path;
+
+	daemonize("vnic_link_evt");
+
+	while (!vnic_npevent_thread_end ||
+	       !list_empty(&vnic_npevent_list)) {
+		unsigned long flags;
+
+		wait_event_interruptible(vnic_npevent_queue,
+					 !list_empty(&vnic_npevent_list)
+					 || vnic_npevent_thread_end);
+		spin_lock_irqsave(&vnic_npevent_list_lock, flags);
+		if (list_empty(&vnic_npevent_list)) {
+			spin_unlock_irqrestore(&vnic_npevent_list_lock,
+					       flags);
+			VNIC_INFO("netpath statemachine wake"
+				  " on empty list\n");
+			continue;
+		}
+
+		vnic_link_evt = list_entry(vnic_npevent_list.next,
+					   struct vnic_npevent,
+					   list_ptrs);
+		list_del(&vnic_link_evt->list_ptrs);
+		spin_unlock_irqrestore(&vnic_npevent_list_lock, flags);
+		vnic = vnic_link_evt->vnic;
+		npevt_type = vnic_link_evt->event_type;
+		kfree(vnic_link_evt);
+
+		if (vnic->current_path == &vnic->secondary_path)
+			other_path_ok = vnic->primary_path.carrier;
+		else if (vnic->current_path == &vnic->primary_path)
+			other_path_ok = vnic->secondary_path.carrier;
+
+		vnic = vnic_handle_npevent(vnic, npevt_type);
+
+		if (!vnic)
+			continue;
+
+		last_carrier = vnic->carrier;
+		last_path = vnic->current_path;
+
+		if (!vnic->current_path ||
+		    !vnic->current_path->carrier) {
+			vnic->carrier = 0;
+			vnic->current_path = NULL;
+			vnic->netdevice.features &= ~NETIF_F_IP_CSUM;
+		}
+
+		if (!vnic->carrier)
+			vnic_carrier_loss(vnic, last_path);
+		else if ((vnic->current_path != &vnic->primary_path) &&
+			 (vnic->config->prefer_primary) &&
+			 (vnic->primary_path.carrier))
+				vnic_check_primary_path_timer(vnic);
+
+		if (last_path)
+			vnic_report_path_change(vnic, last_path,
+						other_path_ok);
+
+		VNIC_INFO("new netpath=%s, carrier=%d\n",
+			  netpath_to_string(vnic, vnic->current_path),
+			  vnic->carrier);
+
+		if (vnic->current_path != last_path)
+			vnic_handle_path_change(vnic, &last_path);
+
+		if (vnic->carrier != last_carrier) {
+			if (vnic->carrier) {
+				VNIC_INFO("netif_carrier_on\n");
+				netif_carrier_on(&vnic->netdevice);
+				vnic_carrier_loss_stats(vnic);
+			} else {
+				VNIC_INFO("netif_carrier_off\n");
+				netif_carrier_off(&vnic->netdevice);
+				vnic_disconn_stats(vnic);
+			}
+
+		}
+	}
+	complete_and_exit(&vnic_npevent_thread_exit, 0);
+	return 0;
+}
+
+void vnic_npevent_queue_evt(struct netpath *netpath,
+			    enum vnic_npevent_type evt)
+{
+	struct vnic_npevent *npevent;
+	unsigned long flags;
+
+	npevent = kmalloc(sizeof *npevent, GFP_ATOMIC);
+	if (!npevent) {
+		VNIC_ERROR("Could not allocate memory for vnic event\n");
+		return;
+	}
+	npevent->vnic = netpath->parent;
+	npevent->event_type = evt;
+	INIT_LIST_HEAD(&npevent->list_ptrs);
+	spin_lock_irqsave(&vnic_npevent_list_lock, flags);
+	list_add_tail(&npevent->list_ptrs, &vnic_npevent_list);
+	spin_unlock_irqrestore(&vnic_npevent_list_lock, flags);
+	wake_up(&vnic_npevent_queue);
+}
+
+void vnic_npevent_dequeue_evt(struct netpath *netpath,
+			      enum vnic_npevent_type evt)
+{
+	unsigned long flags;
+	struct vnic_npevent *npevt, *tmp;
+	struct vnic * vnic = netpath->parent;
+
+	spin_lock_irqsave(&vnic_npevent_list_lock, flags);
+	if (list_empty(&vnic_npevent_list))
+		goto out;
+	list_for_each_entry_safe(npevt, tmp, &vnic_npevent_list,
+				 list_ptrs) {
+		if ((npevt->vnic == vnic) &&
+		    (npevt->event_type == evt)) {
+			list_del(&npevt->list_ptrs);
+			kfree(npevt);
+			break;
+		}
+	}
+out:
+	spin_unlock_irqrestore(&vnic_npevent_list_lock, flags);
+}
+
+static int vnic_npevent_start(void)
+{
+	VNIC_FUNCTION("vnic_npevent_start()\n");
+
+	if ((vnic_npevent_thread =
+	     kernel_thread(vnic_npevent_statemachine, NULL, 0)) < 0) {
+		printk(KERN_WARNING PFX "failed to create vnic npevent"
+		       " thread; error %d\n", vnic_npevent_thread);
+		return vnic_npevent_thread;
+	}
+
+	return 0;
+}
+
+static void vnic_npevent_cleanup(void)
+{
+	if (vnic_npevent_thread >= 0) {
+		vnic_npevent_thread_end = 1;
+		wake_up(&vnic_npevent_queue);
+		wait_for_completion(&vnic_npevent_thread_exit);
+	}
+}
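
The netpath-event machinery above is a classic single-consumer work queue: producers append events to a lock-protected list and wake one worker thread, which drains the list, dispatches each event, and keeps draining after shutdown is requested until the list is empty.  A minimal user-space analogue using pthreads (the driver uses a kernel thread, a spinlock and a wait queue instead; everything below is invented for the example):

/* Illustrative only -- not part of the patch. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
	int		type;
	struct event	*next;
};

static struct event *head, *tail;
static int stopping;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void queue_event(int type)
{
	struct event *e = malloc(sizeof(*e));

	if (!e)
		return;
	e->type = type;
	e->next = NULL;
	pthread_mutex_lock(&lock);
	if (tail)
		tail->next = e;
	else
		head = e;
	tail = e;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	/* Keep running until told to stop AND the list has been drained. */
	while (!stopping || head) {
		struct event *e = head;

		if (!e) {
			pthread_cond_wait(&cond, &lock);
			continue;
		}
		head = e->next;
		if (!head)
			tail = NULL;
		pthread_mutex_unlock(&lock);
		printf("handling event %d\n", e->type);	/* dispatch */
		free(e);
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	queue_event(1);
	queue_event(2);
	pthread_mutex_lock(&lock);
	stopping = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	pthread_join(tid, NULL);
	return 0;
}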
+
+struct vnic *vnic_allocate(struct vnic_config *config)
+{
+	struct vnic *vnic = NULL;
+	struct net_device *device;
+
+	VNIC_FUNCTION("vnic_allocate()\n");
+	vnic = kzalloc(sizeof *vnic, GFP_KERNEL);
+	if (!vnic) {
+		VNIC_ERROR("failed allocating vnic structure\n");
+		return NULL;
+	}
+
+	vnic->lock = SPIN_LOCK_UNLOCKED;
+	vnic_alloc_stats(vnic);
+	vnic->state = VNIC_UNINITIALIZED;
+	vnic->config = config;
+	device = &vnic->netdevice;
+
+	strcpy(device->name, config->name);
+
+	ether_setup(device);
+
+	device->priv			= (void *)vnic;
+	device->get_stats		= vnic_get_stats;
+	device->open			= vnic_open;
+	device->stop			= vnic_stop;
+	device->hard_start_xmit		= vnic_hard_start_xmit;
+	device->tx_timeout		= vnic_tx_timeout;
+	device->set_multicast_list	= vnic_set_multicast_list;
+	device->set_mac_address		= vnic_set_mac_address;
+	device->change_mtu		= vnic_change_mtu;
+	device->watchdog_timeo 		= HZ;
+	device->features		= 0;
+
+	netpath_init(&vnic->primary_path, vnic, 0);
+	netpath_init(&vnic->secondary_path, vnic, 1);
+
+	vnic->current_path = NULL;
+
+	list_add_tail(&vnic->list_ptrs, &vnic_list);
+
+	return vnic;
+}
+
+void vnic_free(struct vnic *vnic)
+{
+	VNIC_FUNCTION("vnic_free()\n");
+	list_del(&vnic->list_ptrs);
+	vnic_npevent_queue_evt(&vnic->primary_path, VNIC_NP_FREEVNIC);
+}
+
+static void __exit vnic_cleanup(void)
+{
+	VNIC_FUNCTION("vnic_cleanup()\n");
+
+	VNIC_INIT("unloading %s\n", MODULEDETAILS);
+
+	while (!list_empty(&vnic_list)) {
+		struct vnic *vnic =
+		    list_entry(vnic_list.next, struct vnic, list_ptrs);
+		vnic_free(vnic);
+	}
+
+	vnic_npevent_cleanup();
+	viport_cleanup();
+	vnic_ib_cleanup();
+}
+
+static int __init vnic_init(void)
+{
+	int ret;
+	VNIC_FUNCTION("vnic_init()\n");
+	VNIC_INIT("Initializing %s\n", MODULEDETAILS);
+
+	if ((ret = config_start())) {
+		VNIC_ERROR("config_start failed\n");
+		goto failure;
+	}
+
+	if ((ret = vnic_ib_init())) {
+		VNIC_ERROR("vnic_ib_init failed\n");
+		goto failure;
+	}
+
+	if ((ret = viport_start())) {
+		VNIC_ERROR("viport_start failed\n");
+		goto failure;
+	}
+
+	if ((ret = vnic_npevent_start())) {
+		VNIC_ERROR("vnic_npevent_start failed\n");
+		goto failure;
+	}
+
+	return 0;
+failure:
+	vnic_cleanup();
+	return ret;
+}
+
+module_init(vnic_init);
+module_exit(vnic_cleanup);
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_main.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_main.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_MAIN_H_INCLUDED
+#define VNIC_MAIN_H_INCLUDED
+
+#include <linux/timex.h>
+#include <linux/netdevice.h>
+
+#include "vnic_config.h"
+#include "vnic_netpath.h"
+
+enum vnic_npevent_type {
+	VNIC_PRINP_CONNECTED	= 0,
+	VNIC_PRINP_DISCONNECTED	= 1,
+	VNIC_PRINP_LINKUP	= 2,
+	VNIC_PRINP_LINKDOWN	= 3,
+	VNIC_PRINP_TIMEREXPIRED	= 4,
+	VNIC_SECNP_CONNECTED	= 5,
+	VNIC_SECNP_DISCONNECTED	= 6,
+	VNIC_SECNP_LINKUP	= 7,
+	VNIC_SECNP_LINKDOWN	= 8,
+	VNIC_SECNP_TIMEREXPIRED	= 9,
+	VNIC_NP_SETLINK		= 10,
+	VNIC_NP_FREEVNIC	= 11
+};
+
+struct vnic_npevent {
+	struct list_head	list_ptrs;
+	struct vnic		*vnic;
+	enum vnic_npevent_type	event_type;
+};
+
+void vnic_npevent_queue_evt(struct netpath *netpath,
+			    enum vnic_npevent_type evt);
+void vnic_npevent_dequeue_evt(struct netpath *netpath,
+			      enum vnic_npevent_type evt);
+
+enum vnic_state {
+	VNIC_UNINITIALIZED	= 0,
+	VNIC_REGISTERED		= 1
+};
+
+struct vnic {
+	struct list_head		list_ptrs;
+	enum vnic_state			state;
+	struct vnic_config		*config;
+	struct netpath			*current_path;
+	struct netpath			primary_path;
+	struct netpath			secondary_path;
+	int				open;
+	int				carrier;
+	int				xmit_started;
+	int				mac_set;
+	struct net_device_stats 	stats;
+	struct net_device		netdevice;
+	struct class_dev_info		class_dev_info;
+	struct dev_mc_list		*mc_list;
+	int				mc_list_len;
+	int				mc_count;
+	spinlock_t			lock;
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+	struct {
+		cycles_t	start_time;
+		cycles_t	conn_time;
+		cycles_t	disconn_ref;	/* intermediate time */
+		cycles_t	disconn_time;
+		u32		disconn_num;
+		cycles_t	xmit_time;
+		u32		xmit_num;
+		u32		xmit_fail;
+		cycles_t	recv_time;
+		u32		recv_num;
+		cycles_t	xmit_ref;	/* intermediate time */
+		cycles_t	xmit_off_time;
+		u32		xmit_off_num;
+		cycles_t	carrier_ref;	/* intermediate time */
+		cycles_t	carrier_off_time;
+		u32		carrier_off_num;
+	} statistics;
+	struct class_dev_info	stat_info;
+#endif	/* CONFIG_INFINIBAND_VNIC_STATS */
+};
+
+struct vnic *vnic_allocate(struct vnic_config *config);
+
+void vnic_free(struct vnic *vnic);
+
+void vnic_connected(struct vnic *vnic, struct netpath *netpath);
+void vnic_disconnected(struct vnic *vnic, struct netpath *netpath);
+
+void vnic_link_up(struct vnic *vnic, struct netpath *netpath);
+void vnic_link_down(struct vnic *vnic, struct netpath *netpath);
+
+void vnic_stop_xmit(struct vnic *vnic, struct netpath *netpath);
+void vnic_restart_xmit(struct vnic *vnic, struct netpath *netpath);
+
+void vnic_recv_packet(struct vnic *vnic, struct netpath *netpath,
+		      struct sk_buff *skb);
+
+#endif	/* VNIC_MAIN_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_netpath.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_netpath.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "vnic_util.h"
+#include "vnic_main.h"
+#include "vnic_viport.h"
+#include "vnic_netpath.h"
+
+void vnic_npevent_timeout(unsigned long data)
+{
+	struct netpath *netpath = (struct netpath *)data;
+
+	if (netpath->second_bias)
+		vnic_npevent_queue_evt(netpath, VNIC_SECNP_TIMEREXPIRED);
+	else
+		vnic_npevent_queue_evt(netpath, VNIC_PRINP_TIMEREXPIRED);
+}
+
+void netpath_timer(struct netpath *netpath, int timeout)
+{
+	if (netpath->timer_state == NETPATH_TS_ACTIVE)
+		del_timer_sync(&netpath->timer);
+	if (timeout) {
+		init_timer(&netpath->timer);
+		netpath->timer_state = NETPATH_TS_ACTIVE;
+		netpath->timer.expires = jiffies + timeout;
+		netpath->timer.data = (unsigned long)netpath;
+		netpath->timer.function = vnic_npevent_timeout;
+		add_timer(&netpath->timer);
+	} else
+		vnic_npevent_timeout((unsigned long)netpath);
+}
+
+void netpath_timer_stop(struct netpath *netpath)
+{
+	if (netpath->timer_state != NETPATH_TS_ACTIVE)
+		return;
+	del_timer_sync(&netpath->timer);
+	if (netpath->second_bias)
+		vnic_npevent_dequeue_evt(netpath, VNIC_SECNP_TIMEREXPIRED);
+	else
+		vnic_npevent_dequeue_evt(netpath, VNIC_PRINP_TIMEREXPIRED);
+
+	netpath->timer_state = NETPATH_TS_IDLE;
+}
+
+void netpath_free(struct netpath *netpath)
+{
+	if (!netpath->viport)
+		return;
+	viport_free(netpath->viport);
+	netpath->viport = NULL;
+	sysfs_remove_group(&netpath->class_dev_info.class_dev.kobj,
+			   &vnic_path_attr_group);
+	class_device_unregister(&netpath->class_dev_info.class_dev);
+	wait_for_completion(&netpath->class_dev_info.released);
+}
+
+void netpath_init(struct netpath *netpath, struct vnic *vnic,
+		  int second_bias)
+{
+	netpath->parent = vnic;
+	netpath->carrier = 0;
+	netpath->viport = NULL;
+	netpath->second_bias = second_bias;
+	netpath->timer_state = NETPATH_TS_IDLE;
+	init_timer(&netpath->timer);
+}
+
+const char *netpath_to_string(struct vnic *vnic, struct netpath *netpath)
+{
+	if (!netpath)
+		return "NULL";
+	else if (netpath == &vnic->primary_path)
+		return "PRIMARY";
+	else if (netpath == &vnic->secondary_path)
+		return "SECONDARY";
+	else
+		return "UNKNOWN";
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_netpath.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_netpath.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_NETPATH_H_INCLUDED
+#define VNIC_NETPATH_H_INCLUDED
+
+#include <linux/spinlock.h>
+
+#include "vnic_sys.h"
+
+struct viport;
+struct vnic;
+
+enum netpath_ts {
+	NETPATH_TS_IDLE		= 0,
+	NETPATH_TS_ACTIVE	= 1,
+	NETPATH_TS_EXPIRED	= 2
+};
+
+struct netpath {
+	int			carrier;
+	struct vnic		*parent;
+	struct viport		*viport;
+	size_t			path_idx;
+	u32			connect_time;
+	int			second_bias;
+	struct timer_list	timer;
+	enum netpath_ts		timer_state;
+	struct class_dev_info	class_dev_info;
+};
+
+void netpath_init(struct netpath *netpath, struct vnic *vnic,
+		  int second_bias);
+void netpath_free(struct netpath *netpath);
+
+void netpath_timer(struct netpath *netpath, int timeout);
+void netpath_timer_stop(struct netpath *netpath);
+
+const char *netpath_to_string(struct vnic *vnic, struct netpath *netpath);
+
+#define netpath_get_hw_addr(netpath, address)		\
+	viport_get_hw_addr((netpath)->viport, address)
+#define netpath_is_connected(netpath)			\
+	((netpath)->state == NETPATH_CONNECTED)
+#define netpath_can_tx_csum(netpath)			\
+	viport_can_tx_csum((netpath)->viport)
+
+#endif	/* VNIC_NETPATH_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_stats.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_stats.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+
+#include "vnic_main.h"
+
+cycles_t recv_ref;
+
+/*
+ * TODO: Statistics reporting for control path, data path,
+ *       RDMA times, IOs etc
+ *
+ */
+static ssize_t show_lifetime(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+	cycles_t time = get_cycles() - vnic->statistics.start_time;
+
+	return sprintf(buf, "%llu\n", (unsigned long long)time);
+}
+
+static CLASS_DEVICE_ATTR(lifetime, S_IRUGO, show_lifetime, NULL);
+
+static ssize_t show_conntime(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	if (vnic->statistics.conn_time)
+		return sprintf(buf, "%llu\n",
+			   (unsigned long long)vnic->statistics.conn_time);
+	return 0;
+}
+
+static CLASS_DEVICE_ATTR(connection_time, S_IRUGO, show_conntime, NULL);
+
+static ssize_t show_disconnects(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+	u32 num;
+
+	if (vnic->statistics.disconn_ref)
+		num = vnic->statistics.disconn_num + 1;
+	else
+		num = vnic->statistics.disconn_num;
+
+	return sprintf(buf, "%u\n", num);
+}
+
+static CLASS_DEVICE_ATTR(disconnects, S_IRUGO, show_disconnects, NULL);
+
+static ssize_t show_total_disconn_time(struct class_device *class_dev,
+				       char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+	cycles_t time;
+
+	if (vnic->statistics.disconn_ref)
+		time = vnic->statistics.disconn_time +
+		       get_cycles() - vnic->statistics.disconn_ref;
+	else
+		time = vnic->statistics.disconn_time;
+
+	return sprintf(buf, "%llu\n", (unsigned long long)time);
+}
+
+static CLASS_DEVICE_ATTR(total_disconn_time, S_IRUGO,
+			 show_total_disconn_time, NULL);
+
+static ssize_t show_carrier_losses(struct class_device *class_dev,
+				   char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+	u32 num;
+
+	if (vnic->statistics.carrier_ref)
+		num = vnic->statistics.carrier_off_num + 1;
+	else
+		num = vnic->statistics.carrier_off_num;
+
+	return sprintf(buf, "%u\n", num);
+}
+
+static CLASS_DEVICE_ATTR(carrier_losses, S_IRUGO,
+			 show_carrier_losses, NULL);
+
+static ssize_t show_total_carr_loss_time(struct class_device *class_dev,
+					 char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+	cycles_t time;
+
+	if (vnic->statistics.carrier_ref)
+		time = vnic->statistics.carrier_off_time +
+		       get_cycles() - vnic->statistics.carrier_ref;
+	else
+		time = vnic->statistics.carrier_off_time;
+
+	return sprintf(buf, "%llu\n", (unsigned long long)time);
+}
+
+static CLASS_DEVICE_ATTR(total_carrier_loss_time, S_IRUGO,
+			 show_total_carr_loss_time, NULL);
+
+static ssize_t show_total_recv_time(struct class_device *class_dev,
+				    char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)vnic->statistics.recv_time);
+}
+
+static CLASS_DEVICE_ATTR(total_recv_time, S_IRUGO,
+			 show_total_recv_time, NULL);
+
+static ssize_t show_recvs(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	return sprintf(buf, "%u\n", vnic->statistics.recv_num);
+}
+
+static CLASS_DEVICE_ATTR(recvs, S_IRUGO, show_recvs, NULL);
+
+static ssize_t show_total_xmit_time(struct class_device *class_dev,
+				    char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)vnic->statistics.xmit_time);
+}
+
+static CLASS_DEVICE_ATTR(total_xmit_time, S_IRUGO,
+			 show_total_xmit_time, NULL);
+
+static ssize_t show_xmits(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	return sprintf(buf, "%u\n", vnic->statistics.xmit_num);
+}
+
+static CLASS_DEVICE_ATTR(xmits, S_IRUGO, show_xmits, NULL);
+
+static ssize_t show_failed_xmits(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+		container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, stat_info);
+
+	return sprintf(buf, "%u\n", vnic->statistics.xmit_fail);
+}
+
+static CLASS_DEVICE_ATTR(failed_xmits, S_IRUGO, show_failed_xmits, NULL);
+
+static struct attribute * vnic_stats_attrs[] = {
+	&class_device_attr_lifetime.attr,
+	&class_device_attr_xmits.attr,
+	&class_device_attr_total_xmit_time.attr,
+	&class_device_attr_failed_xmits.attr,
+	&class_device_attr_recvs.attr,
+	&class_device_attr_total_recv_time.attr,
+	&class_device_attr_connection_time.attr,
+	&class_device_attr_disconnects.attr,
+	&class_device_attr_total_disconn_time.attr,
+	&class_device_attr_carrier_losses.attr,
+	&class_device_attr_total_carrier_loss_time.attr,
+	NULL
+};
+
+struct attribute_group vnic_stats_attr_group = {
+	.attrs = vnic_stats_attrs,
+};
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_stats.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_stats.h
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_STATS_H_INCLUDED
+#define VNIC_STATS_H_INCLUDED
+
+#include "vnic_main.h"
+#include "vnic_ib.h"
+
+#ifdef CONFIG_INFINIBAND_VNIC_STATS
+
+extern struct attribute_group vnic_stats_attr_group;
+extern cycles_t recv_ref;
+
+static inline void vnic_connected_stats(struct vnic *vnic)
+{
+	if (vnic->statistics.conn_time == 0) {
+		vnic->statistics.conn_time =
+		    get_cycles() - vnic->statistics.start_time;
+	}
+
+	if (vnic->statistics.disconn_ref != 0) {
+		vnic->statistics.disconn_time +=
+		    get_cycles() - vnic->statistics.disconn_ref;
+		vnic->statistics.disconn_num++;
+		vnic->statistics.disconn_ref = 0;
+	}
+}
+
+static inline void vnic_stop_xmit_stats(struct vnic * vnic)
+{
+	if (vnic->statistics.xmit_ref == 0)
+		vnic->statistics.xmit_ref = get_cycles();
+}
+
+static inline void vnic_restart_xmit_stats(struct vnic *vnic)
+{
+	if (vnic->statistics.xmit_ref != 0) {
+		vnic->statistics.xmit_off_time +=
+		    get_cycles() - vnic->statistics.xmit_ref;
+		vnic->statistics.xmit_off_num++;
+		vnic->statistics.xmit_ref = 0;
+	}
+}
+
+static inline void vnic_recv_pkt_stats(struct vnic *vnic)
+{
+	vnic->statistics.recv_time += get_cycles() - recv_ref;
+	vnic->statistics.recv_num++;
+}
+
+static inline void vnic_pre_pkt_xmit_stats(cycles_t *time)
+{
+	*time = get_cycles();
+}
+
+static inline void vnic_post_pkt_xmit_stats(struct vnic *vnic,
+					    cycles_t time)
+{
+	vnic->statistics.xmit_time += get_cycles() - time;
+	vnic->statistics.xmit_num++;
+}
+
+static inline void vnic_xmit_fail_stats(struct vnic *vnic)
+{
+	vnic->statistics.xmit_fail++;
+}
+
+static inline void vnic_carrier_loss_stats(struct vnic *vnic)
+{
+	if (vnic->statistics.carrier_ref != 0) {
+		vnic->statistics.carrier_off_time +=
+			get_cycles() -  vnic->statistics.carrier_ref;
+		vnic->statistics.carrier_off_num++;
+		vnic->statistics.carrier_ref = 0;
+	}
+}
+
+static inline int vnic_setup_stats_files(struct vnic *vnic)
+{
+	init_completion(&vnic->stat_info.released);
+	vnic->stat_info.class_dev.class = &vnic_class;
+	vnic->stat_info.class_dev.parent = &vnic->class_dev_info.class_dev;
+	snprintf(vnic->stat_info.class_dev.class_id, BUS_ID_SIZE,
+		 "stats");
+
+	if (class_device_register(&vnic->stat_info.class_dev)) {
+		SYS_ERROR("create_vnic: error in registering"
+			  " stat class dev\n");
+		goto stats_out;
+	}
+
+	if (sysfs_create_group(&vnic->stat_info.class_dev.kobj,
+			       &vnic_stats_attr_group))
+		goto err_stats_file;
+
+	return 0;
+err_stats_file:
+	class_device_unregister(&vnic->stat_info.class_dev);
+	wait_for_completion(&vnic->stat_info.released);
+stats_out:
+	return -1;
+}
+
+static inline void vnic_cleanup_stats_files(struct vnic * vnic)
+{
+	sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj,
+			   &vnic_stats_attr_group);
+	class_device_unregister(&vnic->stat_info.class_dev);
+	wait_for_completion(&vnic->stat_info.released);
+}
+
+static inline void vnic_disconn_stats(struct vnic *vnic)
+{
+	if (!vnic->statistics.disconn_ref)
+		vnic->statistics.disconn_ref = get_cycles();
+
+	if (vnic->statistics.carrier_ref == 0)
+		vnic->statistics.carrier_ref = get_cycles();
+}
+
+static inline void vnic_alloc_stats(struct vnic *vnic)
+{
+	vnic->statistics.start_time = get_cycles();
+}
+
+static inline void control_note_rsptime_stats(cycles_t *time)
+{
+	*time = get_cycles();
+}
+
+static inline void control_update_rsptime_stats(struct control *control,
+					        cycles_t response_time)
+{
+	response_time -= control->statistics.request_time;
+	control->statistics.response_time += response_time;
+	control->statistics.response_num++;
+	if (control->statistics.response_max < response_time)
+		control->statistics.response_max = response_time;
+	if ((control->statistics.response_min == 0) ||
+	    (control->statistics.response_min > response_time))
+		control->statistics.response_min = response_time;
+}
+
+static inline void control_note_reqtime_stats(struct control * control)
+{
+	control->statistics.request_time = get_cycles();
+}
+
+static inline void control_timeout_stats(struct control *control)
+{
+	control->statistics.timeout_num++;
+}
+
+static inline void data_kickreq_stats(struct data * data)
+{
+	data->statistics.kick_reqs++;
+}
+
+static inline void data_no_xmitbuf_stats(struct data * data)
+{
+	data->statistics.no_xmit_bufs++;
+}
+
+static inline void data_xmits_stats(struct data * data)
+{
+	data->statistics.xmit_num++;
+}
+
+static inline void data_recvs_stats(struct data * data)
+{
+	data->statistics.recv_num++;
+}
+
+static inline void data_note_kickrcv_time(void)
+{
+	recv_ref = get_cycles();
+}
+
+static inline void data_rcvkicks_stats(struct data * data)
+{
+	data->statistics.kick_recvs++;
+}
+
+
+static inline void vnic_ib_conntime_stats(struct vnic_ib_conn *ib_conn)
+{
+	ib_conn->statistics.connection_time = get_cycles();
+}
+
+static inline void vnic_ib_note_comptime_stats(cycles_t *time)
+{
+	*time = get_cycles();
+}
+
+static inline void vnic_ib_callback_stats(struct vnic_ib_conn *ib_conn)
+{
+	ib_conn->statistics.num_callbacks++;
+}
+
+static inline void vnic_ib_comp_stats(struct vnic_ib_conn *ib_conn,
+				      u32 *comp_num)
+{
+	ib_conn->statistics.num_ios++;
+	*comp_num = *comp_num + 1;
+}
+
+static inline void vnic_ib_io_stats(struct io * io,
+				    struct vnic_ib_conn *ib_conn,
+				    cycles_t comp_time)
+{
+	if (io->type == RECV)
+		io->time = comp_time;
+	else if (io->type == RDMA) {
+		ib_conn->statistics.rdma_comp_time += comp_time - io->time;
+		ib_conn->statistics.rdma_comp_ios++;
+	} else if (io->type == SEND) {
+		ib_conn->statistics.send_comp_time += comp_time - io->time;
+		ib_conn->statistics.send_comp_ios++;
+	}
+}
+
+static inline void vnic_ib_maxio_stats(struct vnic_ib_conn *ib_conn,
+				       u32 comp_num)
+{
+	if (comp_num > ib_conn->statistics.max_ios)
+		ib_conn->statistics.max_ios = comp_num;
+}
+
+static inline void vnic_ib_connected_time_stats(struct vnic_ib_conn *ib_conn)
+{
+	ib_conn->statistics.connection_time =
+			 get_cycles() - ib_conn->statistics.connection_time;
+}
+
+static inline void vnic_ib_pre_rcvpost_stats(struct vnic_ib_conn *ib_conn,
+					     struct io *io,
+					     cycles_t *time)
+{
+	*time = get_cycles();
+	if (io->time != 0) {
+		ib_conn->statistics.recv_comp_time += *time - io->time;
+		ib_conn->statistics.recv_comp_ios++;
+	}
+}
+
+static inline void vnic_ib_post_rcvpost_stats(struct vnic_ib_conn *ib_conn,
+					      cycles_t time)
+{
+	ib_conn->statistics.recv_post_time += get_cycles() - time;
+	ib_conn->statistics.recv_post_ios++;
+}
+
+static inline void vnic_ib_pre_sendpost_stats(struct io *io,
+					      cycles_t *time)
+{
+	io->time = *time = get_cycles();
+}
+
+static inline void vnic_ib_post_sendpost_stats(struct vnic_ib_conn *ib_conn,
+					       struct io *io,
+					       cycles_t time)
+{
+	time = get_cycles() - time;
+	if (io->swr.opcode == IB_WR_RDMA_WRITE) {
+		ib_conn->statistics.rdma_post_time += time;
+		ib_conn->statistics.rdma_post_ios++;
+	} else {
+		ib_conn->statistics.send_post_time += time;
+		ib_conn->statistics.send_post_ios++;
+	}
+}
+#else	/* CONFIG_INFINIBAND_VNIC_STATS */
+
+static inline void vnic_connected_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void vnic_stop_xmit_stats(struct vnic * vnic)
+{
+	;
+}
+
+static inline void vnic_restart_xmit_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void vnic_recv_pkt_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void vnic_pre_pkt_xmit_stats(cycles_t *time)
+{
+	;
+}
+
+static inline void vnic_post_pkt_xmit_stats(struct vnic *vnic,
+					    cycles_t time)
+{
+	;
+}
+
+static inline void vnic_xmit_fail_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline int vnic_setup_stats_files(struct vnic *vnic)
+{
+	return 0;
+}
+
+static inline void vnic_cleanup_stats_files(struct vnic * vnic)
+{
+	;
+}
+
+static inline void vnic_carrier_loss_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void vnic_disconn_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void vnic_alloc_stats(struct vnic *vnic)
+{
+	;
+}
+
+static inline void control_note_rsptime_stats(cycles_t *time)
+{
+	;
+}
+
+static inline void control_update_rsptime_stats(struct control *control,
+					        cycles_t response_time)
+{
+	;
+}
+
+static inline void control_note_reqtime_stats(struct control * control)
+{
+	;
+}
+
+static inline void control_timeout_stats(struct control *control)
+{
+	;
+}
+
+static inline void data_kickreq_stats(struct data * data)
+{
+	;
+}
+
+static inline void data_no_xmitbuf_stats(struct data * data)
+{
+	;
+}
+
+static inline void data_xmits_stats(struct data * data)
+{
+	;
+}
+
+static inline void data_recvs_stats(struct data * data)
+{
+	;
+}
+
+static inline void data_note_kickrcv_time(void)
+{
+	;
+}
+
+static inline void data_rcvkicks_stats(struct data * data)
+{
+	;
+}
+
+static inline void vnic_ib_conntime_stats(struct vnic_ib_conn *ib_conn)
+{
+	;
+}
+
+static inline void vnic_ib_note_comptime_stats(cycles_t *time)
+{
+	;
+}
+
+static inline void vnic_ib_callback_stats(struct vnic_ib_conn *ib_conn)
+{
+	;
+}
+
+static inline void vnic_ib_comp_stats(struct vnic_ib_conn *ib_conn,
+				      u32 *comp_num)
+{
+	;
+}
+
+static inline void vnic_ib_io_stats(struct io * io,
+				    struct vnic_ib_conn *ib_conn,
+				    cycles_t comp_time)
+{
+	;
+}
+
+static inline void vnic_ib_maxio_stats(struct vnic_ib_conn *ib_conn,
+				       u32 comp_num)
+{
+	;
+}
+
+static inline void vnic_ib_connected_time_stats(struct vnic_ib_conn *ib_conn)
+{
+	;
+}
+
+static inline void vnic_ib_pre_rcvpost_stats(struct vnic_ib_conn *ib_conn,
+					     struct io *io,
+					     cycles_t *time)
+{
+	;
+}
+
+static inline void vnic_ib_post_rcvpost_stats(struct vnic_ib_conn *ib_conn,
+					      cycles_t time)
+{
+	;
+}
+
+static inline void vnic_ib_pre_sendpost_stats(struct io *io,
+					      cycles_t *time)
+{
+	;
+}
+
+static inline void vnic_ib_post_sendpost_stats(struct vnic_ib_conn *ib_conn,
+					       struct io *io,
+					       cycles_t time)
+{
+	;
+}
+#endif	/*CONFIG_INFINIBAND_VNIC_STATS*/
+
+#endif	/*VNIC_STATS_H_INCLUDED*/
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_sys.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_sys.c
@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/parser.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+
+#include "vnic_util.h"
+#include "vnic_config.h"
+#include "vnic_ib.h"
+#include "vnic_viport.h"
+#include "vnic_main.h"
+#include "vnic_stats.h"
+
+extern struct list_head vnic_list;
+
+/*
+ * target eiocs are added by writing
+ *
+ * ioc_guid=<EIOC GUID>,dgid=<dest GID>,pkey=<P_key>,name=<interface_name>
+ * to the create_primary sysfs attribute.
+ */
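+
+/*
+ * For example (the GUID, GID and interface name below are illustrative
+ * values only, not taken from any real fabric):
+ *
+ *   echo -n "ioc_guid=66a0130000043e,dgid=fe800000000000000005ad0000011234,pkey=ffff,name=eioc1" \
+ *	> create_primary
+ */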
+enum {
+	VNIC_OPT_ERR = 0,
+	VNIC_OPT_IOC_GUID = 1 << 0,
+	VNIC_OPT_DGID = 1 << 1,
+	VNIC_OPT_PKEY = 1 << 2,
+	VNIC_OPT_NAME = 1 << 3,
+	VNIC_OPT_INSTANCE = 1 << 4,
+	VNIC_OPT_RXCSUM = 1 << 5,
+	VNIC_OPT_TXCSUM = 1 << 6,
+	VNIC_OPT_HEARTBEAT = 1 << 7,
+	VNIC_OPT_ALL = (VNIC_OPT_IOC_GUID |
+			VNIC_OPT_DGID | VNIC_OPT_NAME | VNIC_OPT_PKEY),
+};
+
+static match_table_t vnic_opt_tokens = {
+	{VNIC_OPT_IOC_GUID, "ioc_guid=%s"},
+	{VNIC_OPT_DGID, "dgid=%s"},
+	{VNIC_OPT_PKEY, "pkey=%x"},
+	{VNIC_OPT_NAME, "name=%s"},
+	{VNIC_OPT_INSTANCE, "instance=%d"},
+	{VNIC_OPT_RXCSUM, "rx_csum=%s"},
+	{VNIC_OPT_TXCSUM, "tx_csum=%s"},
+	{VNIC_OPT_HEARTBEAT, "heartbeat=%d"},
+	{VNIC_OPT_ERR, NULL}
+};
+
+static void vnic_release_class_dev(struct class_device *class_dev)
+{
+	struct class_dev_info *cdev_info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+
+	complete(&cdev_info->released);
+
+}
+
+struct class vnic_class = {
+	.name = "infiniband_vnic",
+	.release = vnic_release_class_dev
+};
+
+struct class_dev_info interface_cdev;
+
+static int vnic_parse_options(const char *buf, struct path_param *param)
+{
+	char *options, *sep_opt;
+	char *p;
+	char dgid[3];
+	substring_t args[MAX_OPT_ARGS];
+	int opt_mask = 0;
+	int token;
+	int ret = -EINVAL;
+	int i;
+
+	options = kstrdup(buf, GFP_KERNEL);
+	if (!options)
+		return -ENOMEM;
+
+	sep_opt = options;
+	while ((p = strsep(&sep_opt, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		token = match_token(p, vnic_opt_tokens, args);
+		opt_mask |= token;
+
+		switch (token) {
+		case VNIC_OPT_IOC_GUID:
+			p = match_strdup(args);
+			param->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL,
+								      16));
+			kfree(p);
+			break;
+
+		case VNIC_OPT_DGID:
+			p = match_strdup(args);
+			if (strlen(p) != 32) {
+				printk(KERN_WARNING PFX
+				       "bad dest GID parameter '%s'\n", p);
+				kfree(p);
+				goto out;
+			}
+
+			for (i = 0; i < 16; ++i) {
+				strlcpy(dgid, p + i * 2, 3);
+				param->dgid[i] = simple_strtoul(dgid, NULL,
+								16);
+
+			}
+			kfree(p);
+			break;
+
+		case VNIC_OPT_PKEY:
+			if (match_hex(args, &token)) {
+				printk(KERN_WARNING PFX
+				       "bad P_key parameter '%s'\n", p);
+				goto out;
+			}
+			param->pkey = cpu_to_be16(token);
+			break;
+
+		case VNIC_OPT_NAME:
+			p = match_strdup(args);
+			if (strlen(p) >= IFNAMSIZ) {
+				printk(KERN_WARNING PFX
+				       "interface name parameter too long\n");
+				kfree(p);
+				goto out;
+			}
+			strcpy(param->name, p);
+			kfree(p);
+			break;
+		case VNIC_OPT_INSTANCE:
+			if (match_int(args, &token)) {
+				printk(KERN_WARNING PFX
+				       "bad instance parameter '%s'\n", p);
+				goto out;
+			}
+
+			if (token > 255 || token < 0) {
+				printk(KERN_WARNING PFX
+				       "instance parameter must be"
+				       " >= 0 and <= 255\n");
+				goto out;
+			}
+
+			param->instance = token;
+			break;
+		case VNIC_OPT_RXCSUM:
+			p = match_strdup(args);
+			if (!strncmp(p, "true", 4))
+				param->rx_csum = 1;
+			else if (!strncmp(p, "false", 5))
+				param->rx_csum = 0;
+			else {
+				printk(KERN_WARNING PFX
+				       "bad rx_csum parameter."
+				       " must be 'true' or 'false'\n");
+				kfree(p);
+				goto out;
+			}
+			kfree(p);
+			break;
+		case VNIC_OPT_TXCSUM:
+			p = match_strdup(args);
+			if (!strncmp(p, "true", 4))
+				param->tx_csum = 1;
+			else if (!strncmp(p, "false", 5))
+				param->tx_csum = 0;
+			else {
+				printk(KERN_WARNING PFX
+				       "bad tx_csum parameter."
+				       " must be 'true' or 'false'\n");
+				kfree(p);
+				goto out;
+			}
+			kfree(p);
+			break;
+		case VNIC_OPT_HEARTBEAT:
+			if (match_int(args, &token)) {
+				printk(KERN_WARNING PFX
+				       "bad heartbeat parameter '%s'\n", p);
+				goto out;
+			}
+
+			if (token > 6000 || token < 0) {
+				printk(KERN_WARNING PFX
+				       "heartbeat parameter must be"
+				       " >= 0 and <= 6000\n");
+				goto out;
+			}
+			param->heartbeat = token;
+			break;
+		default:
+			printk(KERN_WARNING PFX
+			       "unknown parameter or missing value "
+			       "'%s' in target creation request\n", p);
+			goto out;
+		}
+
+	}
+
+	if ((opt_mask & VNIC_OPT_ALL) == VNIC_OPT_ALL)
+		ret = 0;
+	else
+		for (i = 0; i < ARRAY_SIZE(vnic_opt_tokens); ++i)
+			if ((vnic_opt_tokens[i].token & VNIC_OPT_ALL) &&
+			    !(vnic_opt_tokens[i].token & opt_mask))
+				printk(KERN_WARNING PFX
+				       "target creation request is "
+				       "missing parameter '%s'\n",
+				       vnic_opt_tokens[i].pattern);
+
+out:
+	kfree(options);
+	return ret;
+
+}
+
+static ssize_t show_vnic_state(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, class_dev_info);
+	switch (vnic->state) {
+	case VNIC_UNINITIALIZED:
+		return sprintf(buf, "VNIC_UNINITIALIZED\n");
+	case VNIC_REGISTERED:
+		return sprintf(buf, "VNIC_REGISTERED\n");
+	default:
+		return sprintf(buf, "INVALID STATE\n");
+	}
+
+}
+
+static CLASS_DEVICE_ATTR(vnic_state, S_IRUGO, show_vnic_state, NULL);
+
+static ssize_t show_rx_csum(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, class_dev_info);
+
+	if (vnic->config->use_rx_csum)
+		return sprintf(buf, "true\n");
+	else
+		return sprintf(buf, "false\n");
+}
+
+static CLASS_DEVICE_ATTR(rx_csum, S_IRUGO, show_rx_csum, NULL);
+
+static ssize_t show_tx_csum(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, class_dev_info);
+
+	if (vnic->config->use_tx_csum)
+		return sprintf(buf, "true\n");
+	else
+		return sprintf(buf, "false\n");
+}
+
+static CLASS_DEVICE_ATTR(tx_csum, S_IRUGO, show_tx_csum, NULL);
+
+static ssize_t show_current_path(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic *vnic = container_of(info, struct vnic, class_dev_info);
+
+	if (vnic->current_path == &vnic->primary_path)
+		return sprintf(buf, "primary path\n");
+	else if (vnic->current_path == &vnic->secondary_path)
+		return sprintf(buf, "secondary path\n");
+	else
+		return sprintf(buf, "none\n");
+
+}
+
+static CLASS_DEVICE_ATTR(current_path, S_IRUGO, show_current_path, NULL);
+
+static struct attribute * vnic_dev_attrs[] = {
+	&class_device_attr_vnic_state.attr,
+	&class_device_attr_rx_csum.attr,
+	&class_device_attr_tx_csum.attr,
+	&class_device_attr_current_path.attr,
+	NULL
+};
+
+struct attribute_group vnic_dev_attr_group = {
+	.attrs = vnic_dev_attrs,
+};
+
+static int create_netpath(struct netpath *npdest,
+			  struct path_param *p_params)
+{
+	struct viport_config	*viport_config;
+	struct viport		*viport;
+	struct vnic		*vnic;
+	struct list_head	*ptr;
+	int			ret = 0;
+
+	list_for_each(ptr, &vnic_list) {
+		vnic = list_entry(ptr, struct vnic, list_ptrs);
+		if (vnic->primary_path.viport) {
+			viport_config = vnic->primary_path.viport->config;
+			if ((viport_config->ioc_guid == p_params->ioc_guid)
+			    && (viport_config->control_config.vnic_instance
+				== p_params->instance)) {
+				SYS_ERROR("GUID %llx,"
+					  " INSTANCE %d already in use\n",
+					  be64_to_cpu(p_params->ioc_guid),
+					  p_params->instance);
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+
+		if (vnic->secondary_path.viport) {
+			viport_config = vnic->secondary_path.viport->config;
+			if ((viport_config->ioc_guid == p_params->ioc_guid)
+			    && (viport_config->control_config.vnic_instance
+				== p_params->instance)) {
+				SYS_ERROR("GUID %llx,"
+					  " INSTANCE %d already in use\n",
+					  be64_to_cpu(p_params->ioc_guid),
+					  p_params->instance);
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+	}
+
+	if (npdest->viport) {
+		SYS_ERROR("create_netpath: path already exists\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	viport_config = config_alloc_viport(p_params);
+	if (!viport_config) {
+		SYS_ERROR("create_netpath: failed creating viport config\n");
+		ret = -1;
+		goto out;
+	}
+
+	/* User-specified heartbeat value is in 1/100ths of a second */
+	if (p_params->heartbeat != -1) {
+		viport_config->hb_interval =
+			msecs_to_jiffies(p_params->heartbeat * 10);
+		viport_config->hb_timeout =
+			(p_params->heartbeat << 6) * 10000; /* usec */
+	}
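+	/*
+	 * For example, a user-supplied heartbeat of 200 (i.e. 2 seconds)
+	 * yields hb_interval = msecs_to_jiffies(2000) by the conversion above.
+	 */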
+
+	viport_config->path_idx = 0;
+
+	viport = viport_allocate(viport_config);
+	if (!viport) {
+		SYS_ERROR("create_netpath: failed creating viport\n");
+		kfree(viport_config);
+		ret = -1;
+		goto out;
+	}
+
+	npdest->viport = viport;
+	viport->parent = npdest;
+	viport->vnic = npdest->parent;
+	viport_kick(viport);
+	vnic_disconnected(npdest->parent, npdest);
+out:
+	return ret;
+}
+
+struct vnic *create_vnic(struct path_param *param)
+{
+	struct vnic_config *vnic_config;
+	struct vnic *vnic;
+	struct list_head *ptr;
+
+	SYS_INFO("create_vnic: name = %s\n", param->name);
+	list_for_each(ptr, &vnic_list) {
+		vnic = list_entry(ptr, struct vnic, list_ptrs);
+		if (!strcmp(vnic->config->name, param->name)) {
+			SYS_ERROR("vnic %s already exists\n",
+				   param->name);
+			return NULL;
+		}
+	}
+
+	vnic_config = config_alloc_vnic();
+	if (!vnic_config) {
+		SYS_ERROR("create_vnic: failed creating vnic config\n");
+		return NULL;
+	}
+
+	if (param->rx_csum != -1)
+		vnic_config->use_rx_csum = param->rx_csum;
+
+	if (param->tx_csum != -1)
+		vnic_config->use_tx_csum = param->tx_csum;
+
+	strcpy(vnic_config->name, param->name);
+	vnic = vnic_allocate(vnic_config);
+	if (!vnic) {
+		SYS_ERROR("create_vnic: failed allocating vnic\n");
+		goto free_vnic_config;
+	}
+
+	init_completion(&vnic->class_dev_info.released);
+
+	vnic->class_dev_info.class_dev.class = &vnic_class;
+	vnic->class_dev_info.class_dev.parent = &interface_cdev.class_dev;
+	snprintf(vnic->class_dev_info.class_dev.class_id, BUS_ID_SIZE,
+		 "%s", vnic_config->name);
+
+	if (class_device_register(&vnic->class_dev_info.class_dev)) {
+		SYS_ERROR("create_vnic: error in registering"
+			  " vnic class dev\n");
+		goto free_vnic;
+	}
+
+	if (sysfs_create_group(&vnic->class_dev_info.class_dev.kobj,
+			       &vnic_dev_attr_group)) {
+		SYS_ERROR("create_vnic: error in creating"
+			  " vnic attr group\n");
+		goto err_attr;
+
+	}
+
+	if (vnic_setup_stats_files(vnic))
+		goto err_stats;
+
+	return vnic;
+err_stats:
+	sysfs_remove_group(&vnic->class_dev_info.class_dev.kobj,
+			   &vnic_dev_attr_group);
+err_attr:
+	class_device_unregister(&vnic->class_dev_info.class_dev);
+	wait_for_completion(&vnic->class_dev_info.released);
+free_vnic:
+	list_del(&vnic->list_ptrs);
+	kfree(vnic);
+free_vnic_config:
+	kfree(vnic_config);
+	return NULL;
+}
+
+ssize_t vnic_delete(struct class_device * class_dev,
+		    const char *buf, size_t count)
+{
+	struct vnic *vnic;
+	struct list_head *ptr;
+	int ret = -EINVAL;
+
+	if (count > IFNAMSIZ) {
+		printk(KERN_WARNING PFX "invalid vnic interface name\n");
+		return ret;
+	}
+
+	SYS_INFO("vnic_delete: name = %s\n", buf);
+	list_for_each(ptr, &vnic_list) {
+		vnic = list_entry(ptr, struct vnic, list_ptrs);
+		if (!strcmp(vnic->config->name, buf)) {
+			vnic_free(vnic);
+			return count;
+		}
+	}
+
+	printk(KERN_WARNING PFX "vnic interface '%s' does not exist\n", buf);
+	return ret;
+}
+
+static ssize_t show_viport_state(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct netpath *path =
+	    container_of(info, struct netpath, class_dev_info);
+	switch (path->viport->state) {
+	case VIPORT_DISCONNECTED:
+		return sprintf(buf, "VIPORT_DISCONNECTED\n");
+	case VIPORT_CONNECTED:
+		return sprintf(buf, "VIPORT_CONNECTED\n");
+	default:
+		return sprintf(buf, "INVALID STATE\n");
+	}
+
+}
+
+static CLASS_DEVICE_ATTR(viport_state, S_IRUGO, show_viport_state, NULL);
+
+static ssize_t show_link_state(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct netpath *path =
+	    container_of(info, struct netpath, class_dev_info);
+
+	switch (path->viport->link_state) {
+	case LINK_UNINITIALIZED:
+		return sprintf(buf, "LINK_UNINITIALIZED\n");
+	case LINK_INITIALIZE:
+		return sprintf(buf, "LINK_INITIALIZE\n");
+	case LINK_INITIALIZECONTROL:
+		return sprintf(buf, "LINK_INITIALIZECONTROL\n");
+	case LINK_INITIALIZEDATA:
+		return sprintf(buf, "LINK_INITIALIZEDATA\n");
+	case LINK_CONTROLCONNECT:
+		return sprintf(buf, "LINK_CONTROLCONNECT\n");
+	case LINK_CONTROLCONNECTWAIT:
+		return sprintf(buf, "LINK_CONTROLCONNECTWAIT\n");
+	case LINK_INITVNICREQ:
+		return sprintf(buf, "LINK_INITVNICREQ\n");
+	case LINK_INITVNICRSP:
+		return sprintf(buf, "LINK_INITVNICRSP\n");
+	case LINK_BEGINDATAPATH:
+		return sprintf(buf, "LINK_BEGINDATAPATH\n");
+	case LINK_CONFIGDATAPATHREQ:
+		return sprintf(buf, "LINK_CONFIGDATAPATHREQ\n");
+	case LINK_CONFIGDATAPATHRSP:
+		return sprintf(buf, "LINK_CONFIGDATAPATHRSP\n");
+	case LINK_DATACONNECT:
+		return sprintf(buf, "LINK_DATACONNECT\n");
+	case LINK_DATACONNECTWAIT:
+		return sprintf(buf, "LINK_DATACONNECTWAIT\n");
+	case LINK_XCHGPOOLREQ:
+		return sprintf(buf, "LINK_XCHGPOOLREQ\n");
+	case LINK_XCHGPOOLRSP:
+		return sprintf(buf, "LINK_XCHGPOOLRSP\n");
+	case LINK_INITIALIZED:
+		return sprintf(buf, "LINK_INITIALIZED\n");
+	case LINK_IDLE:
+		return sprintf(buf, "LINK_IDLE\n");
+	case LINK_IDLING:
+		return sprintf(buf, "LINK_IDLING\n");
+	case LINK_CONFIGLINKREQ:
+		return sprintf(buf, "LINK_CONFIGLINKREQ\n");
+	case LINK_CONFIGLINKRSP:
+		return sprintf(buf, "LINK_CONFIGLINKRSP\n");
+	case LINK_CONFIGADDRSREQ:
+		return sprintf(buf, "LINK_CONFIGADDRSREQ\n");
+	case LINK_CONFIGADDRSRSP:
+		return sprintf(buf, "LINK_CONFIGADDRSRSP\n");
+	case LINK_REPORTSTATREQ:
+		return sprintf(buf, "LINK_REPORTSTATREQ\n");
+	case LINK_REPORTSTATRSP:
+		return sprintf(buf, "LINK_REPORTSTATRSP\n");
+	case LINK_HEARTBEATREQ:
+		return sprintf(buf, "LINK_HEARTBEATREQ\n");
+	case LINK_HEARTBEATRSP:
+		return sprintf(buf, "LINK_HEARTBEATRSP\n");
+	case LINK_RESET:
+		return sprintf(buf, "LINK_RESET\n");
+	case LINK_RESETRSP:
+		return sprintf(buf, "LINK_RESETRSP\n");
+	case LINK_RESETCONTROL:
+		return sprintf(buf, "LINK_RESETCONTROL\n");
+	case LINK_RESETCONTROLRSP:
+		return sprintf(buf, "LINK_RESETCONTROLRSP\n");
+	case LINK_DATADISCONNECT:
+		return sprintf(buf, "LINK_DATADISCONNECT\n");
+	case LINK_CONTROLDISCONNECT:
+		return sprintf(buf, "LINK_CONTROLDISCONNECT\n");
+	case LINK_CLEANUPDATA:
+		return sprintf(buf, "LINK_CLEANUPDATA\n");
+	case LINK_CLEANUPCONTROL:
+		return sprintf(buf, "LINK_CLEANUPCONTROL\n");
+	case LINK_DISCONNECTED:
+		return sprintf(buf, "LINK_DISCONNECTED\n");
+	case LINK_RETRYWAIT:
+		return sprintf(buf, "LINK_RETRYWAIT\n");
+	default:
+		return sprintf(buf, "INVALID STATE\n");
+
+	}
+
+}
+static CLASS_DEVICE_ATTR(link_state, S_IRUGO, show_link_state, NULL);
+
+static ssize_t show_heartbeat(struct class_device *class_dev, char *buf)
+{
+	struct class_dev_info *info =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+
+	struct netpath *path =
+	    container_of(info, struct netpath, class_dev_info);
+
+	/* hb_interval is in jiffies, convert it back to
+	 * 1/100ths of a second
+	 */
+	return sprintf(buf, "%d\n",
+		(jiffies_to_msecs(path->viport->config->hb_interval)/10));
+}
+
+static CLASS_DEVICE_ATTR(heartbeat, S_IRUGO, show_heartbeat, NULL);
+
+static struct attribute * vnic_path_attrs[] = {
+	&class_device_attr_viport_state.attr,
+	&class_device_attr_link_state.attr,
+	&class_device_attr_heartbeat.attr,
+	NULL
+};
+
+struct attribute_group vnic_path_attr_group = {
+	.attrs = vnic_path_attrs,
+};
+
+
+static int setup_path_class_files(struct netpath *path, char *name)
+{
+	init_completion(&path->class_dev_info.released);
+
+	path->class_dev_info.class_dev.class = &vnic_class;
+	path->class_dev_info.class_dev.parent =
+	    &path->parent->class_dev_info.class_dev;
+	snprintf(path->class_dev_info.class_dev.class_id,
+		 BUS_ID_SIZE, "%s", name);
+
+	if (class_device_register(&path->class_dev_info.class_dev)) {
+		SYS_ERROR("error in registering path class dev\n");
+		goto out;
+	}
+
+	if (sysfs_create_group(&path->class_dev_info.class_dev.kobj,
+			       &vnic_path_attr_group)) {
+		SYS_ERROR("error in creating vnic path group attrs\n");
+		goto err_path;
+	}
+
+	return 0;
+
+err_path:
+	class_device_unregister(&path->class_dev_info.class_dev);
+	wait_for_completion(&path->class_dev_info.released);
+out:
+	return -1;
+
+}
+
+ssize_t vnic_create_primary(struct class_device * class_dev,
+			    const char *buf, size_t count)
+{
+	struct class_dev_info *cdev =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic_ib_port *target =
+	    container_of(cdev, struct vnic_ib_port, cdev_info);
+
+	struct path_param param;
+	int ret = -EINVAL;
+	struct vnic *vnic;
+
+	param.instance = 0;
+	param.rx_csum = -1;
+	param.tx_csum = -1;
+	param.heartbeat = -1;
+
+	ret = vnic_parse_options(buf, &param);
+
+	if (ret)
+		goto out;
+
+	param.ibdev = target->dev->dev;
+	param.ibport = target;
+	param.port = target->port_num;
+
+	vnic = create_vnic(&param);
+	if (!vnic) {
+		printk(KERN_ERR PFX "creating vnic failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (create_netpath(&vnic->primary_path, &param)) {
+		printk(KERN_ERR PFX "creating primary netpath failed\n");
+		goto free_vnic;
+	}
+
+	if (setup_path_class_files(&vnic->primary_path, "primary_path"))
+		goto free_vnic;
+
+	if (vnic && !vnic->primary_path.viport) {
+		printk(KERN_ERR PFX "no valid netpaths\n");
+		goto free_vnic;
+	}
+
+	return count;
+
+free_vnic:
+	vnic_free(vnic);
+	ret = -EINVAL;
+out:
+	return ret;
+}
+
+ssize_t vnic_create_secondary(struct class_device * class_dev,
+			      const char *buf, size_t count)
+{
+	struct class_dev_info *cdev =
+	    container_of(class_dev, struct class_dev_info, class_dev);
+	struct vnic_ib_port *target =
+	    container_of(cdev, struct vnic_ib_port, cdev_info);
+
+	struct path_param param;
+	struct vnic *vnic;
+	int ret = -EINVAL;
+	struct list_head *ptr;
+	int found = 0;
+
+	param.instance = 0;
+	param.rx_csum = -1;
+	param.tx_csum = -1;
+	param.heartbeat = -1;
+
+	ret = vnic_parse_options(buf, &param);
+
+	if (ret)
+		goto out;
+
+	list_for_each(ptr, &vnic_list) {
+		vnic = list_entry(ptr, struct vnic, list_ptrs);
+		if (!strncmp(vnic->config->name, param.name, IFNAMSIZ)) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		printk(KERN_ERR PFX
+		       "primary connection with name '%s' does not exist\n",
+		       param.name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	param.ibdev = target->dev->dev;
+	param.ibport = target;
+	param.port = target->port_num;
+
+	if (create_netpath(&vnic->secondary_path, &param)) {
+		printk(KERN_ERR PFX "creating secondary netpath failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (setup_path_class_files(&vnic->secondary_path, "secondary_path"))
+		goto free_vnic;
+
+	return count;
+
+free_vnic:
+	vnic_free(vnic);
+	ret = -EINVAL;
+out:
+	return ret;
+}
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_sys.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_sys.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_SYS_H_INCLUDED
+#define VNIC_SYS_H_INCLUDED
+
+struct class_dev_info {
+	struct class_device	class_dev;
+	struct completion	released;
+};
+
+extern struct class vnic_class;
+extern struct class_dev_info interface_cdev;
+extern struct attribute_group vnic_dev_attr_group;
+extern struct attribute_group vnic_path_attr_group;
+
+extern ssize_t vnic_create_primary(struct class_device *class_dev,
+			   const char *buf, size_t count);
+
+extern ssize_t vnic_create_secondary(struct class_device *class_dev,
+			     const char *buf, size_t count);
+
+extern ssize_t vnic_delete(struct class_device *class_dev,
+			   const char *buf, size_t count);
+#endif	/*VNIC_SYS_H_INCLUDED*/
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_trailer.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_trailer.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_TRAILER_H_INCLUDED
+#define VNIC_TRAILER_H_INCLUDED
+
+/* pkt_flags values */
+enum {
+	PF_CHASH_VALID		= 0x01,
+	PF_IPSEC_VALID		= 0x02,
+	PF_TCP_SEGMENT		= 0x04,
+	PF_KICK			= 0x08,
+	PF_VLAN_INSERT		= 0x10,
+	PF_PVID_OVERRIDDEN 	= 0x20,
+	PF_FCS_INCLUDED 	= 0x40,
+	PF_FORCE_ROUTE		= 0x80
+};
+
+/* tx_chksum_flags values */
+enum {
+	TX_CHKSUM_FLAGS_CHECKSUM_V4	= 0x01,
+	TX_CHKSUM_FLAGS_CHECKSUM_V6	= 0x02,
+	TX_CHKSUM_FLAGS_TCP_CHECKSUM	= 0x04,
+	TX_CHKSUM_FLAGS_UDP_CHECKSUM	= 0x08,
+	TX_CHKSUM_FLAGS_IP_CHECKSUM	= 0x10
+};
+
+/* rx_chksum_flags values */
+enum {
+	RX_CHKSUM_FLAGS_TCP_CHECKSUM_FAILED	= 0x01,
+	RX_CHKSUM_FLAGS_UDP_CHECKSUM_FAILED	= 0x02,
+	RX_CHKSUM_FLAGS_IP_CHECKSUM_FAILED	= 0x04,
+	RX_CHKSUM_FLAGS_TCP_CHECKSUM_SUCCEEDED	= 0x08,
+	RX_CHKSUM_FLAGS_UDP_CHECKSUM_SUCCEEDED	= 0x10,
+	RX_CHKSUM_FLAGS_IP_CHECKSUM_SUCCEEDED	= 0x20,
+	RX_CHKSUM_FLAGS_LOOPBACK		= 0x40,
+	RX_CHKSUM_FLAGS_RESERVED		= 0x80
+};
+
+/* connection_hash_and_valid values */
+enum {
+	CHV_VALID	= 0x80,
+	CHV_HASH_MASH	= 0x7f
+};
+
+struct viport_trailer {
+	s8	data_alignment_offset;
+	u8	rndis_header_length;	/* reserved for use by edp */
+	__be16	data_length;
+	u8	pkt_flags;
+	u8	tx_chksum_flags;
+	u8	rx_chksum_flags;
+	u8	ip_sec_flags;
+	u32	tcp_seq_no;
+	u32	ip_sec_offload_handle;
+	u32	ip_sec_next_offload_handle;
+	u8	dest_mac_addr[6];
+	__be16	vlan;
+	u16	time_stamp;
+	u8	origin;
+	u8	connection_hash_and_valid;
+};
+
+#define VIPORT_TRAILER_ALIGNMENT	32
+
+#define BUFFER_SIZE(len)					\
+	(sizeof(struct viport_trailer) +			\
+	 ALIGN((len), VIPORT_TRAILER_ALIGNMENT))
+
+#define MAX_PAYLOAD(len)					\
+	ALIGN_DOWN((len) - sizeof(struct viport_trailer),	\
+		   VIPORT_TRAILER_ALIGNMENT)
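+
+/*
+ * Worked example, assuming struct viport_trailer packs to exactly 32 bytes:
+ * BUFFER_SIZE(1500) = 32 + ALIGN(1500, 32) = 1536, and
+ * MAX_PAYLOAD(1536) = ALIGN_DOWN(1536 - 32, 32) = 1504.
+ */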
+
+#endif	/* VNIC_TRAILER_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_util.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_util.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_UTIL_H_INCLUDED
+#define VNIC_UTIL_H_INCLUDED
+
+#define MODULE_NAME "VNIC"
+
+#define VNIC_MAJORVERSION	1
+#define VNIC_MINORVERSION	1
+
+#define is_power_of2(value)	(((value) & ((value) - 1)) == 0)
+#define ALIGN_DOWN(x, a)	((x)&(~((a)-1)))
+
+extern u32 vnic_debug;
+
+enum {
+	DEBUG_IB_INFO			= 0x00000001,
+	DEBUG_IB_FUNCTION		= 0x00000002,
+	DEBUG_IB_FSTATUS		= 0x00000004,
+	DEBUG_IB_ASSERTS		= 0x00000008,
+	DEBUG_CONTROL_INFO		= 0x00000010,
+	DEBUG_CONTROL_FUNCTION		= 0x00000020,
+	DEBUG_CONTROL_PACKET		= 0x00000040,
+	DEBUG_CONFIG_INFO		= 0x00000100,
+	DEBUG_DATA_INFO 		= 0x00001000,
+	DEBUG_DATA_FUNCTION		= 0x00002000,
+	DEBUG_NETPATH_INFO		= 0x00010000,
+	DEBUG_VIPORT_INFO		= 0x00100000,
+	DEBUG_VIPORT_FUNCTION		= 0x00200000,
+	DEBUG_LINK_STATE		= 0x00400000,
+	DEBUG_VNIC_INFO 		= 0x01000000,
+	DEBUG_VNIC_FUNCTION		= 0x02000000,
+	DEBUG_SYS_INFO			= 0x10000000,
+	DEBUG_SYS_VERBOSE		= 0x40000000
+};
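+
+/*
+ * Example: setting vnic_debug to (DEBUG_IB_INFO | DEBUG_CONTROL_INFO)
+ * enables the IB_INFO() and CONTROL_INFO() messages below while the other
+ * conditional categories stay silent.
+ */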
+
+#ifdef CONFIG_INFINIBAND_VNIC_DEBUG
+#define PRINT(level, x, fmt, arg...)					\
+	printk(level "%s: %s: %s, line %d: " fmt,			\
+	       MODULE_NAME, x, __FILE__, __LINE__, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...)		\
+	do {								\
+		if (condition)						\
+			printk(level "%s: %s: %s, line %d: " fmt,	\
+			       MODULE_NAME, x, __FILE__, __LINE__,	\
+			       ##arg);					\
+	} while(0)
+#else
+#define PRINT(level, x, fmt, arg...)					\
+	printk(level "%s: " fmt, MODULE_NAME, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...)		\
+	do {								\
+		 if (condition)						\
+			printk(level "%s: %s: " fmt,			\
+			       MODULE_NAME, x, ##arg);			\
+	} while(0)
+#endif	/*CONFIG_INFINIBAND_VNIC_DEBUG*/
+
+#define IB_PRINT(fmt, arg...)			\
+	PRINT(KERN_INFO, "IB", fmt, ##arg)
+#define IB_ERROR(fmt, arg...)			\
+	PRINT(KERN_ERR, "IB", fmt, ##arg)
+
+#define IB_FUNCTION(fmt, arg...) 				\
+	PRINT_CONDITIONAL(KERN_INFO, 				\
+			  "IB", 				\
+			  (vnic_debug & DEBUG_IB_FUNCTION), 	\
+			  fmt, ##arg)
+
+#define IB_INFO(fmt, arg...)					\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "IB",					\
+			  (vnic_debug & DEBUG_IB_INFO),		\
+			  fmt, ##arg)
+
+#define IB_ASSERT(x)							\
+	do {								\
+		if ((vnic_debug & DEBUG_IB_ASSERTS) && !(x))		\
+			panic("%s assertion failed, file: %s,"		\
+				" line %d: ",				\
+				MODULE_NAME, __FILE__, __LINE__);	\
+	} while(0)
+
+#define CONTROL_PRINT(fmt, arg...)			\
+	PRINT(KERN_INFO, "CONTROL", fmt, ##arg)
+#define CONTROL_ERROR(fmt, arg...)			\
+	PRINT(KERN_ERR, "CONTROL", fmt, ##arg)
+
+#define CONTROL_INFO(fmt, arg...)					\
+	PRINT_CONDITIONAL(KERN_INFO,					\
+			  "CONTROL",					\
+			  (vnic_debug & DEBUG_CONTROL_INFO),		\
+			  fmt, ##arg)
+
+#define CONTROL_FUNCTION(fmt, arg...)					\
+	PRINT_CONDITIONAL(KERN_INFO,					\
+		          "CONTROL",					\
+			  (vnic_debug & DEBUG_CONTROL_FUNCTION),	\
+			  fmt, ##arg)
+
+#define CONTROL_PACKET(pkt)					\
+	do {							\
+		 if (vnic_debug & DEBUG_CONTROL_PACKET)		\
+			control_log_control_packet(pkt);	\
+	} while(0)
+
+#define CONFIG_PRINT(fmt, arg...)		\
+	PRINT(KERN_INFO, "CONFIG", fmt, ##arg)
+#define CONFIG_ERROR(fmt, arg...)		\
+	PRINT(KERN_ERR, "CONFIG", fmt, ##arg)
+
+#define CONFIG_INFO(fmt, arg...)				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "CONFIG",				\
+			  (vnic_debug & DEBUG_CONFIG_INFO),	\
+			  fmt, ##arg)
+
+#define DATA_PRINT(fmt, arg...)			\
+	PRINT(KERN_INFO, "DATA", fmt, ##arg)
+#define DATA_ERROR(fmt, arg...)			\
+	PRINT(KERN_ERR, "DATA", fmt, ##arg)
+
+#define DATA_INFO(fmt, arg...)					\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "DATA",				\
+			  (vnic_debug & DEBUG_DATA_INFO),	\
+			  fmt, ##arg)
+
+#define DATA_FUNCTION(fmt, arg...)				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "DATA",				\
+			  (vnic_debug & DEBUG_DATA_FUNCTION),	\
+			  fmt, ##arg)
+
+#define NETPATH_PRINT(fmt, arg...)		\
+	PRINT(KERN_INFO, "NETPATH", fmt, ##arg)
+#define NETPATH_ERROR(fmt, arg...)		\
+	PRINT(KERN_ERR, "NETPATH", fmt, ##arg)
+
+#define NETPATH_INFO(fmt, arg...)				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "NETPATH",				\
+			  (vnic_debug & DEBUG_NETPATH_INFO),	\
+			  fmt, ##arg)
+
+#define VIPORT_PRINT(fmt, arg...)		\
+	PRINT(KERN_INFO, "VIPORT", fmt, ##arg)
+#define VIPORT_ERROR(fmt, arg...)		\
+	PRINT(KERN_ERR, "VIPORT", fmt, ##arg)
+
+#define VIPORT_INFO(fmt, arg...) 				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "VIPORT",				\
+			  (vnic_debug & DEBUG_VIPORT_INFO),	\
+			  fmt, ##arg)
+
+#define VIPORT_FUNCTION(fmt, arg...)				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "VIPORT",				\
+			  (vnic_debug & DEBUG_VIPORT_FUNCTION),	\
+			  fmt, ##arg)
+
+#define LINK_STATE(fmt, arg...) 				\
+	PRINT_CONDITIONAL(KERN_INFO,				\
+			  "LINK",				\
+			  (vnic_debug & DEBUG_LINK_STATE),	\
+			  fmt, ##arg)
+
+#define VNIC_PRINT(fmt, arg...)			\
+	PRINT(KERN_INFO, "NIC", fmt, ##arg)
+#define VNIC_ERROR(fmt, arg...)			\
+	PRINT(KERN_ERR, "NIC", fmt, ##arg)
+#define VNIC_INIT(fmt, arg...)			\
+	PRINT(KERN_INFO, "NIC", fmt, ##arg)
+
+#define VNIC_INFO(fmt, arg...)					\
+	 PRINT_CONDITIONAL(KERN_INFO,				\
+			   "NIC",				\
+			   (vnic_debug & DEBUG_VNIC_INFO),	\
+			   fmt, ##arg)
+
+#define VNIC_FUNCTION(fmt, arg...)				\
+	 PRINT_CONDITIONAL(KERN_INFO,				\
+			   "NIC",				\
+			   (vnic_debug & DEBUG_VNIC_FUNCTION),	\
+			   fmt, ##arg)
+
+#define SYS_PRINT(fmt, arg...)			\
+	PRINT(KERN_INFO, "SYS", fmt, ##arg)
+#define SYS_ERROR(fmt, arg...)			\
+	PRINT(KERN_ERR, "SYS", fmt, ##arg)
+
+#define SYS_INFO(fmt, arg...)					\
+	 PRINT_CONDITIONAL(KERN_INFO,				\
+			   "SYS",				\
+			   (vnic_debug & DEBUG_SYS_INFO),	\
+			   fmt, ##arg)
+
+#endif	/* VNIC_UTIL_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_viport.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_viport.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+
+#include "vnic_util.h"
+#include "vnic_main.h"
+#include "vnic_viport.h"
+#include "vnic_netpath.h"
+#include "vnic_control.h"
+#include "vnic_data.h"
+#include "vnic_config.h"
+#include "vnic_control_pkt.h"
+
+#define VIPORT_DISCONN_TIMER	10000 /*in ms*/
+
+DECLARE_WAIT_QUEUE_HEAD(viport_queue);
+LIST_HEAD(viport_list);
+DECLARE_COMPLETION(viport_thread_exit);
+spinlock_t viport_list_lock = SPIN_LOCK_UNLOCKED;
+
+int viport_thread = -1;
+int viport_thread_end = 0;
+
+struct viport *viport_allocate(struct viport_config *config)
+{
+	struct viport *viport;
+
+	VIPORT_FUNCTION("viport_allocate()\n");
+	viport = kzalloc(sizeof *viport, GFP_KERNEL);
+	if (!viport) {
+		VIPORT_ERROR("failed allocating viport structure\n");
+		return NULL;
+	}
+
+	viport->state = VIPORT_DISCONNECTED;
+	viport->link_state = LINK_RETRYWAIT;
+	viport->connect = WAIT;
+	viport->new_mtu = 1500;
+	viport->new_flags = 0;
+	viport->config = config;
+
+	spin_lock_init(&viport->lock);
+	init_waitqueue_head(&viport->stats_queue);
+	init_waitqueue_head(&viport->disconnect_queue);
+	INIT_LIST_HEAD(&viport->list_ptrs);
+
+	viport_kick(viport);
+
+	return viport;
+}
+
+void viport_connect(struct viport * viport, int delay)
+{
+	VIPORT_FUNCTION("viport_connect()\n");
+
+	if (delay)
+		viport->connect = DELAY;
+	else
+		viport->connect = NOW;
+
+	viport_kick(viport);
+}
+
+void viport_disconnect(struct viport *viport)
+{
+	VIPORT_FUNCTION("viport_disconnect()\n");
+	viport->disconnect = 1;
+	viport_failure(viport);
+	wait_event(viport->disconnect_queue, viport->disconnect == 0);
+}
+
+void viport_free(struct viport *viport)
+{
+	VIPORT_FUNCTION("viport_free()\n");
+	viport_disconnect(viport);	/* NOTE: this can sleep */
+	kfree(viport->config);
+	kfree(viport);
+}
+
+void viport_set_link(struct viport * viport, u16 flags, u16 mtu)
+{
+	unsigned long localflags;
+
+	VIPORT_FUNCTION("viport_set_link()\n");
+	if (mtu > data_max_mtu(&viport->data)) {
+		VIPORT_ERROR("configuration error."
+			     " mtu of %d unsupported by %s\n", mtu,
+			     config_viport_name(viport->config));
+		goto failure;
+	}
+
+	spin_lock_irqsave(&viport->lock, localflags);
+	flags &= IFF_UP | IFF_ALLMULTI | IFF_PROMISC;
+	if ((viport->new_flags != flags)
+	    || (viport->new_mtu != mtu)) {
+		viport->new_flags = flags;
+		viport->new_mtu = mtu;
+		viport->updates |= NEED_LINK_CONFIG;
+		viport_kick(viport);
+	}
+
+	spin_unlock_irqrestore(&viport->lock, localflags);
+	return;
+failure:
+	viport_failure(viport);
+}
+
+int viport_set_unicast(struct viport * viport, u8 * address)
+{
+	unsigned long flags;
+	int	ret = -1;
+	VIPORT_FUNCTION("viport_set_unicast()\n");
+	spin_lock_irqsave(&viport->lock, flags);
+
+	if (!viport->mac_addresses)
+		goto out;
+
+	if (memcmp(viport->mac_addresses[UNICAST_ADDR].address,
+		   address, ETH_ALEN)) {
+		memcpy(viport->mac_addresses[UNICAST_ADDR].address,
+		       address, ETH_ALEN);
+		viport->mac_addresses[UNICAST_ADDR].operation
+		    = VNIC_OP_SET_ENTRY;
+		viport->updates |= NEED_ADDRESS_CONFIG;
+		viport_kick(viport);
+	}
+	ret = 0;
+out:
+	spin_unlock_irqrestore(&viport->lock, flags);
+	return ret;
+}
+
+int viport_set_multicast(struct viport * viport,
+			 struct dev_mc_list * mc_list, int mc_count)
+{
+	u32 old_update_list;
+	int i;
+	int ret = -1;
+	unsigned long flags;
+
+	VIPORT_FUNCTION("viport_set_multicast()\n");
+	spin_lock_irqsave(&viport->lock, flags);
+
+	if (!viport->mac_addresses)
+		goto out;
+
+	old_update_list = viport->updates;
+	if (mc_count > viport->num_mac_addresses - MCAST_ADDR_START)
+		viport->updates |= NEED_LINK_CONFIG | MCAST_OVERFLOW;
+	else {
+		if (viport->updates & MCAST_OVERFLOW) {
+			viport->updates &= ~MCAST_OVERFLOW;
+			viport->updates |= NEED_LINK_CONFIG;
+		}
+		/* brute force algorithm */
+		for (i = MCAST_ADDR_START;
+		     i < mc_count + MCAST_ADDR_START;
+		     i++, mc_list = mc_list->next) {
+			if (viport->mac_addresses[i].valid &&
+			    !memcmp(viport->mac_addresses[i].address,
+				    mc_list->dmi_addr, ETH_ALEN))
+				continue;
+			memcpy(viport->mac_addresses[i].address,
+			       mc_list->dmi_addr, ETH_ALEN);
+			viport->mac_addresses[i].valid = 1;
+			viport->mac_addresses[i].operation =
+						VNIC_OP_SET_ENTRY;
+		}
+		for (; i < viport->num_mac_addresses; i++) {
+			if (!viport->mac_addresses[i].valid)
+				continue;
+			viport->mac_addresses[i].valid = 0;
+			viport->mac_addresses[i].operation =
+						VNIC_OP_SET_ENTRY;
+		}
+		if (mc_count)
+			viport->updates |= NEED_ADDRESS_CONFIG;
+	}
+
+	if (viport->updates != old_update_list)
+		viport_kick(viport);
+	ret = 0;
+out:
+	spin_unlock_irqrestore(&viport->lock, flags);
+	return ret;
+}
+
+void viport_get_stats(struct viport * viport,
+		     struct net_device_stats * stats)
+{
+	unsigned long flags;
+
+	VIPORT_FUNCTION("viport_get_stats()\n");
+	if (time_after(jiffies, viport->last_stats_time +
+		       viport->config->stats_interval)) {
+		spin_lock_irqsave(&viport->lock, flags);
+		viport->updates |= NEED_STATS;
+		spin_unlock_irqrestore(&viport->lock, flags);
+		viport_kick(viport);
+		wait_event(viport->stats_queue,
+			   !(viport->updates & NEED_STATS));
+
+		if (viport->stats.ethernet_status)
+			vnic_link_up(viport->vnic, viport->parent);
+		else
+			vnic_link_down(viport->vnic, viport->parent);
+	}
+
+	stats->rx_packets = be64_to_cpu(viport->stats.if_in_ok);
+	stats->tx_packets = be64_to_cpu(viport->stats.if_out_ok);
+	stats->rx_bytes   = be64_to_cpu(viport->stats.if_in_octets);
+	stats->tx_bytes   = be64_to_cpu(viport->stats.if_out_octets);
+	stats->rx_errors  = be64_to_cpu(viport->stats.if_in_errors);
+	stats->tx_errors  = be64_to_cpu(viport->stats.if_out_errors);
+	stats->rx_dropped = 0;	/* EIOC doesn't track */
+	stats->tx_dropped = 0;	/* EIOC doesn't track */
+	stats->multicast  = be64_to_cpu(viport->stats.if_in_nucast_pkts);
+	stats->collisions = 0;	/* EIOC doesn't track */
+}
+
+int viport_xmit_packet(struct viport * viport, struct sk_buff * skb)
+{
+	int status = -1;
+	unsigned long flags;
+
+	VIPORT_FUNCTION("viport_xmit_packet()\n");
+	spin_lock_irqsave(&viport->lock, flags);
+	if (viport->state == VIPORT_CONNECTED)
+		status = data_xmit_packet(&viport->data, skb);
+	spin_unlock_irqrestore(&viport->lock, flags);
+
+	return status;
+}
+
+void viport_kick(struct viport *viport)
+{
+	unsigned long flags;
+
+	VIPORT_FUNCTION("viport_kick()\n");
+	spin_lock_irqsave(&viport_list_lock, flags);
+	if (list_empty(&viport->list_ptrs)) {
+		list_add_tail(&viport->list_ptrs, &viport_list);
+		wake_up(&viport_queue);
+	}
+	spin_unlock_irqrestore(&viport_list_lock, flags);
+}
+
+void viport_failure(struct viport *viport)
+{
+	unsigned long flags;
+
+	VIPORT_FUNCTION("viport_failure()\n");
+	spin_lock_irqsave(&viport_list_lock, flags);
+	viport->errored = 1;
+	if (list_empty(&viport->list_ptrs)) {
+		list_add_tail(&viport->list_ptrs, &viport_list);
+		wake_up(&viport_queue);
+	}
+	spin_unlock_irqrestore(&viport_list_lock, flags);
+}
+
+static void viport_timeout(unsigned long data)
+{
+	struct viport *viport;
+
+	VIPORT_FUNCTION("viport_timeout()\n");
+	viport = (struct viport *)data;
+	viport->timer_active = 0;
+	viport_kick(viport);
+}
+
+static void viport_timer(struct viport *viport, int timeout)
+{
+	VIPORT_FUNCTION("viport_timer()\n");
+	if (viport->timer_active)
+		del_timer(&viport->timer);
+	init_timer(&viport->timer);
+	viport->timer.expires = jiffies + timeout;
+	viport->timer.data = (unsigned long)viport;
+	viport->timer.function = viport_timeout;
+	viport->timer_active = 1;
+	add_timer(&viport->timer);
+}
+
+static void viport_timer_stop(struct viport *viport)
+{
+	VIPORT_FUNCTION("viport_timer_stop()\n");
+	if (viport->timer_active)
+		del_timer(&viport->timer);
+	viport->timer_active = 0;
+}
+
+static int viport_init_mac_addresses(struct viport *viport)
+{
+	struct vnic_address_op	*temp;
+	unsigned long		flags;
+	int			i;
+
+	VIPORT_FUNCTION("viport_init_mac_addresses()\n");
+	i = viport->num_mac_addresses * sizeof *temp;
+	temp = kzalloc(viport->num_mac_addresses * sizeof *temp,
+		       GFP_KERNEL);
+	if (!temp) {
+		VIPORT_ERROR("failed allocating MAC address table\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&viport->lock, flags);
+	viport->mac_addresses = temp;
+	for (i = 0; i < viport->num_mac_addresses; i++) {
+		viport->mac_addresses[i].index = cpu_to_be16(i);
+		viport->mac_addresses[i].vlan =
+				cpu_to_be16(viport->default_vlan);
+	}
+	memset(viport->mac_addresses[BROADCAST_ADDR].address,
+	       0xFF, ETH_ALEN);
+	viport->mac_addresses[BROADCAST_ADDR].valid = 1;
+	memcpy(viport->mac_addresses[UNICAST_ADDR].address,
+	       viport->hw_mac_address, ETH_ALEN);
+	viport->mac_addresses[UNICAST_ADDR].valid = 1;
+
+	spin_unlock_irqrestore(&viport->lock, flags);
+
+	return 0;
+}
+
+static int viport_handle_init_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_UNINITIALIZED:
+			LINK_STATE("state LINK_UNINITIALIZED\n");
+			viport->updates = 0;
+			wake_up(&viport->stats_queue);
+			/* the transition to the
+			 * uninitialized state may have
+			 * put this viport back on the
+			 * service queue, so delete it
+			 * off again.
+			 */
+			spin_lock_irq(&viport_list_lock);
+			list_del_init(&viport->list_ptrs);
+			spin_unlock_irq(&viport_list_lock);
+			viport->disconnect = 0;
+			wake_up(&viport->disconnect_queue);
+			break;
+		case LINK_INITIALIZE:
+			LINK_STATE("state LINK_INITIALIZE\n");
+			viport->errored = 0;
+			viport->connect = WAIT;
+			viport->last_stats_time = 0;
+			if (viport->disconnect)
+				viport->link_state = LINK_UNINITIALIZED;
+			else
+				viport->link_state = LINK_INITIALIZECONTROL;
+			break;
+		case LINK_INITIALIZECONTROL:
+			LINK_STATE("state LINK_INITIALIZECONTROL\n");
+			viport->pd = ib_alloc_pd(viport->config->ibdev);
+			if (IS_ERR(viport->pd))
+				viport->link_state = LINK_DISCONNECTED;
+			else if (control_init(&viport->control, viport,
+					    &viport->config->control_config,
+					    viport->pd)) {
+				ib_dealloc_pd(viport->pd);
+				viport->link_state = LINK_DISCONNECTED;
+
+			}
+			else
+				viport->link_state = LINK_INITIALIZEDATA;
+			break;
+		case LINK_INITIALIZEDATA:
+			LINK_STATE("state LINK_INITIALIZEDATA\n");
+			if (data_init(&viport->data, viport,
+				      &viport->config->data_config,
+				      viport->pd))
+				viport->link_state = LINK_CLEANUPCONTROL;
+			else
+				viport->link_state = LINK_CONTROLCONNECT;
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_control_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_CONTROLCONNECT:
+			if (vnic_ib_cm_connect(&viport->control.ib_conn))
+				viport->link_state = LINK_CLEANUPDATA;
+			else
+				viport->link_state = LINK_CONTROLCONNECTWAIT;
+			break;
+		case LINK_CONTROLCONNECTWAIT:
+			LINK_STATE("state LINK_CONTROLCONNECTWAIT\n");
+			if (control_is_connected(&viport->control))
+				viport->link_state = LINK_INITVNICREQ;
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_CONTROLDISCONNECT;
+			}
+			break;
+		case LINK_INITVNICREQ:
+			LINK_STATE("state LINK_INITVNICREQ\n");
+			if (control_init_vnic_req(&viport->control))
+				viport->link_state = LINK_RESETCONTROL;
+			else
+				viport->link_state = LINK_INITVNICRSP;
+			break;
+		case LINK_INITVNICRSP:
+			LINK_STATE("state LINK_INITVNICRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_init_vnic_rsp(&viport->control,
+						  &viport->features_supported,
+						  viport->hw_mac_address,
+						  &viport->num_mac_addresses,
+						  &viport->default_vlan)) {
+				if (viport_init_mac_addresses(viport))
+					viport->link_state =
+							LINK_RESETCONTROL;
+				else
+					viport->link_state =
+							LINK_BEGINDATAPATH;
+			}
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESETCONTROL;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while(viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_data_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_BEGINDATAPATH:
+			LINK_STATE("state LINK_BEGINDATAPATH\n");
+			viport->link_state = LINK_CONFIGDATAPATHREQ;
+			break;
+		case LINK_CONFIGDATAPATHREQ:
+			LINK_STATE("state LINK_CONFIGDATAPATHREQ\n");
+			if (control_config_data_path_req(&viport->control,
+						data_path_id(&viport->
+							     data),
+						data_host_pool_max
+						(&viport->data),
+						data_eioc_pool_max
+						(&viport->data)))
+				viport->link_state = LINK_RESETCONTROL;
+			else
+				viport->link_state = LINK_CONFIGDATAPATHRSP;
+			break;
+		case LINK_CONFIGDATAPATHRSP:
+			LINK_STATE("state LINK_CONFIGDATAPATHRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_config_data_path_rsp(&viport->control,
+							 data_host_pool
+							 (&viport->data),
+							 data_eioc_pool
+							 (&viport->data),
+							 data_host_pool_max
+							 (&viport->data),
+							 data_eioc_pool_max
+							 (&viport->data),
+							 data_host_pool_min
+							 (&viport->data),
+							 data_eioc_pool_min
+							 (&viport->data)))
+				viport->link_state = LINK_DATACONNECT;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESETCONTROL;
+			}
+			break;
+		case LINK_DATACONNECT:
+			LINK_STATE("state LINK_DATACONNECT\n");
+			if (data_connect(&viport->data))
+				viport->link_state = LINK_RESETCONTROL;
+			else
+				viport->link_state = LINK_DATACONNECTWAIT;
+			break;
+		case LINK_DATACONNECTWAIT:
+			LINK_STATE("state LINK_DATACONNECTWAIT\n");
+			control_process_async(&viport->control);
+			if (data_is_connected(&viport->data))
+				viport->link_state = LINK_XCHGPOOLREQ;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while(viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_xchgpool_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_XCHGPOOLREQ:
+			LINK_STATE("state LINK_XCHGPOOLREQ\n");
+			if (control_exchange_pools_req(&viport->control,
+						       data_local_pool_addr
+						       (&viport->data),
+						       data_local_pool_rkey
+						       (&viport->data)))
+				viport->link_state = LINK_RESET;
+			else
+				viport->link_state = LINK_XCHGPOOLRSP;
+			break;
+		case LINK_XCHGPOOLRSP:
+			LINK_STATE("state LINK_XCHGPOOLRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_exchange_pools_rsp(&viport->control,
+						       data_remote_pool_addr
+						       (&viport->data),
+						       data_remote_pool_rkey
+						       (&viport->data)))
+				viport->link_state = LINK_INITIALIZED;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		case LINK_INITIALIZED:
+			LINK_STATE("state LINK_INITIALIZED\n");
+			viport->state = VIPORT_CONNECTED;
+			printk(KERN_INFO PFX
+			       "%s: connection established\n",
+			       config_viport_name(viport->config));
+			data_connected(&viport->data);
+			vnic_connected(viport->parent->parent,
+				       viport->parent);
+			spin_lock_irq(&viport->lock);
+			viport->mtu = 1500;
+			viport->flags = 0;
+			if ((viport->mtu != viport->new_mtu) ||
+			    (viport->flags != viport->new_flags))
+				viport->updates |= NEED_LINK_CONFIG;
+			spin_unlock_irq(&viport->lock);
+			viport->link_state = LINK_IDLE;
+			break;
+		default:
+			return -1;
+		}
+	} while(viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_idle_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_IDLE:
+			LINK_STATE("state LINK_IDLE\n");
+			if (viport->config->hb_interval)
+				viport_timer(viport,
+					     viport->config->hb_interval);
+			viport->link_state = LINK_IDLING;
+			break;
+		case LINK_IDLING:
+			LINK_STATE("state LINK_IDLING\n");
+			control_process_async(&viport->control);
+			if (viport->errored) {
+				viport_timer_stop(viport);
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+				break;
+			}
+
+			spin_lock_irq(&viport->lock);
+			if (viport->updates & NEED_LINK_CONFIG) {
+				viport_timer_stop(viport);
+				viport->link_state = LINK_CONFIGLINKREQ;
+			} else if (viport->updates & NEED_ADDRESS_CONFIG) {
+				viport_timer_stop(viport);
+				viport->link_state = LINK_CONFIGADDRSREQ;
+			} else if (viport->updates & NEED_STATS) {
+				viport_timer_stop(viport);
+				viport->link_state = LINK_REPORTSTATREQ;
+			} else if (viport->config->hb_interval) {
+				if (!viport->timer_active)
+					viport->link_state =
+						LINK_HEARTBEATREQ;
+			}
+			spin_unlock_irq(&viport->lock);
+			break;
+		default:
+			return -1;
+		}
+	} while(viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_config_states(struct viport *viport)
+{
+	enum link_state old_state;
+	int res;
+
+	do {
+		switch(old_state = viport->link_state) {
+		case LINK_CONFIGLINKREQ:
+			LINK_STATE("state LINK_CONFIGLINKREQ\n");
+			spin_lock_irq(&viport->lock);
+			viport->updates &= ~NEED_LINK_CONFIG;
+			viport->flags = viport->new_flags;
+			if (viport->updates & MCAST_OVERFLOW)
+				viport->flags |= IFF_ALLMULTI;
+			viport->mtu = viport->new_mtu;
+			spin_unlock_irq(&viport->lock);
+			if (control_config_link_req(&viport->control,
+						    viport->flags,
+						    viport->mtu))
+				viport->link_state = LINK_RESET;
+			else
+				viport->link_state = LINK_CONFIGLINKRSP;
+			break;
+		case LINK_CONFIGLINKRSP:
+			LINK_STATE("state LINK_CONFIGLINKRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_config_link_rsp(&viport->control,
+						    &viport->flags,
+						    &viport->mtu))
+				viport->link_state = LINK_IDLE;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		case LINK_CONFIGADDRSREQ:
+			LINK_STATE("state LINK_CONFIGADDRSREQ\n");
+
+			spin_lock_irq(&viport->lock);
+			res = control_config_addrs_req(&viport->control,
+						       viport->mac_addresses,
+						       viport->
+						       num_mac_addresses);
+
+			if (res > 0) {
+				viport->updates &= ~NEED_ADDRESS_CONFIG;
+				viport->link_state = LINK_CONFIGADDRSRSP;
+			} else if (res == 0)
+				viport->link_state = LINK_CONFIGADDRSRSP;
+			else
+				viport->link_state = LINK_RESET;
+			spin_unlock_irq(&viport->lock);
+			break;
+		case LINK_CONFIGADDRSRSP:
+			LINK_STATE("state LINK_CONFIGADDRSRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_config_addrs_rsp(&viport->control))
+				viport->link_state = LINK_IDLE;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_stat_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch (old_state = viport->link_state) {
+		case LINK_REPORTSTATREQ:
+			LINK_STATE("state LINK_REPORTSTATREQ\n");
+			if (control_report_statistics_req(&viport->control))
+				viport->link_state = LINK_RESET;
+			else
+				viport->link_state = LINK_REPORTSTATRSP;
+			break;
+		case LINK_REPORTSTATRSP:
+			LINK_STATE("state LINK_REPORTSTATRSP\n");
+			control_process_async(&viport->control);
+
+			spin_lock_irq(&viport->lock);
+			if (control_report_statistics_rsp(&viport->control,
+						  &viport->stats) == 0) {
+				viport->updates &= ~NEED_STATS;
+				viport->last_stats_time = jiffies;
+				wake_up(&viport->stats_queue);
+				viport->link_state = LINK_IDLE;
+			}
+
+			spin_unlock_irq(&viport->lock);
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_heartbeat_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch (old_state = viport->link_state) {
+		case LINK_HEARTBEATREQ:
+			LINK_STATE("state LINK_HEARTBEATREQ\n");
+			if (control_heartbeat_req(&viport->control,
+						  viport->config->hb_timeout))
+				viport->link_state = LINK_RESET;
+			else
+				viport->link_state = LINK_HEARTBEATRSP;
+			break;
+		case LINK_HEARTBEATRSP:
+			LINK_STATE("state LINK_HEARTBEATRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_heartbeat_rsp(&viport->control))
+				viport->link_state = LINK_IDLE;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_RESET;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_reset_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch (old_state = viport->link_state) {
+		case LINK_RESET:
+			LINK_STATE("state LINK_RESET\n");
+			viport->errored = 0;
+			spin_lock_irq(&viport->lock);
+			viport->state = VIPORT_DISCONNECTED;
+			spin_unlock_irq(&viport->lock);
+			vnic_link_down(viport->vnic, viport->parent);
+			printk(KERN_INFO PFX
+			       "%s: connection lost\n",
+			       config_viport_name(viport->config));
+			if (control_reset_req(&viport->control))
+				viport->link_state = LINK_DATADISCONNECT;
+			else
+				viport->link_state = LINK_RESETRSP;
+			break;
+		case LINK_RESETRSP:
+			LINK_STATE("state LINK_RESETRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_reset_rsp(&viport->control))
+				viport->link_state = LINK_DATADISCONNECT;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_DATADISCONNECT;
+			}
+			break;
+		case LINK_RESETCONTROL:
+			LINK_STATE("state LINK_RESETCONTROL\n");
+			if (control_reset_req(&viport->control))
+				viport->link_state = LINK_CONTROLDISCONNECT;
+			else
+				viport->link_state = LINK_RESETCONTROLRSP;
+			break;
+		case LINK_RESETCONTROLRSP:
+			LINK_STATE("state LINK_RESETCONTROLRSP\n");
+			control_process_async(&viport->control);
+
+			if (!control_reset_rsp(&viport->control))
+				viport->link_state = LINK_CONTROLDISCONNECT;
+
+			if (viport->errored) {
+				viport->errored = 0;
+				viport->link_state = LINK_CONTROLDISCONNECT;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_handle_disconn_states(struct viport *viport)
+{
+	enum link_state old_state;
+
+	do {
+		switch (old_state = viport->link_state) {
+		case LINK_DATADISCONNECT:
+			LINK_STATE("state LINK_DATADISCONNECT\n");
+			data_disconnect(&viport->data);
+			viport->link_state = LINK_CONTROLDISCONNECT;
+			break;
+		case LINK_CONTROLDISCONNECT:
+			LINK_STATE("state LINK_CONTROLDISCONNECT\n");
+			viport->link_state = LINK_CLEANUPDATA;
+			break;
+		case LINK_CLEANUPDATA:
+			LINK_STATE("state LINK_CLEANUPDATA\n");
+			data_cleanup(&viport->data);
+			viport->link_state = LINK_CLEANUPCONTROL;
+			break;
+		case LINK_CLEANUPCONTROL:
+			LINK_STATE("state LINK_CLEANUPCONTROL\n");
+			spin_lock_irq(&viport->lock);
+			if (viport->mac_addresses) {
+				kfree(viport->mac_addresses);
+				viport->mac_addresses = NULL;
+			}
+			spin_unlock_irq(&viport->lock);
+			control_cleanup(&viport->control);
+			ib_dealloc_pd(viport->pd);
+			viport->link_state = LINK_DISCONNECTED;
+			break;
+		case LINK_DISCONNECTED:
+			LINK_STATE("state LINK_DISCONNECTED\n");
+			vnic_disconnected(viport->parent->parent,
+					  viport->parent);
+			if (viport->disconnect != 0)
+				viport->link_state = LINK_UNINITIALIZED;
+			else {
+				viport_timer(viport,
+				     msecs_to_jiffies(VIPORT_DISCONN_TIMER));
+				viport->link_state = LINK_RETRYWAIT;
+			}
+			break;
+		case LINK_RETRYWAIT:
+			LINK_STATE("state LINK_RETRYWAIT\n");
+			viport->stats.ethernet_status = 0;
+			viport->updates = 0;
+			wake_up(&viport->stats_queue);
+			if (viport->disconnect != 0) {
+				viport_timer_stop(viport);
+				viport->link_state = LINK_UNINITIALIZED;
+			} else if (viport->connect == DELAY) {
+				if (!viport->timer_active) {
+					viport->link_state = LINK_INITIALIZE;
+				}
+			} else if (viport->connect == NOW) {
+				viport_timer_stop(viport);
+				viport->link_state = LINK_INITIALIZE;
+			}
+			break;
+		default:
+			return -1;
+		}
+	} while (viport->link_state != old_state);
+
+	return 0;
+}
+
+static int viport_statemachine(void *context)
+{
+	struct viport *viport;
+	enum link_state old_link_state;
+
+	VIPORT_FUNCTION("viport_statemachine()\n");
+	daemonize("vnic_viport");
+	while (!viport_thread_end || !list_empty(&viport_list)) {
+		wait_event_interruptible(viport_queue,
+					 !list_empty(&viport_list)
+					 || viport_thread_end);
+		spin_lock_irq(&viport_list_lock);
+		if (list_empty(&viport_list)) {
+			spin_unlock_irq(&viport_list_lock);
+			continue;
+		}
+		viport = list_entry(viport_list.next, struct viport,
+				    list_ptrs);
+		list_del_init(&viport->list_ptrs);
+		spin_unlock_irq(&viport_list_lock);
+
+		do {
+			old_link_state = viport->link_state;
+
+			/*
+			 * Optimize for the state machine steady state
+			 * by checking for the most common states first.
+			 */
+			if (viport_handle_idle_states(viport) == 0)
+				break;
+			if (viport_handle_heartbeat_states(viport) == 0)
+				break;
+			if (viport_handle_stat_states(viport) == 0)
+				break;
+			if (viport_handle_config_states(viport) == 0)
+				break;
+
+			if (viport_handle_init_states(viport) == 0)
+				break;
+			if (viport_handle_control_states(viport) == 0)
+				break;
+			if (viport_handle_data_states(viport) == 0)
+				break;
+			if (viport_handle_xchgpool_states(viport) == 0)
+				break;
+			if (viport_handle_reset_states(viport) == 0)
+				break;
+			if (viport_handle_disconn_states(viport) == 0)
+				break;
+		} while (viport->link_state != old_link_state);
+	}
+
+	complete_and_exit(&viport_thread_exit, 0);
+}
+
+int viport_start(void)
+{
+	VIPORT_FUNCTION("viport_start()\n");
+
+	viport_thread = kernel_thread(viport_statemachine, NULL, 0);
+	if (viport_thread < 0) {
+		printk(KERN_WARNING PFX "Could not create viport_thread;"
+		       " error %d\n", viport_thread);
+		return viport_thread;
+	}
+
+	return 0;
+}
+
+void viport_cleanup(void)
+{
+	VIPORT_FUNCTION("viport_cleanup()\n");
+	if (viport_thread > 0) {
+		viport_thread_end = 1;
+		wake_up(&viport_queue);
+		wait_for_completion(&viport_thread_exit);
+	}
+}
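
Each viport_handle_*_states() helper above follows the same idiom: loop while
the link state keeps changing within its group of states, and return -1 from
the default case so viport_statemachine() can fall through to the next group.
A minimal sketch of that idiom, with hypothetical states and handler names not
taken from the patch:

	enum demo_state { DEMO_START, DEMO_STEADY, DEMO_OTHER };

	static int demo_handle_group(enum demo_state *state)
	{
		enum demo_state old;

		do {
			switch (old = *state) {
			case DEMO_START:
				*state = DEMO_STEADY;	/* advance within the group */
				break;
			case DEMO_STEADY:
				break;			/* no change; loop exits */
			default:
				return -1;		/* not ours; try the next group */
			}
		} while (*state != old);

		return 0;	/* this group handled the current state */
	}
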
--- linux-2.6.18.noarch/drivers/infiniband/ulp/vnic/vnic_viport.h
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/ulp/vnic/vnic_viport.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef VNIC_VIPORT_H_INCLUDED
+#define VNIC_VIPORT_H_INCLUDED
+
+#include "vnic_control.h"
+#include "vnic_data.h"
+
+enum viport_state {
+	VIPORT_DISCONNECTED	= 0,
+	VIPORT_CONNECTED	= 1
+};
+
+enum link_state {
+	LINK_UNINITIALIZED	= 0,
+	LINK_INITIALIZE		= 1,
+	LINK_INITIALIZECONTROL	= 2,
+	LINK_INITIALIZEDATA	= 3,
+	LINK_CONTROLCONNECT	= 4,
+	LINK_CONTROLCONNECTWAIT	= 5,
+	LINK_INITVNICREQ	= 6,
+	LINK_INITVNICRSP	= 7,
+	LINK_BEGINDATAPATH	= 8,
+	LINK_CONFIGDATAPATHREQ	= 9,
+	LINK_CONFIGDATAPATHRSP	= 10,
+	LINK_DATACONNECT	= 11,
+	LINK_DATACONNECTWAIT	= 12,
+	LINK_XCHGPOOLREQ	= 13,
+	LINK_XCHGPOOLRSP	= 14,
+	LINK_INITIALIZED	= 15,
+	LINK_IDLE		= 16,
+	LINK_IDLING		= 17,
+	LINK_CONFIGLINKREQ	= 18,
+	LINK_CONFIGLINKRSP	= 19,
+	LINK_CONFIGADDRSREQ	= 20,
+	LINK_CONFIGADDRSRSP	= 21,
+	LINK_REPORTSTATREQ	= 22,
+	LINK_REPORTSTATRSP	= 23,
+	LINK_HEARTBEATREQ	= 24,
+	LINK_HEARTBEATRSP	= 25,
+	LINK_RESET		= 26,
+	LINK_RESETRSP		= 27,
+	LINK_RESETCONTROL	= 28,
+	LINK_RESETCONTROLRSP	= 29,
+	LINK_DATADISCONNECT	= 30,
+	LINK_CONTROLDISCONNECT	= 31,
+	LINK_CLEANUPDATA	= 32,
+	LINK_CLEANUPCONTROL	= 33,
+	LINK_DISCONNECTED	= 34,
+	LINK_RETRYWAIT		= 35
+};
+
+enum {
+	BROADCAST_ADDR		= 0,
+	UNICAST_ADDR		= 1,
+	MCAST_ADDR_START	= 2
+};
+
+#define current_mac_address	mac_addresses[UNICAST_ADDR].address
+
+enum {
+	NEED_STATS           = 0x00000001,
+	NEED_ADDRESS_CONFIG  = 0x00000002,
+	NEED_LINK_CONFIG     = 0x00000004,
+	MCAST_OVERFLOW       = 0x00000008
+};
+
+struct viport {
+	struct list_head		list_ptrs;
+	struct netpath			*parent;
+	struct vnic			*vnic;
+	struct viport_config		*config;
+	struct control			control;
+	struct data			data;
+	spinlock_t			lock;
+	struct ib_pd			*pd;
+	enum viport_state		state;
+	enum link_state			link_state;
+	struct vnic_cmd_report_stats_rsp stats;
+	wait_queue_head_t		stats_queue;
+	u32				last_stats_time;
+	u32				features_supported;
+	u8				hw_mac_address[ETH_ALEN];
+	u16				default_vlan;
+	u16				num_mac_addresses;
+	struct vnic_address_op		*mac_addresses;
+	u32				updates;
+	u16				flags;
+	u16				new_flags;
+	u16				mtu;
+	u16				new_mtu;
+	u32				errored;
+	enum { WAIT, DELAY, NOW }	connect;
+	u32				disconnect;
+	wait_queue_head_t		disconnect_queue;
+	int				timer_active;
+	struct timer_list		timer;
+};
+
+int  viport_start(void);
+void viport_cleanup(void);
+
+struct viport *viport_allocate(struct viport_config *config);
+void viport_free(struct viport *viport);
+
+void viport_connect(struct viport *viport, int delay);
+void viport_disconnect(struct viport *viport);
+
+void viport_set_link(struct viport *viport, u16 flags, u16 mtu);
+void viport_get_stats(struct viport *viport,
+		      struct net_device_stats *stats);
+int  viport_xmit_packet(struct viport *viport, struct sk_buff *skb);
+void viport_kick(struct viport *viport);
+
+void viport_failure(struct viport *viport);
+
+int viport_set_unicast(struct viport *viport, u8 * address);
+int viport_set_multicast(struct viport *viport,
+			 struct dev_mc_list *mc_list,
+			 int mc_count);
+
+#define viport_max_mtu(viport)		data_max_mtu(&(viport)->data)
+
+#define viport_get_hw_addr(viport, address)			\
+	memcpy(address, (viport)->hw_mac_address, ETH_ALEN)
+
+#define viport_features(viport) ((viport)->features_supported)
+
+#define viport_can_tx_csum(viport)				\
+	(((viport)->features_supported & 			\
+	(VNIC_FEAT_IPV4_CSUM_TX | VNIC_FEAT_TCP_CSUM_TX |	\
+	VNIC_FEAT_UDP_CSUM_TX)) == (VNIC_FEAT_IPV4_CSUM_TX |	\
+	VNIC_FEAT_TCP_CSUM_TX | VNIC_FEAT_UDP_CSUM_TX))
+
+#endif /* VNIC_VIPORT_H_INCLUDED */
--- linux-2.6.18.noarch/drivers/infiniband/util/madeye.c
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/util/madeye.c
@@ -372,17 +372,17 @@ static void print_smp(struct ib_smp *smp
 
 	if (data) {
 		for (i = 0; i < IB_SMP_DATA_SIZE; i++) {
-			if (i % 16 == 0) 
+			if (i % 16 == 0)
 				printk("\nSMP Data.......");
 			printk("%01x ", smp->data[i]);
 		}
 		for (i = 0; i < IB_SMP_MAX_PATH_HOPS; i++) {
-			if (i % 16 == 0) 
+			if (i % 16 == 0)
 				printk("\nInitial path...");
 			printk("%01x ", smp->initial_path[i]);
 		}
 		for (i = 0; i < IB_SMP_MAX_PATH_HOPS; i++) {
-			if (i % 16 == 0) 
+			if (i % 16 == 0)
 				printk("\nReturn path....");
 			printk("%01x ", smp->return_path[i]);
 		}
@@ -503,7 +503,7 @@ static void recv_gsi_handler(struct ib_m
 			}
 		}
 		for (i = 0; i < j; i++) {
-			if (i % 16 == 0) 
+			if (i % 16 == 0)
 				printk("\nData...........");
 			printk("%01x ", mad_data[i]);
 		}
@@ -517,7 +517,7 @@ static void madeye_add_one(struct ib_dev
 	int reg_flags;
 	u8 i, s, e;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -530,7 +530,7 @@ static void madeye_add_one(struct ib_dev
 		goto out;
 
 	reg_flags = IB_MAD_SNOOP_SEND_COMPLETIONS | IB_MAD_SNOOP_RECVS;
-	for (i = s; i <= e; i++) {
+	for (i = 0; i <= e - s; i++) {
 		port[i].smi_agent = ib_register_mad_snoop(device, i,
 							  IB_QPT_SMI,
 							  reg_flags,
@@ -559,7 +559,7 @@ static void madeye_remove_one(struct ib_
 	if (!port)
 		return;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -567,7 +567,7 @@ static void madeye_remove_one(struct ib_
 		e = device->phys_port_cnt;
 	}
 
-	for (i = s; i <= e; i++) {
+	for (i = 0; i <= e - s; i++) {
 		if (!IS_ERR(port[i].smi_agent))
 			ib_unregister_mad_agent(port[i].smi_agent);
 		if (!IS_ERR(port[i].gsi_agent))
--- linux-2.6.18.noarch/drivers/infiniband/util/Makefile
+++ linux-2.6.18.noarch.ofed/drivers/infiniband/util/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_INFINIBAND_MADEYE)	+= ib_madeye.o
 
-ib_madeye-y := madeye.o 
+ib_madeye-y := madeye.o
--- linux-2.6.18.noarch/include/rdma/ib_addr.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_addr.h
@@ -36,11 +36,27 @@
 #include <linux/socket.h>
 #include <rdma/ib_verbs.h>
 
+struct rdma_addr_client {
+	atomic_t refcount;
+	struct completion comp;
+};
+
+/**
+ * rdma_addr_register_client - Register an address client.
+ */
+void rdma_addr_register_client(struct rdma_addr_client *client);
+
+/**
+ * rdma_addr_unregister_client - Deregister an address client.
+ * @client: Client object to deregister.
+ */
+void rdma_addr_unregister_client(struct rdma_addr_client *client);
+
 struct rdma_dev_addr {
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
 	unsigned char dst_dev_addr[MAX_ADDR_LEN];
 	unsigned char broadcast[MAX_ADDR_LEN];
-	enum ib_node_type dev_type;
+	enum rdma_node_type dev_type;
 };
 
 /**
@@ -52,6 +68,7 @@ int rdma_translate_ip(struct sockaddr *a
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
  *   RDMA hardware addresses.
+ * @client: Address client associated with request.
  * @src_addr: An optional source address to use in the resolution.  If a
  *   source address is not provided, a usable address will be returned via
  *   the callback.
@@ -64,7 +81,8 @@ int rdma_translate_ip(struct sockaddr *a
  *   or been canceled.  A status of 0 indicates success.
  * @context: User-specified context associated with the call.
  */
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct rdma_addr_client *client,
+		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
@@ -72,6 +90,9 @@ int rdma_resolve_ip(struct sockaddr *src
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+	      const unsigned char *dst_dev_addr);
+
 static inline int ip_addr_size(struct sockaddr *addr)
 {
 	return addr->sa_family == AF_INET6 ?
@@ -89,6 +110,12 @@ static inline void ib_addr_set_pkey(stru
 	dev_addr->broadcast[9] = (unsigned char) pkey;
 }
 
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
 static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 				    union ib_gid *gid)
 {
@@ -113,4 +140,16 @@ static inline void ib_addr_set_dgid(stru
 	memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
 }
 
+static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
+}
+
+static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+}
+
 #endif /* IB_ADDR_H */
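
The rdma_addr_client above lets a module bracket its address-resolution
requests.  A usage sketch, with hypothetical names, an arbitrary timeout, and
assumed error handling (not taken from the patch):

	#include <rdma/ib_addr.h>

	static struct rdma_addr_client demo_addr_client;

	static void demo_addr_handler(int status, struct sockaddr *src_addr,
				      struct rdma_dev_addr *addr, void *context)
	{
		if (status)
			return;	/* resolution failed, timed out, or was canceled */
		/* addr->src_dev_addr and addr->dst_dev_addr are now valid */
	}

	static int demo_resolve(struct sockaddr *src, struct sockaddr *dst,
				struct rdma_dev_addr *dev_addr, void *context)
	{
		return rdma_resolve_ip(&demo_addr_client, src, dst, dev_addr,
				       2000 /* ms */, demo_addr_handler, context);
	}

The client is registered once with rdma_addr_register_client() before issuing
requests and released with rdma_addr_unregister_client() when the module no
longer needs address resolution.
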
--- linux-2.6.18.noarch/include/rdma/ib_cm.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_cm.h
@@ -60,6 +60,7 @@ enum ib_cm_state {
 };
 
 enum ib_cm_lap_state {
+	IB_CM_LAP_UNINIT,
 	IB_CM_LAP_IDLE,
 	IB_CM_LAP_SENT,
 	IB_CM_LAP_RCVD,
@@ -443,13 +444,20 @@ int ib_send_cm_drep(struct ib_cm_id *cm_
 		    u8 private_data_len);
 
 /**
- * ib_cm_establish - Forces a connection state to established.
+ * ib_cm_notify - Notifies the CM of an event reported to the consumer.
  * @cm_id: Connection identifier to transition to established.
+ * @event: Type of event.
  *
- * This routine should be invoked by users who receive messages on a
- * connected QP before an RTU has been received.
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events.  Events that should be reported to the CM and
+ * when to report them are:
+ *
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ *    QP before an RTU has been received.
+ * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over
+ *   to the alternate path.
  */
-int ib_cm_establish(struct ib_cm_id *cm_id);
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event);
 
 /**
  * ib_send_cm_rej - Sends a connection rejection message to the
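
A sketch of how a consumer might feed a QP event into the renamed call
(illustrative; storing the cm_id as the QP's event context is an assumption):

	#include <rdma/ib_cm.h>

	static void demo_qp_event_handler(struct ib_event *event, void *context)
	{
		struct ib_cm_id *cm_id = context;	/* assumed QP context */

		if (event->event == IB_EVENT_COMM_EST)
			ib_cm_notify(cm_id, IB_EVENT_COMM_EST);
	}
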
--- linux-2.6.18.noarch/include/rdma/ib_local_sa.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_local_sa.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_LOCAL_SA_H
+#define IB_LOCAL_SA_H
+
+#include <rdma/ib_sa.h>
+
+/**
+ * ib_get_path_rec - Query the local SA database for path information.
+ * @device: The local device to query.
+ * @port_num: The port of the local device being queried.
+ * @sgid: The source GID of the path record.
+ * @dgid: The destination GID of the path record.
+ * @pkey: The protection key of the path record.
+ * @rec: A reference to a path record structure that will receive a copy of
+ *   the response.
+ *
+ * Returns a copy of a path record meeting the specified criteria to the
+ * location referenced by %rec.  A return value < 0 indicates that an error
+ * occurred processing the request, or no path record was found.
+ */
+int ib_get_path_rec(struct ib_device *device, u8 port_num, union ib_gid *sgid,
+		    union ib_gid *dgid, u16 pkey, struct ib_sa_path_rec *rec);
+
+/**
+ * ib_create_path_iter - Create an iterator that may be used to walk through
+ *   a list of path records.
+ * @device: The local device to retrieve path records for.
+ * @port_num: The port of the local device.
+ * @dgid: The destination GID of the path record.
+ *
+ * This call allocates an iterator that is used to walk through a list of
+ * cached path records.  All path records accessed by the iterator will have the
+ * specified DGID.  Users should not hold the iterator for an extended period of
+ * time, and must free it by calling ib_free_sa_iter.
+ */
+struct ib_sa_iterator *ib_create_path_iter(struct ib_device *device,
+					   u8 port_num, union ib_gid *dgid);
+
+/**
+ * ib_free_sa_iter - Release an iterator.
+ * @iter: The iterator to free.
+ */
+void ib_free_sa_iter(struct ib_sa_iterator *iter);
+
+/**
+ * ib_get_next_sa_attr - Retrieve the next SA attribute referenced by an
+ *   iterator.
+ * @iter: A reference to an iterator that points to the next attribute to
+ *   retrieve.
+ */
+void *ib_get_next_sa_attr(struct ib_sa_iterator **iter);
+
+#endif /* IB_LOCAL_SA_H */
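
A sketch of the iterator usage described above (illustrative; the ERR_PTR
error convention and the NULL end-of-list return are assumptions):

	#include <linux/err.h>
	#include <rdma/ib_local_sa.h>

	static void demo_walk_paths(struct ib_device *device, u8 port_num,
				    union ib_gid *dgid)
	{
		struct ib_sa_iterator *iter;
		struct ib_sa_path_rec *rec;

		iter = ib_create_path_iter(device, port_num, dgid);
		if (IS_ERR(iter))
			return;

		/* assumed to return NULL once the cached list is exhausted */
		while ((rec = ib_get_next_sa_attr(&iter)) != NULL) {
			/* inspect rec->dgid, rec->sgid, rec->pkey, ... */
		}

		ib_free_sa_iter(iter);
	}
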
--- linux-2.6.18.noarch/include/rdma/ib_marshall.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_marshall.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -41,6 +41,9 @@
 void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
 			     struct ib_qp_attr *src);
 
+void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
+			     struct ib_ah_attr *src);
+
 void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
 			      struct ib_sa_path_rec *src);
 
--- linux-2.6.18.noarch/include/rdma/ib_sa.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_sa.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,8 +37,11 @@
 #ifndef IB_SA_H
 #define IB_SA_H
 
+#include <linux/completion.h>
 #include <linux/compiler.h>
 
+#include <asm/atomic.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
 
@@ -250,11 +254,28 @@ struct ib_sa_service_rec {
 	u64		data64[2];
 };
 
+struct ib_sa_client {
+	atomic_t users;
+	struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
 struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -264,18 +285,8 @@ int ib_sa_path_rec_get(struct ib_device 
 		       void *context,
 		       struct ib_sa_query **query);
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
-			     u8 method,
-			     struct ib_sa_mcmember_rec *rec,
-			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
-			     void (*callback)(int status,
-					      struct ib_sa_mcmember_rec *resp,
-					      void *context),
-			     void *context,
-			     struct ib_sa_query **query);
-
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			 struct ib_device *device, u8 port_num,
 			 u8 method,
 			 struct ib_sa_service_rec *rec,
 			 ib_sa_comp_mask comp_mask,
@@ -286,89 +297,87 @@ int ib_sa_service_rec_query(struct ib_de
 			 void *context,
 			 struct ib_sa_query **sa_query);
 
+struct ib_sa_multicast {
+	struct ib_sa_mcmember_rec rec;
+	ib_sa_comp_mask		comp_mask;
+	int			(*callback)(int status,
+					    struct ib_sa_multicast *multicast);
+	void			*context;
+};
+
 /**
- * ib_sa_mcmember_rec_set - Start an MCMember set query
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:MCMember Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send an MCMember Set query to the SA (eg to join a multicast
- * group).  The callback function will be called when the query
- * completes (or fails); status is 0 for a successful response, -EINTR
- * if the query is canceled, -ETIMEDOUT is the query timed out, or
- * -EIO if an error occurred sending the query.  The resp parameter of
- * the callback is only valid if status is 0.
- *
- * If the return value of ib_sa_mcmember_rec_set() is negative, it is
- * an error code.  Otherwise it is a query ID that can be used to
- * cancel the query.
+ * ib_sa_join_multicast - Initiates a join request to the specified multicast
+ *   group.
+ * @client: SA client
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @rec: SA multicast member record specifying group attributes.
+ * @comp_mask: Component mask indicating which group attributes of %rec are
+ *   valid.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the join operation completes.
+ * @context: User specified context stored with the ib_sa_multicast structure.
+ *
+ * This call initiates a multicast join request with the SA for the specified
+ * multicast group.  If the join operation is started successfully, it returns
+ * an ib_sa_multicast structure that is used to track the multicast operation.
+ * Users must free this structure by calling ib_sa_free_multicast, even if
+ * the join operation later fails (i.e. the callback is invoked with a
+ * non-zero status).
+ *
+ * If the join operation fails, status will be non-zero, with the following
+ * failures possible:
+ * -ETIMEDOUT: The request timed out.
+ * -EIO: An error occurred sending the query.
+ * -EINVAL: The MCMemberRecord values differed from the existing group's.
+ * -ENETRESET: Indicates that a fatal error has occurred on the multicast
+ *   group, and the user must rejoin the group to continue using it.
  */
-static inline int
-ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
-		       struct ib_sa_mcmember_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct ib_sa_mcmember_rec *resp,
-					void *context),
-		       void *context,
-		       struct ib_sa_query **query)
-{
-	return ib_sa_mcmember_rec_query(device, port_num,
-					IB_MGMT_METHOD_SET,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
-}
+struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
+					     struct ib_device *device, u8 port_num,
+					     struct ib_sa_mcmember_rec *rec,
+					     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+					     int (*callback)(int status,
+							     struct ib_sa_multicast
+								    *multicast),
+					     void *context);
 
 /**
- * ib_sa_mcmember_rec_delete - Start an MCMember delete query
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:MCMember Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send an MCMember Delete query to the SA (eg to leave a multicast
- * group).  The callback function will be called when the query
- * completes (or fails); status is 0 for a successful response, -EINTR
- * if the query is canceled, -ETIMEDOUT is the query timed out, or
- * -EIO if an error occurred sending the query.  The resp parameter of
- * the callback is only valid if status is 0.
- *
- * If the return value of ib_sa_mcmember_rec_delete() is negative, it
- * is an error code.  Otherwise it is a query ID that can be used to
- * cancel the query.
+ * ib_sa_free_multicast - Frees the multicast tracking structure and releases
+ *    any reference on the multicast group.
+ * @multicast: Multicast tracking structure allocated by ib_sa_join_multicast.
+ *
+ * This call blocks until the multicast identifier is destroyed.  It may
+ * not be called from within the multicast callback; however, returning a non-
+ * zero value from the callback will result in destroying the multicast
+ * tracking structure.
  */
-static inline int
-ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
-			  struct ib_sa_mcmember_rec *rec,
-			  ib_sa_comp_mask comp_mask,
-			  int timeout_ms, gfp_t gfp_mask,
-			  void (*callback)(int status,
-					   struct ib_sa_mcmember_rec *resp,
-					   void *context),
-			  void *context,
-			  struct ib_sa_query **query)
-{
-	return ib_sa_mcmember_rec_query(device, port_num,
-					IB_SA_METHOD_DELETE,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
-}
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
+
+/**
+ * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and
+ *   returns it if found.
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @mgid: optional MGID of multicast group.
+ * @rec: Location to copy SA multicast member record.
+ *
+ * If an MGID is specified, returns an existing multicast member record if
+ * one is found for the local port.  If no MGID is specified, or the specified
+ * MGID is 0, returns a multicast member record filled in with default values
+ * that may be used to create a new multicast group.
+ */
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
+
+/**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on
+ * an SA multicast member record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr);
 
 /**
  * ib_init_ah_from_path - Initialize address handle attributes based on an SA
@@ -378,4 +387,7 @@ int ib_init_ah_from_path(struct ib_devic
 			 struct ib_sa_path_rec *rec,
 			 struct ib_ah_attr *ah_attr);
 
+int ib_sa_pack_attr(void *dst, void *src, int attr_id);
+int ib_sa_unpack_attr(void *dst, void *src, int attr_id);
+
 #endif /* IB_SA_H */
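
A sketch tying the new multicast interface together (illustrative; the
component mask and error handling are assumptions, not taken from the patch):

	#include <linux/err.h>
	#include <rdma/ib_sa.h>

	static struct ib_sa_client demo_sa_client;	/* registered at module init */

	static int demo_mcast_handler(int status, struct ib_sa_multicast *multicast)
	{
		if (status == -ENETRESET) {
			/* fatal group error; schedule a rejoin elsewhere */
		}
		/* returning non-zero here would free the tracking structure */
		return 0;
	}

	static struct ib_sa_multicast *demo_join(struct ib_device *device,
						 u8 port_num, union ib_gid *mgid,
						 void *context)
	{
		struct ib_sa_mcmember_rec rec;
		int ret;

		/* start from the existing group, or defaults if mgid is zero */
		ret = ib_sa_get_mcmember_rec(device, port_num, mgid, &rec);
		if (ret)
			return ERR_PTR(ret);

		return ib_sa_join_multicast(&demo_sa_client, device, port_num,
					    &rec, IB_SA_MCMEMBER_REC_MGID,
					    GFP_KERNEL, demo_mcast_handler,
					    context);
	}

The returned tracking structure is released later with ib_sa_free_multicast(),
outside the callback.
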
--- linux-2.6.18.noarch/include/rdma/ib_user_cm.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_user_cm.h
@@ -38,7 +38,7 @@
 
 #include <rdma/ib_user_sa.h>
 
-#define IB_USER_CM_ABI_VERSION 4
+#define IB_USER_CM_ABI_VERSION 5
 
 enum {
 	IB_USER_CM_CMD_CREATE_ID,
@@ -46,7 +46,7 @@ enum {
 	IB_USER_CM_CMD_ATTR_ID,
 
 	IB_USER_CM_CMD_LISTEN,
-	IB_USER_CM_CMD_ESTABLISH,
+	IB_USER_CM_CMD_NOTIFY,
 
 	IB_USER_CM_CMD_SEND_REQ,
 	IB_USER_CM_CMD_SEND_REP,
@@ -117,8 +117,9 @@ struct ib_ucm_listen {
 	__u32 reserved;
 };
 
-struct ib_ucm_establish {
+struct ib_ucm_notify {
 	__u32 id;
+	__u32 event;
 };
 
 struct ib_ucm_private_data {
--- linux-2.6.18.noarch/include/rdma/ib_user_verbs.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_user_verbs.h
@@ -275,6 +275,8 @@ struct ib_uverbs_resize_cq {
 
 struct ib_uverbs_resize_cq_resp {
 	__u32 cqe;
+	__u32 reserved;
+	__u64 driver_data[0];
 };
 
 struct ib_uverbs_poll_cq {
@@ -456,7 +458,7 @@ struct ib_uverbs_query_qp_resp {
 	__u8  cur_qp_state;
 	__u8  path_mtu;
 	__u8  path_mig_state;
-	__u8  en_sqd_async_notify;
+	__u8  sq_draining;
 	__u8  max_rd_atomic;
 	__u8  max_dest_rd_atomic;
 	__u8  min_rnr_timer;
--- linux-2.6.18.noarch/include/rdma/ib_verbs.h
+++ linux-2.6.18.noarch.ofed/include/rdma/ib_verbs.h
@@ -41,12 +41,16 @@
 #if !defined(IB_VERBS_H)
 #define IB_VERBS_H
 
+#include <linux/pci.h>
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/atomic.h>
 #include <asm/scatterlist.h>
 #include <asm/uaccess.h>
+#include <linux/kref.h>
 
 union ib_gid {
 	u8	raw[16];
@@ -56,12 +60,22 @@ union ib_gid {
 	} global;
 };
 
-enum ib_node_type {
-	IB_NODE_CA 	= 1,
-	IB_NODE_SWITCH,
-	IB_NODE_ROUTER
+enum rdma_node_type {
+	/* IB values map to NodeInfo:NodeType. */
+	RDMA_NODE_IB_CA 	= 1,
+	RDMA_NODE_IB_SWITCH,
+	RDMA_NODE_IB_ROUTER,
+	RDMA_NODE_RNIC
 };
 
+enum rdma_transport_type {
+	RDMA_TRANSPORT_IB,
+	RDMA_TRANSPORT_IWARP
+};
+
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+
 enum ib_device_cap_flags {
 	IB_DEVICE_RESIZE_MAX_WR		= 1,
 	IB_DEVICE_BAD_PKEY_CNTR		= (1<<1),
@@ -78,6 +92,9 @@ enum ib_device_cap_flags {
 	IB_DEVICE_RC_RNR_NAK_GEN	= (1<<12),
 	IB_DEVICE_SRQ_RESIZE		= (1<<13),
 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14),
+	IB_DEVICE_ZERO_STAG		= (1<<15),
+	IB_DEVICE_SEND_W_INV		= (1<<16),
+	IB_DEVICE_MEM_WINDOW		= (1<<17)
 };
 
 enum ib_atomic_cap {
@@ -404,8 +421,8 @@ struct ib_wc {
 	enum ib_wc_opcode	opcode;
 	u32			vendor_err;
 	u32			byte_len;
+	struct ib_qp	       *qp;
 	__be32			imm_data;
-	u32			qp_num;
 	u32			src_qp;
 	int			wc_flags;
 	u16			pkey_index;
@@ -835,6 +852,51 @@ struct ib_cache {
 	u8                     *lmc_cache;
 };
 
+struct ib_dma_mapping_ops {
+	int		(*mapping_error)(struct ib_device *dev,
+					 u64 dma_addr);
+	u64		(*map_single)(struct ib_device *dev,
+				      void *ptr, size_t size,
+				      enum dma_data_direction direction);
+	void		(*unmap_single)(struct ib_device *dev,
+					u64 addr, size_t size,
+					enum dma_data_direction direction);
+	u64		(*map_page)(struct ib_device *dev,
+				    struct page *page, unsigned long offset,
+				    size_t size,
+				    enum dma_data_direction direction);
+	void		(*unmap_page)(struct ib_device *dev,
+				      u64 addr, size_t size,
+				      enum dma_data_direction direction);
+	int		(*map_sg)(struct ib_device *dev,
+				  struct scatterlist *sg, int nents,
+				  enum dma_data_direction direction);
+	void		(*unmap_sg)(struct ib_device *dev,
+				    struct scatterlist *sg, int nents,
+				    enum dma_data_direction direction);
+	u64		(*dma_address)(struct ib_device *dev,
+				       struct scatterlist *sg);
+	unsigned int	(*dma_len)(struct ib_device *dev,
+				   struct scatterlist *sg);
+	void		(*sync_single_for_cpu)(struct ib_device *dev,
+					       u64 dma_handle,
+					       size_t size,
+				               enum dma_data_direction dir);
+	void		(*sync_single_for_device)(struct ib_device *dev,
+						  u64 dma_handle,
+						  size_t size,
+						  enum dma_data_direction dir);
+	void		*(*alloc_coherent)(struct ib_device *dev,
+					   size_t size,
+					   u64 *dma_handle,
+					   gfp_t flag);
+	void		(*free_coherent)(struct ib_device *dev,
+					 size_t size, void *cpu_addr,
+					 u64 dma_handle);
+};
+
+struct iw_cm_verbs;
+
 struct ib_device {
 	struct device                *dma_device;
 
@@ -851,6 +913,8 @@ struct ib_device {
 
 	u32                           flags;
 
+	struct iw_cm_verbs	     *iwcm;
+
 	int		           (*query_device)(struct ib_device *device,
 						   struct ib_device_attr *device_attr);
 	int		           (*query_port)(struct ib_device *device,
@@ -888,7 +952,8 @@ struct ib_device {
 						 struct ib_udata *udata);
 	int                        (*modify_srq)(struct ib_srq *srq,
 						 struct ib_srq_attr *srq_attr,
-						 enum ib_srq_attr_mask srq_attr_mask);
+						 enum ib_srq_attr_mask srq_attr_mask,
+						 struct ib_udata *udata);
 	int                        (*query_srq)(struct ib_srq *srq,
 						struct ib_srq_attr *srq_attr);
 	int                        (*destroy_srq)(struct ib_srq *srq);
@@ -900,7 +965,8 @@ struct ib_device {
 						struct ib_udata *udata);
 	int                        (*modify_qp)(struct ib_qp *qp,
 						struct ib_qp_attr *qp_attr,
-						int qp_attr_mask);
+						int qp_attr_mask,
+						struct ib_udata *udata);
 	int                        (*query_qp)(struct ib_qp *qp,
 					       struct ib_qp_attr *qp_attr,
 					       int qp_attr_mask,
@@ -973,6 +1039,8 @@ struct ib_device {
 						  struct ib_mad *in_mad,
 						  struct ib_mad *out_mad);
 
+	struct ib_dma_mapping_ops   *dma_ops;
+
 	struct module               *owner;
 	struct class_device          class_dev;
 	struct kobject               ports_parent;
@@ -991,6 +1059,8 @@ struct ib_device {
 	__be64			     node_guid;
 	u8                           node_type;
 	u8                           phys_port_cnt;
+	int                          *pkey_tbl_len;
+	int                          *gid_tbl_len;
 };
 
 struct ib_client {
@@ -1067,6 +1137,29 @@ int ib_modify_port(struct ib_device *dev
 		   struct ib_port_modify *port_modify);
 
 /**
+ * ib_find_gid - Returns the port number and GID table index where
+ *   a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found.  This
+ *   parameter may be NULL.
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+			u8 *port_num, u16 *index);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+			u8 port_num, u16 pkey, u16 *index);
+
+/**
  * ib_alloc_pd - Allocates an unused protection domain.
  * @device: The device on which to allocate the protection domain.
  *
@@ -1376,10 +1469,231 @@ static inline int ib_req_ncomp_notif(str
  *   usable for DMA.
  * @pd: The protection domain associated with the memory region.
  * @mr_access_flags: Specifies the memory access rights.
+ *
+ * Note that the ib_dma_*() functions defined below must be used
+ * to create/destroy addresses used with the Lkey or Rkey returned
+ * by ib_get_dma_mr().
  */
 struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
 
 /**
+ * ib_dma_mapping_error - check a DMA addr for error
+ * @dev: The device for which the dma_addr was created
+ * @dma_addr: The DMA address to check
+ */
+static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->mapping_error(dev, dma_addr);
+	return dma_mapping_error(dma_addr);
+}
+
+/**
+ * ib_dma_map_single - Map a kernel virtual address to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @cpu_addr: The kernel virtual address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_single(struct ib_device *dev,
+				    void *cpu_addr, size_t size,
+				    enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
+	return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+}
+
+/**
+ * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_single(struct ib_device *dev,
+				       u64 addr, size_t size,
+				       enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->unmap_single(dev, addr, size, direction);
+	else
+		dma_unmap_single(dev->dma_device, addr, size, direction);
+}
+
+/**
+ * ib_dma_map_page - Map a physical page to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @page: The page to be mapped
+ * @offset: The offset within the page
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_page(struct ib_device *dev,
+				  struct page *page,
+				  unsigned long offset,
+				  size_t size,
+					 enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->map_page(dev, page, offset, size, direction);
+	return dma_map_page(dev->dma_device, page, offset, size, direction);
+}
+
+/**
+ * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_page(struct ib_device *dev,
+				     u64 addr, size_t size,
+				     enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->unmap_page(dev, addr, size, direction);
+	else
+		dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+/**
+ * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline int ib_dma_map_sg(struct ib_device *dev,
+				struct scatterlist *sg, int nents,
+				enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->map_sg(dev, sg, nents, direction);
+	return dma_map_sg(dev->dma_device, sg, nents, direction);
+}
+
+/**
+ * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_sg(struct ib_device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction direction)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->unmap_sg(dev, sg, nents, direction);
+	else
+		dma_unmap_sg(dev->dma_device, sg, nents, direction);
+}
+
+/**
+ * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The scatter/gather entry
+ */
+static inline u64 ib_sg_dma_address(struct ib_device *dev,
+				    struct scatterlist *sg)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->dma_address(dev, sg);
+	return sg_dma_address(sg);
+}
+
+/**
+ * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The scatter/gather entry
+ */
+static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
+					 struct scatterlist *sg)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->dma_len(dev, sg);
+	return sg_dma_len(sg);
+}
+
+/**
+ * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
+					      u64 addr,
+					      size_t size,
+					      enum dma_data_direction dir)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
+	else
+		dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
+						 u64 addr,
+						 size_t size,
+						 enum dma_data_direction dir)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
+	else
+		dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_alloc_coherent - Allocate memory and map it for DMA
+ * @dev: The device for which the DMA address is requested
+ * @size: The size of the region to allocate in bytes
+ * @dma_handle: A pointer for returning the DMA address of the region
+ * @flag: memory allocator flags
+ */
+static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
+					   size_t size,
+					   u64 *dma_handle,
+					   gfp_t flag)
+{
+	if (dev->dma_ops)
+		return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+	else {
+		dma_addr_t handle;
+		void *ret;
+
+		ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
+		*dma_handle = handle;
+		return ret;
+	}
+}
+
+/**
+ * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
+ * @dev: The device for which the DMA addresses were allocated
+ * @size: The size of the region
+ * @cpu_addr: the address returned by ib_dma_alloc_coherent()
+ * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
+ */
+static inline void ib_dma_free_coherent(struct ib_device *dev,
+					size_t size, void *cpu_addr,
+					u64 dma_handle)
+{
+	if (dev->dma_ops)
+		dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+	else
+		dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+}
+
+/**
  * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
  *   by an HCA.
  * @pd: The protection domain associated assigned to the registered region.
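
The ib_dma_*() wrappers above dispatch to a device's dma_ops when one is
provided and otherwise fall back to the standard DMA API, so a ULP can use a
single call chain for either kind of device.  A sketch of the intended pattern
(hypothetical helpers, not taken from the patch):

	#include <linux/dma-mapping.h>
	#include <rdma/ib_verbs.h>

	/* Map a send buffer; this sketch treats 0 as "mapping failed". */
	static u64 demo_map_send_buf(struct ib_device *dev, void *buf, size_t len)
	{
		u64 dma_addr;

		dma_addr = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(dev, dma_addr))
			return 0;

		return dma_addr;
	}

	static void demo_unmap_send_buf(struct ib_device *dev, u64 dma_addr,
					size_t len)
	{
		ib_dma_unmap_single(dev, dma_addr, len, DMA_TO_DEVICE);
	}
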
--- linux-2.6.18.noarch/include/rdma/iw_cm.h
+++ linux-2.6.18.noarch.ofed/include/rdma/iw_cm.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+	IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+	IW_CM_EVENT_CONNECT_REPLY,	 /* reply from active connect request */
+	IW_CM_EVENT_ESTABLISHED,	 /* passive side accept successful */
+	IW_CM_EVENT_DISCONNECT,		 /* orderly shutdown */
+	IW_CM_EVENT_CLOSE		 /* close complete */
+};
+
+enum iw_cm_event_status {
+	IW_CM_EVENT_STATUS_OK = 0,	 /* request successful */
+	IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
+	IW_CM_EVENT_STATUS_REJECTED,	 /* connect request rejected */
+	IW_CM_EVENT_STATUS_TIMEOUT,	 /* the operation timed out */
+	IW_CM_EVENT_STATUS_RESET,	 /* reset from remote peer */
+	IW_CM_EVENT_STATUS_EINVAL,	 /* asynchronous failure for bad parm */
+};
+
+struct iw_cm_event {
+	enum iw_cm_event_type event;
+	enum iw_cm_event_status status;
+	struct sockaddr_in local_addr;
+	struct sockaddr_in remote_addr;
+	void *private_data;
+	u8 private_data_len;
+	void* provider_data;
+	void *provider_data;
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM.  Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+				 struct iw_cm_event *event);
+
+struct iw_cm_id {
+	iw_cm_handler		cm_handler;      /* client callback function */
+	void		        *context;	 /* client cb context */
+	struct ib_device	*device;
+	struct sockaddr_in      local_addr;
+	struct sockaddr_in	remote_addr;
+	void			*provider_data;	 /* provider private data */
+	iw_event_handler        event_handler;   /* cb for provider
+						    events */
+	/* Used by provider to add and remove refs on IW cm_id */
+	void (*add_ref)(struct iw_cm_id *);
+	void (*rem_ref)(struct iw_cm_id *);
+};
+
+struct iw_cm_conn_param {
+	const void *private_data;
+	u16 private_data_len;
+	u32 ord;
+	u32 ird;
+	u32 qpn;
+};
+
+struct iw_cm_verbs {
+	void		(*add_ref)(struct ib_qp *qp);
+
+	void		(*rem_ref)(struct ib_qp *qp);
+
+	struct ib_qp *	(*get_qp)(struct ib_device *device,
+				  int qpn);
+
+	int		(*connect)(struct iw_cm_id *cm_id,
+				   struct iw_cm_conn_param *conn_param);
+
+	int		(*accept)(struct iw_cm_id *cm_id,
+				  struct iw_cm_conn_param *conn_param);
+
+	int		(*reject)(struct iw_cm_id *cm_id,
+				  const void *pdata, u8 pdata_len);
+
+	int		(*create_listen)(struct iw_cm_id *cm_id,
+					 int backlog);
+
+	int		(*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identifier.
+ * @cm_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_unbind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM identifier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID so that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_data: Pointer to data to deliver to the remote peer as part of the
+ *   reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+		 u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully; otherwise, the
+ *   connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with a IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ *   valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+#endif /* IW_CM_H */
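The iw_cm calls above are normally driven by the RDMA CM rather than by ULPs directly, but the header is usable on its own. As a rough, non-authoritative sketch of the passive side, the fragment below wires iw_cm_accept() and iw_cm_disconnect() into an event handler; the iw_cm_conn_param fields (ord, ird, qpn) follow the definition earlier in this header, while example_qpn and the read depths of 1 are placeholders chosen purely for illustration.

#include <linux/string.h>
#include <rdma/iw_cm.h>

/* Placeholder QP number; a real ULP would use the QPN of a QP it created. */
static u32 example_qpn;

static int example_iw_event_handler(struct iw_cm_id *cm_id,
				    struct iw_cm_event *event)
{
	struct iw_cm_conn_param param;

	switch (event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		memset(&param, 0, sizeof param);
		param.ord = 1;			/* illustrative read depths */
		param.ird = 1;
		param.qpn = example_qpn;
		/* A non-zero return means no further events for this cm_id. */
		return iw_cm_accept(cm_id, &param);
	case IW_CM_EVENT_DISCONNECT:
		/* 0 = graceful close; IW_CM_EVENT_CLOSE is delivered later. */
		return iw_cm_disconnect(cm_id, 0);
	default:
		return 0;
	}
}

The active side is symmetric: fill the same structure and call iw_cm_connect(), then wait for IW_CM_EVENT_CONNECT_REPLY before treating the connection as usable.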
--- linux-2.6.18.noarch/include/rdma/Kbuild
+++ linux-2.6.18.noarch.ofed/include/rdma/Kbuild
@@ -1 +1 @@
-header-y := ib_user_mad.h
+header-y += ib_user_mad.h
--- linux-2.6.18.noarch/include/rdma/rdma_cm.h
+++ linux-2.6.18.noarch.ofed/include/rdma/rdma_cm.h
@@ -52,10 +52,13 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_ESTABLISHED,
 	RDMA_CM_EVENT_DISCONNECTED,
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
+	RDMA_CM_EVENT_MULTICAST_JOIN,
+	RDMA_CM_EVENT_MULTICAST_ERROR
 };
 
 enum rdma_port_space {
 	RDMA_PS_SDP  = 0x0001,
+	RDMA_PS_IPOIB = 0x0002,
 	RDMA_PS_TCP  = 0x0106,
 	RDMA_PS_UDP  = 0x0111,
 	RDMA_PS_SCTP = 0x0183
@@ -77,11 +80,34 @@ struct rdma_route {
 	int num_paths;
 };
 
+struct rdma_conn_param {
+	const void *private_data;
+	u8 private_data_len;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 flow_control;
+	u8 retry_count;		/* ignored when accepting */
+	u8 rnr_retry_count;
+	/* Fields below ignored if a QP is created on the rdma_cm_id. */
+	u8 srq;
+	u32 qp_num;
+};
+
+struct rdma_ud_param {
+	const void *private_data;
+	u8 private_data_len;
+	struct ib_ah_attr ah_attr;
+	u32 qp_num;
+	u32 qkey;
+};
+
 struct rdma_cm_event {
 	enum rdma_cm_event_type	 event;
 	int			 status;
-	void			*private_data;
-	u8			 private_data_len;
+	union {
+		struct rdma_conn_param	conn;
+		struct rdma_ud_param	ud;
+	} param;
 };
 
 struct rdma_cm_id;
@@ -117,6 +143,14 @@ struct rdma_cm_id {
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps);
 
+/**
+ * rdma_destroy_id - Destroys an RDMA identifier.
+ *
+ * @id: RDMA identifier.
+ *
+ * Note: calling this function has the effect of canceling in-flight
+ * asynchronous operations associated with the id.
+ */
 void rdma_destroy_id(struct rdma_cm_id *id);
 
 /**
@@ -196,25 +230,17 @@ void rdma_destroy_qp(struct rdma_cm_id *
 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask);
 
-struct rdma_conn_param {
-	const void *private_data;
-	u8 private_data_len;
-	u8 responder_resources;
-	u8 initiator_depth;
-	u8 flow_control;
-	u8 retry_count;		/* ignored when accepting */
-	u8 rnr_retry_count;
-	/* Fields below ignored if a QP is created on the rdma_cm_id. */
-	u8 srq;
-	u32 qp_num;
-	enum ib_qp_type qp_type;
-};
-
 /**
  * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
  *
  * Users must have resolved a route for the rdma_cm_id to connect with
  * by having called rdma_resolve_route before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP
+ * information for unconnected rdma_cm_id's.  The actual operation is
+ * based on the rdma_cm_id's port space.
  */
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
@@ -237,18 +263,27 @@ int rdma_listen(struct rdma_cm_id *id, i
  * Typically, this routine is only called by the listener to accept a connection
  * request.  It must also be called on the active side of a connection if the
  * user is performing their own QP transitions.
+ *
+ * In the case of an error, a reject message is sent to the remote side and the
+ * state of the QP associated with the id is transitioned to error, so that any
+ * previously posted receive buffers are flushed.
  */
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
 /**
- * rdma_establish - Forces a connection state to established.
+ * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
+ * occurred on the connection.
  * @id: Connection identifier to transition to established.
+ * @event: Asynchronous event.
+ *
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events.  Events that should be reported to the CM and
+ * when to report them are:
  *
- * This routine should be invoked by users who receive messages on a
- * QP before being notified that the connection has been established by the
- * RDMA CM.
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ *    QP before an RTU has been received.
  */
-int rdma_establish(struct rdma_cm_id *id);
+int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event);
 
 /**
  * rdma_reject - Called to reject a connection request or response.
@@ -262,5 +297,21 @@ int rdma_reject(struct rdma_cm_id *id, c
  */
 int rdma_disconnect(struct rdma_cm_id *id);
 
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ *   address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ *   to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			void *context);
 
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ *   address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */
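Since rdma_cm_event now carries a conn/ud union instead of bare private data, the caller's event handler is where most of this reshuffle shows up. The fragment below is a minimal sketch only: the handler would be the one passed to rdma_create_id(), RDMA_CM_EVENT_ROUTE_RESOLVED comes from the earlier part of this enum, and the responder_resources/initiator_depth/retry_count values are arbitrary illustrations rather than recommendations.

#include <linux/string.h>
#include <rdma/rdma_cm.h>

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct rdma_conn_param param;

	switch (event->event) {
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* Active side: the route is known, issue the connect. */
		memset(&param, 0, sizeof param);
		param.responder_resources = 1;
		param.initiator_depth = 1;
		param.retry_count = 7;
		return rdma_connect(id, &param);
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/*
		 * Passive side: the peer's private data now arrives in
		 * event->param.conn.private_data / private_data_len; a real
		 * listener would set up a QP and call rdma_accept() here.
		 */
		return 0;
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		/*
		 * UD addressing for the joined group is in event->param.ud:
		 * qp_num, qkey and ah_attr.
		 */
		return 0;
	default:
		return 0;
	}
}

An unconnected id (RDMA_PS_UDP, or the new RDMA_PS_IPOIB space) would instead call rdma_join_multicast(id, addr, context) once its source address is bound or resolved, call rdma_leave_multicast(id, addr) on teardown, and pick up the group's qp_num/qkey/ah_attr from param.ud in the MULTICAST_JOIN event.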
--- linux-2.6.18.noarch/include/rdma/rdma_cm_ib.h
+++ linux-2.6.18.noarch.ofed/include/rdma/rdma_cm_ib.h
@@ -44,26 +44,7 @@
 int rdma_set_ib_paths(struct rdma_cm_id *id,
 		      struct ib_sa_path_rec *path_rec, int num_paths);
 
-struct ib_cm_req_opt {
-	u8	remote_cm_response_timeout;
-	u8	local_cm_response_timeout;
-	u8	max_cm_retries;
-};
-
-/**
- * rdma_get_ib_req_info - Retrieves the current IB CM REQ / SIDR REQ values
- *   that will be used when connection, or performing service ID resolution.
- * @id: Connection identifier associated with the request.
- * @info: Current values for CM REQ messages.
- */
-int rdma_get_ib_req_info(struct rdma_cm_id *id, struct ib_cm_req_opt *info);
-
-/**
- * rdma_set_ib_req_info - Sets the current IB CM REQ / SIDR REQ values
- *   that will be used when connection, or performing service ID resolution.
- * @id: Connection identifier associated with the request.
- * @info: New values for CM REQ messages.
- */
-int rdma_set_ib_req_info(struct rdma_cm_id *id, struct ib_cm_req_opt *info);
+/* Global qkey for UDP QPs and multicast groups. */
+#define RDMA_UDP_QKEY 0x01234567
 
 #endif /* RDMA_CM_IB_H */
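Callers that perform their own QP transitions (rather than using rdma_init_qp_attr()) have to install this shared qkey themselves during the INIT transition. A minimal sketch, assuming a UD QP already created by the caller and using the standard ib_modify_qp() verb; the pkey_index of 0 is an assumption, not a requirement:

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm_ib.h>

/* Sketch: move a caller-supplied UD QP to INIT with the shared UDP qkey. */
static int example_ud_qp_to_init(struct ib_qp *qp, u8 port_num)
{
	struct ib_qp_attr attr = {
		.qp_state   = IB_QPS_INIT,
		.pkey_index = 0,		/* assumed default partition */
		.port_num   = port_num,
		.qkey       = RDMA_UDP_QKEY,	/* shared qkey defined above */
	};

	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_PKEY_INDEX |
			    IB_QP_PORT | IB_QP_QKEY);
}

An id that lets the rdma_cm create and transition its QP does not need this; the qkey is filled in internally.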
--- linux-2.6.18.noarch/include/rdma/rdma_user_cm.h
+++ linux-2.6.18.noarch.ofed/include/rdma/rdma_user_cm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -38,7 +38,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_user_sa.h>
 
-#define RDMA_USER_CM_ABI_VERSION	1
+#define RDMA_USER_CM_ABI_VERSION	4
 
 #define RDMA_MAX_PRIVATE_DATA		256
 
@@ -58,6 +58,9 @@ enum {
 	RDMA_USER_CM_CMD_GET_EVENT,
 	RDMA_USER_CM_CMD_GET_OPTION,
 	RDMA_USER_CM_CMD_SET_OPTION,
+	RDMA_USER_CM_CMD_NOTIFY,
+	RDMA_USER_CM_CMD_JOIN_MCAST,
+	RDMA_USER_CM_CMD_LEAVE_MCAST
 };
 
 /*
@@ -72,6 +75,8 @@ struct rdma_ucm_cmd_hdr {
 struct rdma_ucm_create_id {
 	__u64 uid;
 	__u64 response;
+	__u16 ps;
+	__u8  reserved[6];
 };
 
 struct rdma_ucm_create_id_resp {
@@ -124,7 +129,7 @@ struct rdma_ucm_query_route_resp {
 
 struct rdma_ucm_conn_param {
 	__u32 qp_num;
-	__u32 qp_type;
+	__u32 reserved;
 	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
 	__u8  private_data_len;
 	__u8  srq;
@@ -136,6 +141,15 @@ struct rdma_ucm_conn_param {
 	__u8  valid;
 };
 
+struct rdma_ucm_ud_param {
+	__u32 qp_num;
+	__u32 qkey;
+	struct ib_uverbs_ah_attr ah_attr;
+	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
+	__u8  private_data_len;
+	__u8  reserved[7];
+};
+
 struct rdma_ucm_connect {
 	struct rdma_ucm_conn_param conn_param;
 	__u32 id;
@@ -171,51 +185,31 @@ struct rdma_ucm_init_qp_attr {
 	__u32 qp_state;
 };
 
-struct rdma_ucm_get_event {
-	__u64 response;
-};
-
-struct rdma_ucm_event_resp {
-	__u64 uid;
+struct rdma_ucm_notify {
 	__u32 id;
 	__u32 event;
-	__u32 status;
-	__u8  private_data_len;
-	__u8  reserved[3];
-	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
 };
 
-struct rdma_ucm_get_option {
-	__u64 response;
-	__u64 optval;
+struct rdma_ucm_join_mcast {
+	__u64 response;		/* rdma_ucm_create_id_resp */
+	__u64 uid;
+	struct sockaddr_in6 addr;
 	__u32 id;
-	__u32 level;
-	__u32 optname;
-	__u32 optlen;
-};
-
-/* Protocol levels for get/set options. */
-enum {
-	RDMA_PROTO_IP = 0,
-	RDMA_PROTO_IB = 1,
 };
 
-/* IB specific option names for get/set. */
-enum {
-	IB_PATH_OPTIONS = 1,
-	IB_CM_REQ_OPTIONS = 2,
-};
-
-struct rdma_ucm_get_option_resp {
-	__u32 optlen;
+struct rdma_ucm_get_event {
+	__u64 response;
 };
 
-struct rdma_ucm_set_option {
-	__u64 optval;
+struct rdma_ucm_event_resp {
+	__u64 uid;
 	__u32 id;
-	__u32 level;
-	__u32 optname;
-	__u32 optlen;
+	__u32 event;
+	__u32 status;
+	union {
+		struct rdma_ucm_conn_param conn;
+		struct rdma_ucm_ud_param   ud;
+	} param;
 };
 
 #endif /* RDMA_USER_CM_H */
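For completeness, the reworked ABI is exercised from user space by writing a command header followed by the command payload to the rdma_cm character device. The sketch below, which issues the new RDMA_USER_CM_CMD_NOTIFY command, is an assumption-laden illustration: it presumes a descriptor already open on the device node (commonly /dev/infiniband/rdma_cm), a context id returned by an earlier CREATE_ID command, that rdma_ucm_cmd_hdr carries the cmd/in/out fields declared in the unchanged part of this header, and that this header (or the librdmacm copy of it) is visible to user space.

#include <stdint.h>
#include <unistd.h>
#include <rdma/rdma_user_cm.h>

/*
 * Sketch only: ucm_fd and ctx_id are assumed to come from earlier CREATE_ID
 * handling; ib_event is the numeric ib_event_type value to report
 * (e.g. IB_EVENT_COMM_EST from ib_verbs.h).
 */
static int example_ucm_notify(int ucm_fd, uint32_t ctx_id, uint32_t ib_event)
{
	struct {
		struct rdma_ucm_cmd_hdr hdr;
		struct rdma_ucm_notify  cmd;
	} msg;

	msg.hdr.cmd = RDMA_USER_CM_CMD_NOTIFY;
	msg.hdr.in  = sizeof msg.cmd;	/* bytes of command payload */
	msg.hdr.out = 0;		/* no response expected */
	msg.cmd.id    = ctx_id;
	msg.cmd.event = ib_event;

	/* The ucma interface consumes one command per write(). */
	return write(ucm_fd, &msg, sizeof msg) == (ssize_t) sizeof msg ? 0 : -1;
}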