Sophie

Sophie

distrib > Scientific%20Linux > 5x > i386 > by-pkgid > 351d529f9beeb4e5d936a6d5e3e7813a > files > 2052

kernel-2.6.18-128.29.1.el5.src.rpm

From: Hans-Joachim Picht <hpicht@redhat.com>
Date: Fri, 16 Nov 2007 13:57:15 +0100
Subject: [s390] data corruption on DASD while toggling CHPIDs
Message-id: 20071116125715.GQ6053@redhat.com
O-Subject: [RHEL5 U2 PATCH 9/14] s390 - Data corruption on DASD while toggling CHPIDs off/on on HMC
Bugzilla: 360611

Description
============

The code for removing channel paths issues a clear and sets internal retries,
regardless of whether there is device I/O or internal I/O running. This can result
in interrupts for the clear being reported to the device driver, which then
assumes successful processing that has not actually happened.

Bugzilla
=========

BZ  360611
https://bugzilla.redhat.com/show_bug.cgi?id=360611

Upstream status of the patch:
=============================
The code for this is already integrated into the IBM October 2005 branch
posted on the IBM developerWorks website.

http://www.ibm.com/developerworks/linux/linux390/october2005_recommended.html

The fixes are also contained upstream as of
387b734fc2b55f776b192c7afdfd892ba42347d4.

Test status:
============
Kernel with patch was built and successfully tested

Please ACK.

With best regards,

Hans

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 1554416..3ffc5c8 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -182,6 +182,39 @@ css_get_ssd_info(struct subchannel *sch)
 }
 
 static int
+check_for_io_on_path(struct subchannel *sch, int mask)
+{
+	int cc;
+
+	cc = stsch(sch->schid, &sch->schib);
+	if (cc)
+		return 0;
+	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == mask)
+		return 1;
+	return 0;
+}
+
+static void
+terminate_internal_io(struct subchannel *sch)
+{
+	if (cio_clear(sch)) {
+		/* Recheck device in case clear failed */
+		sch->lpm = 0;
+		if (css_enqueue_subchannel_slow(sch->schid)) {
+			css_clear_subchannel_slow_list();
+			need_rescan = 1;
+		}
+		return;
+	}
+	/* Request retry of internal operation. */
+	device_set_intretry(sch);
+
+	/* Call termination handler. */
+	if (sch->driver && sch->driver->termination)
+		sch->driver->termination(&sch->dev);
+}
+
+static int
 s390_subchannel_remove_chpid(struct device *dev, void *data)
 {
 	int j;
@@ -211,37 +244,33 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
 	if (sch->schib.pmcw.pim == 0x80)
 		goto out_unreg;
 
-	if ((sch->schib.scsw.actl & SCSW_ACTL_DEVACT) &&
-	    (sch->schib.scsw.actl & SCSW_ACTL_SCHACT) &&
-	    (sch->schib.pmcw.lpum == mask)) {
-		int cc;
-
-		cc = cio_clear(sch);
-		if (cc == -ENODEV)
+	if (check_for_io_on_path(sch, mask)) {
+		if (device_is_online(sch))
+			device_kill_io(sch);
+		else {
+			terminate_internal_io(sch);
+			/* Re-start path verification. */
+			if (sch->driver && sch->driver->verify)
+				sch->driver->verify(&sch->dev);
+		}
+	} else {
+		/* trigger path verification. */
+		if (sch->driver && sch->driver->verify)
+			sch->driver->verify(&sch->dev);
+		else if (sch->lpm == mask)
 			goto out_unreg;
-		/* Request retry of internal operation. */
-		device_set_intretry(sch);
-		/* Call handler. */
-		if (sch->driver && sch->driver->termination)
-			sch->driver->termination(&sch->dev);
-		goto out_unlock;
 	}
 
-	/* trigger path verification. */
-	if (sch->driver && sch->driver->verify)
-		sch->driver->verify(&sch->dev);
-	else if (sch->lpm == mask)
-		goto out_unreg;
-out_unlock:
 	spin_unlock_irq(&sch->lock);
 	return 0;
+
 out_unreg:
-	spin_unlock_irq(&sch->lock);
 	sch->lpm = 0;
 	if (css_enqueue_subchannel_slow(sch->schid)) {
 		css_clear_subchannel_slow_list();
 		need_rescan = 1;
 	}
+	spin_unlock_irq(&sch->lock);
 	return 0;
 }
 
@@ -693,42 +722,11 @@ int chsc_chp_online(struct chp_id chpid)
 	return rc;
 }
 
-static inline int
-check_for_io_on_path(struct subchannel *sch, int index)
-{
-	int cc;
-
-	cc = stsch(sch->schid, &sch->schib);
-	if (cc)
-		return 0;
-	if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index))
-		return 1;
-	return 0;
-}
-
-static void
-terminate_internal_io(struct subchannel *sch)
-{
-	if (cio_clear(sch)) {
-		/* Recheck device in case clear failed */
-		sch->lpm = 0;
-		if (css_enqueue_subchannel_slow(sch->schid)) {
-			css_clear_subchannel_slow_list();
-			need_rescan = 1;
-		}
-		return;
-	}
-	/* Request retry of internal operation. */
-	device_set_intretry(sch);
-	/* Call handler. */
-	if (sch->driver && sch->driver->termination)
-		sch->driver->termination(&sch->dev);
-}
-
 static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 					 struct chp_id chpid, int on)
 {
 	int chp, old_lpm;
+	int mask;
 	unsigned long flags;
 
 	if (!sch->ssd_info.valid)
@@ -737,39 +735,46 @@ static void __s390_subchannel_vary_chpid(struct subchannel *sch,
 	spin_lock_irqsave(&sch->lock, flags);
 	old_lpm = sch->lpm;
 	for (chp = 0; chp < 8; chp++) {
+		mask = 0x80 >> chp;
 		if (sch->ssd_info.chpid[chp] != chpid.id)
 			continue;
 
 		if (on) {
-			sch->opm |= (0x80 >> chp);
-			sch->lpm |= (0x80 >> chp);
+			sch->opm |= mask;
+			sch->lpm |= mask;
 			if (!old_lpm)
 				device_trigger_reprobe(sch);
 			else if (sch->driver && sch->driver->verify)
 				sch->driver->verify(&sch->dev);
-		} else {
-			sch->opm &= ~(0x80 >> chp);
-			sch->lpm &= ~(0x80 >> chp);
-			/*
-			 * Give running I/O a grace period in which it
-			 * can successfully terminate, even using the
-			 * just varied off path. Then kill it.
-			 */
-			if (check_for_io_on_path(sch, chp)) {
-				if (device_is_online(sch))
-					/* Wait for I/O to finish */
-					device_set_waiting(sch);
-				else
-					/* Kill and retry internal I/O */
-					terminate_internal_io(sch);
-			} else if (!sch->lpm) {
+			break;
+		}
+		sch->opm &= ~mask;
+		sch->lpm &= ~mask;
+		/*
+		 * Give running I/O a grace period in which it
+		 * can successfully terminate, even using the
+		 * just varied off path. Then kill it.
+		 */
+		if (check_for_io_on_path(sch, chp)) {
+			if (device_is_online(sch))
+				/* Wait for I/O to finish */
+				device_set_waiting(sch);
+			else {
+				/* Kill and retry internal I/O */
+				terminate_internal_io(sch);
+				/* Re-start path verification. */
+				if (sch->driver && sch->driver->verify)
+					sch->driver->verify(&sch->dev);
+			}
+		} else if (!sch->lpm) {
+			if (device_trigger_verify(sch) != 0) {
 				if (css_enqueue_subchannel_slow(sch->schid)) {
 					css_clear_subchannel_slow_list();
 					need_rescan = 1;
 				}
-			} else if (sch->driver && sch->driver->verify)
-				sch->driver->verify(&sch->dev);
-		}
+			}
+		} else if (sch->driver && sch->driver->verify)
+			sch->driver->verify(&sch->dev);
 		break;
 	}
 	spin_unlock_irqrestore(&sch->lock, flags);
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index ced4216..b999cb0 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -173,8 +173,10 @@ void device_trigger_reprobe(struct subchannel *);
 
 /* Helper functions for vary on/off. */
 int device_is_online(struct subchannel *);
+void device_kill_io(struct subchannel *);
 void device_set_waiting(struct subchannel *);
 void device_set_intretry(struct subchannel *sch);
+int device_trigger_verify(struct subchannel *sch);
 
 /* Machine check helper function. */
 void device_kill_pending_timer(struct subchannel *);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 8872ac7..beb8f64 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -85,6 +85,18 @@ device_set_intretry(struct subchannel *sch)
 	cdev->private->flags.intretry = 1;
 }
 
+int
+device_trigger_verify(struct subchannel *sch)
+{
+	struct ccw_device *cdev;
+
+	cdev = sch->dev.driver_data;
+	if (!cdev || !cdev->online)
+		return -EINVAL;
+	dev_fsm_event(cdev, DEV_EVENT_VERIFY);
+	return 0;
+}
+
 /*
  * Timeout function. It just triggers a DEV_EVENT_TIMEOUT.
  */
@@ -1013,6 +1025,38 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event)
 			      ERR_PTR(-ETIMEDOUT));
 }
 
+void device_kill_io(struct subchannel *sch)
+{
+	int ret;
+	struct ccw_device *cdev = sch->dev.driver_data;
+
+	ret = ccw_device_cancel_halt_clear(cdev);
+	if (ret == -EBUSY) {
+		ccw_device_set_timeout(cdev, 3*HZ);
+		cdev->private->state = DEV_STATE_TIMEOUT_KILL;
+		return;
+	}
+	if (ret == -ENODEV) {
+		if (!sch->lpm) {
+			PREPARE_WORK(&cdev->private->kick_work,
+				ccw_device_nopath_notify, cdev);
+			queue_work(ccw_device_notify_work,
+				&cdev->private->kick_work);
+		} else
+			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+		return;
+	}
+	if (cdev->handler)
+		cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-EIO));
+	if (!sch->lpm) {
+		PREPARE_WORK(&cdev->private->kick_work,
+			ccw_device_nopath_notify, cdev);
+		queue_work(ccw_device_notify_work, &cdev->private->kick_work);
+	} else
+		/* Start delayed path verification. */
+		ccw_device_online_verify(cdev, 0);
+}
+
 static void
 ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event)
 {