From 96d2e8f913ef4e62b93a2fd42412655643d24ad1 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Mon, 20 Jun 2016 13:51:44 +0100
Subject: [PATCH] vchiq_arm: Avoid use of mutex in add_completion

Claiming the completion_mutex within add_completion did prevent some
messages appearing twice, but provokes a deadlock caused by vcsm using
vchiq within a page fault handler.

Revert the use of completion_mutex, and instead fix the original
problem using more memory barriers.

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
 .../vc04_services/interface/vchiq_arm/vchiq_arm.c  | 55 +++++++++++-----------
 .../vc04_services/interface/vchiq_arm/vchiq_core.c | 14 ++++--
 2 files changed, 37 insertions(+), 32 deletions(-)

--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -64,10 +64,10 @@
 #define VCHIQ_MINOR 0
 
 /* Some per-instance constants */
-#define MAX_COMPLETIONS 16
+#define MAX_COMPLETIONS 128
 #define MAX_SERVICES 64
 #define MAX_ELEMENTS 8
-#define MSG_QUEUE_SIZE 64
+#define MSG_QUEUE_SIZE 128
 
 #define KEEPALIVE_VER 1
 #define KEEPALIVE_VER_MIN KEEPALIVE_VER
@@ -208,28 +208,24 @@ add_completion(VCHIQ_INSTANCE_T instance
 	void *bulk_userdata)
 {
 	VCHIQ_COMPLETION_DATA_T *completion;
+	int insert;
 	DEBUG_INITIALISE(g_state.local)
 
-	mutex_lock(&instance->completion_mutex);
-
-	while (instance->completion_insert ==
-		(instance->completion_remove + MAX_COMPLETIONS)) {
+	insert = instance->completion_insert;
+	while ((insert - instance->completion_remove) >= MAX_COMPLETIONS) {
 		/* Out of space - wait for the client */
 		DEBUG_TRACE(SERVICE_CALLBACK_LINE);
 		vchiq_log_trace(vchiq_arm_log_level,
 			"add_completion - completion queue full");
 		DEBUG_COUNT(COMPLETION_QUEUE_FULL_COUNT);
 
-		mutex_unlock(&instance->completion_mutex);
 		if (down_interruptible(&instance->remove_event) != 0) {
 			vchiq_log_info(vchiq_arm_log_level,
 				"service_callback interrupted");
 			return VCHIQ_RETRY;
 		}
 
-		mutex_lock(&instance->completion_mutex);
 		if (instance->closing) {
-			mutex_unlock(&instance->completion_mutex);
 			vchiq_log_info(vchiq_arm_log_level,
 				"service_callback closing");
 			return VCHIQ_SUCCESS;
@@ -237,9 +233,7 @@ add_completion(VCHIQ_INSTANCE_T instance
 		DEBUG_TRACE(SERVICE_CALLBACK_LINE);
 	}
 
-	completion =
-		 &instance->completions[instance->completion_insert &
-		 (MAX_COMPLETIONS - 1)];
+	completion = &instance->completions[insert & (MAX_COMPLETIONS - 1)];
 
 	completion->header = header;
 	completion->reason = reason;
@@ -260,12 +254,9 @@ add_completion(VCHIQ_INSTANCE_T instance
 	wmb();
 
 	if (reason == VCHIQ_MESSAGE_AVAILABLE)
-		user_service->message_available_pos =
-			instance->completion_insert;
-
-	instance->completion_insert++;
+		user_service->message_available_pos = insert;
 
-	mutex_unlock(&instance->completion_mutex);
+	instance->completion_insert = ++insert;
 
 	up(&instance->insert_event);
 
@@ -795,6 +786,7 @@ vchiq_ioctl(struct file *file, unsigned
 			instance->completion_insert)
 			&& !instance->closing) {
 			int rc;
+
 			DEBUG_TRACE(AWAIT_COMPLETION_LINE);
 			mutex_unlock(&instance->completion_mutex);
 			rc = down_interruptible(&instance->insert_event);
@@ -809,24 +801,29 @@ vchiq_ioctl(struct file *file, unsigned
 		}
 		DEBUG_TRACE(AWAIT_COMPLETION_LINE);
 
-		/* A read memory barrier is needed to stop prefetch of a stale
-		** completion record
-		*/
-		rmb();
-
 		if (ret == 0) {
 			int msgbufcount = args.msgbufcount;
+			int remove;
+
+			remove = instance->completion_remove;
+
 			for (ret = 0; ret < args.count; ret++) {
 				VCHIQ_COMPLETION_DATA_T *completion;
 				VCHIQ_SERVICE_T *service;
 				USER_SERVICE_T *user_service;
 				VCHIQ_HEADER_T *header;
-				if (instance->completion_remove ==
-					instance->completion_insert)
+
+				if (remove == instance->completion_insert)
 					break;
+
 				completion = &instance->completions[
-					instance->completion_remove &
-					(MAX_COMPLETIONS - 1)];
+					remove & (MAX_COMPLETIONS - 1)];
+
+
+				/* A read memory barrier is needed to prevent
+				** the prefetch of a stale completion record
+				*/
+				rmb();
 
 				service = completion->service_userdata;
 				user_service = service->base.userdata;
@@ -903,7 +900,11 @@ vchiq_ioctl(struct file *file, unsigned
 					break;
 				}
 
-				instance->completion_remove++;
+				/* Ensure that the above copy has completed
+				** before advancing the remove pointer. */
+				mb();
+
+				instance->completion_remove = ++remove;
 			}
 
 			if (msgbufcount != args.msgbufcount) {
--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_core.c
+++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_core.c
@@ -610,15 +610,15 @@ process_free_queue(VCHIQ_STATE_T *state)
 	BITSET_T service_found[BITSET_SIZE(VCHIQ_MAX_SERVICES)];
 	int slot_queue_available;
 
-	/* Use a read memory barrier to ensure that any state that may have
-	** been modified by another thread is not masked by stale prefetched
-	** values. */
-	rmb();
-
 	/* Find slots which have been freed by the other side, and return them
 	** to the available queue. */
 	slot_queue_available = state->slot_queue_available;
 
+	/* Use a memory barrier to ensure that any state that may have been
+	** modified by another thread is not masked by stale prefetched
+	** values. */
+	mb();
+
 	while (slot_queue_available != local->slot_queue_recycle) {
 		unsigned int pos;
 		int slot_index = local->slot_queue[slot_queue_available++ &
@@ -626,6 +626,8 @@ process_free_queue(VCHIQ_STATE_T *state)
 		char *data = (char *)SLOT_DATA_FROM_INDEX(state, slot_index);
 		int data_found = 0;
 
+		rmb();
+
 		vchiq_log_trace(vchiq_core_log_level, "%d: pfq %d=%x %x %x",
 			state->id, slot_index, (unsigned int)data,
 			local->slot_queue_recycle, slot_queue_available);
@@ -741,6 +743,8 @@ process_free_queue(VCHIQ_STATE_T *state)
 				up(&state->data_quota_event);
 		}
 
+		mb();
+
 		state->slot_queue_available = slot_queue_available;
 		up(&state->slot_available_event);
 	}