From 0b8275da8346466af37b50d5ba687a386df9b0f4 Mon Sep 17 00:00:00 2001
From: Gordon Hollingworth <gordon@holliweb.co.uk>
Date: Thu, 4 Apr 2013 11:05:21 +0100
Subject: [PATCH 072/196] USB fix using a FIQ to implement split transactions

This commit adds a FIQ implementation that schedules
the split transactions using a FIQ so we don't get
held off by the interrupt latency of Linux
---
 .../usb/host/dwc_common_port/dwc_common_linux.c    |   7 +-
 drivers/usb/host/dwc_otg/dwc_otg_cil_intr.c        |  37 +-
 drivers/usb/host/dwc_otg/dwc_otg_driver.c          |   6 +-
 drivers/usb/host/dwc_otg/dwc_otg_hcd.c             | 125 ++++-
 drivers/usb/host/dwc_otg/dwc_otg_hcd.h             |  20 +-
 drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c        | 538 ++++++++++++++++++---
 drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c       |  18 +-
 drivers/usb/host/dwc_otg/dwc_otg_hcd_queue.c       |  31 +-
 drivers/usb/host/dwc_otg/dwc_otg_mphi_fix.h        |  26 +-
 drivers/usb/host/dwc_otg/dwc_otg_pcd_intr.c        |   2 +-
 10 files changed, 696 insertions(+), 114 deletions(-)

diff --git a/drivers/usb/host/dwc_common_port/dwc_common_linux.c b/drivers/usb/host/dwc_common_port/dwc_common_linux.c
index 6814e51..0812d3a 100644
--- a/drivers/usb/host/dwc_common_port/dwc_common_linux.c
+++ b/drivers/usb/host/dwc_common_port/dwc_common_linux.c
@@ -580,7 +580,12 @@ void DWC_WRITE_REG64(uint64_t volatile *reg, uint64_t value)
 
 void DWC_MODIFY_REG32(uint32_t volatile *reg, uint32_t clear_mask, uint32_t set_mask)
 {
+	unsigned long flags;
+
+	local_irq_save(flags);
+	local_fiq_disable();
 	writel((readl(reg) & ~clear_mask) | set_mask, reg);
+	local_irq_restore(flags);
 }
 
 #if 0
@@ -1301,7 +1306,7 @@ EXPORT_SYMBOL(DWC_EXCEPTION);
 EXPORT_SYMBOL(__DWC_DEBUG);
 #endif
 
-EXPORT_SYMBOL(__DWC_DMA_ALLOC);
+EXPORT_SYMBOL(__DWC_DMA_ALLOC);
 EXPORT_SYMBOL(__DWC_DMA_ALLOC_ATOMIC);
 EXPORT_SYMBOL(__DWC_DMA_FREE);
 EXPORT_SYMBOL(__DWC_ALLOC);
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_cil_intr.c b/drivers/usb/host/dwc_otg/dwc_otg_cil_intr.c
index b861b55..b5a007d 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_cil_intr.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_cil_intr.c
@@ -47,8 +47,6 @@
 #include "dwc_otg_hcd.h"
 #include "dwc_otg_mphi_fix.h"
 
-extern bool fiq_fix_enable;
-
 #ifdef DEBUG
 inline const char *op_state_str(dwc_otg_core_if_t * core_if)
 {
@@ -1321,7 +1319,7 @@ static int32_t dwc_otg_handle_lpm_intr(dwc_otg_core_if_t * core_if)
 /**
  * This function returns the Core Interrupt register.
  */
-static inline uint32_t dwc_otg_read_common_intr(dwc_otg_core_if_t * core_if)
+static inline uint32_t dwc_otg_read_common_intr(dwc_otg_core_if_t * core_if, gintmsk_data_t *reenable_gintmsk)
 {
 	gahbcfg_data_t gahbcfg = {.d32 = 0 };
 	gintsts_data_t gintsts;
@@ -1338,19 +1336,33 @@ static inline uint32_t dwc_otg_read_common_intr(dwc_otg_core_if_t * core_if)
 	gintmsk_common.b.lpmtranrcvd = 1;
 #endif
 	gintmsk_common.b.restoredone = 1;
-	/** @todo: The port interrupt occurs while in device
-         * mode. Added code to CIL to clear the interrupt for now!
-         */
-	gintmsk_common.b.portintr = 1;
-
+	if(dwc_otg_is_device_mode(core_if))
+	{
+		/** @todo: The port interrupt occurs while in device
+		 * mode. Added code to CIL to clear the interrupt for now!
+		 */
+		gintmsk_common.b.portintr = 1;
+	}
 	gintsts.d32 = DWC_READ_REG32(&core_if->core_global_regs->gintsts);
 	gintmsk.d32 = DWC_READ_REG32(&core_if->core_global_regs->gintmsk);
+	{
+		unsigned long flags;
+
+		// Re-enable the saved interrupts
+		local_irq_save(flags);
+		local_fiq_disable();
+		gintmsk.d32 |= gintmsk_common.d32;
+		gintsts_saved.d32 &= ~gintmsk_common.d32;
+		reenable_gintmsk->d32 = gintmsk.d32;
+		local_irq_restore(flags);
+	}
+
 	gahbcfg.d32 = DWC_READ_REG32(&core_if->core_global_regs->gahbcfg);
 
 #ifdef DEBUG
 	/* if any common interrupts set */
 	if (gintsts.d32 & gintmsk_common.d32) {
-		DWC_DEBUGPL(DBG_ANY, "gintsts=%08x  gintmsk=%08x\n",
+		DWC_DEBUGPL(DBG_ANY, "common_intr: gintsts=%08x  gintmsk=%08x\n",
 			    gintsts.d32, gintmsk.d32);
 	}
 #endif
@@ -1394,6 +1406,7 @@ int32_t dwc_otg_handle_common_intr(void *dev)
 {
 	int retval = 0;
 	gintsts_data_t gintsts;
+	gintmsk_data_t reenable_gintmsk;
 	gpwrdn_data_t gpwrdn = {.d32 = 0 };
 	dwc_otg_device_t *otg_dev = dev;
 	dwc_otg_core_if_t *core_if = otg_dev->core_if;
@@ -1415,7 +1428,7 @@ int32_t dwc_otg_handle_common_intr(void *dev)
 	}
 
 	if (core_if->hibernation_suspend <= 0) {
-		gintsts.d32 = dwc_otg_read_common_intr(core_if);
+		gintsts.d32 = dwc_otg_read_common_intr(core_if, &reenable_gintmsk);
 
 		if (gintsts.b.modemismatch) {
 			retval |= dwc_otg_handle_mode_mismatch_intr(core_if);
@@ -1512,8 +1525,12 @@ int32_t dwc_otg_handle_common_intr(void *dev)
 			gintsts.b.portintr = 1;
 			DWC_WRITE_REG32(&core_if->core_global_regs->gintsts,gintsts.d32);
 			retval |= 1;
+			reenable_gintmsk.b.portintr = 1;
 
 		}
+
+		DWC_WRITE_REG32(&core_if->core_global_regs->gintmsk, reenable_gintmsk.d32);
+
 	} else {
 		DWC_DEBUGPL(DBG_ANY, "gpwrdn=%08x\n", gpwrdn.d32);
 
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_driver.c b/drivers/usb/host/dwc_otg/dwc_otg_driver.c
index cea8fcb..6c89a69 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_driver.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_driver.c
@@ -242,7 +242,8 @@ static struct dwc_otg_driver_module_params dwc_otg_module_params = {
 
 //Global variable to switch the fiq fix on or off (declared in bcm2708.c)
 extern bool fiq_fix_enable;
-
+// Global variable to enable the split transaction fix
+bool fiq_split_enable = true;
 //Global variable to switch the nak holdoff on or off
 bool nak_holdoff_enable = true;
 
@@ -1090,6 +1091,7 @@ static int __init dwc_otg_driver_init(void)
 	}
 	printk(KERN_DEBUG "dwc_otg: FIQ %s\n", fiq_fix_enable ? "enabled":"disabled");
 	printk(KERN_DEBUG "dwc_otg: NAK holdoff %s\n", nak_holdoff_enable ? "enabled":"disabled");
+	printk(KERN_DEBUG "dwc_otg: FIQ split fix %s\n", fiq_split_enable ? "enabled":"disabled");
 
 	error = driver_create_file(drv, &driver_attr_version);
 #ifdef DEBUG
@@ -1374,6 +1376,8 @@ module_param(fiq_fix_enable, bool, 0444);
 MODULE_PARM_DESC(fiq_fix_enable, "Enable the fiq fix");
 module_param(nak_holdoff_enable, bool, 0444);
 MODULE_PARM_DESC(nak_holdoff_enable, "Enable the NAK holdoff");
+module_param(fiq_split_enable, bool, 0444);
+MODULE_PARM_DESC(fiq_split_enable, "Enable the FIQ fix on split transactions");
 
 /** @page "Module Parameters"
  *
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c
index 9c2e71a..af9108c 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c
@@ -45,6 +45,7 @@
 
 #include "dwc_otg_hcd.h"
 #include "dwc_otg_regs.h"
+#include "dwc_otg_mphi_fix.h"
 
 extern bool microframe_schedule, nak_holdoff_enable;
 
@@ -581,6 +582,8 @@ int dwc_otg_hcd_urb_dequeue(dwc_otg_hcd_t * hcd,
 			 */
 			dwc_otg_hc_halt(hcd->core_if, qh->channel,
 					DWC_OTG_HC_XFER_URB_DEQUEUE);
+
+			dwc_otg_hcd_release_port(hcd, qh);
 		}
 	}
 
@@ -716,6 +719,8 @@ static void completion_tasklet_func(void *ptr)
 
 		usb_hcd_giveback_urb(hcd->priv, urb, urb->status);
 
+		fiq_print(FIQDBG_PORTHUB, "COMPLETE");
+
 		DWC_SPINLOCK_IRQSAVE(hcd->lock, &flags);
 	}
 	DWC_SPINUNLOCK_IRQRESTORE(hcd->lock, flags);
@@ -979,6 +984,10 @@ int dwc_otg_hcd_init(dwc_otg_hcd_t * hcd, dwc_otg_core_if_t * core_if)
 	hcd->frame_list = NULL;
 	hcd->frame_list_dma = 0;
 	hcd->periodic_qh_count = 0;
+
+	DWC_MEMSET(hcd->hub_port, 0, sizeof(hcd->hub_port));
+	DWC_MEMSET(hcd->hub_port_alloc, -1, sizeof(hcd->hub_port_alloc));
+
 out:
 	return retval;
 }
@@ -1124,7 +1133,12 @@ static void assign_and_init_hc(dwc_otg_hcd_t * hcd, dwc_otg_qh_t * qh)
 		uint32_t hub_addr, port_addr;
 		hc->do_split = 1;
 		hc->xact_pos = qtd->isoc_split_pos;
-		hc->complete_split = qtd->complete_split;
+		/* We don't need to do complete splits anymore */
+		if(fiq_split_enable)
+			hc->complete_split = qtd->complete_split = 0;
+		else
+			hc->complete_split = qtd->complete_split;
+
 		hcd->fops->hub_info(hcd, urb->priv, &hub_addr, &port_addr);
 		hc->hub_addr = (uint8_t) hub_addr;
 		hc->port_addr = (uint8_t) port_addr;
@@ -1271,6 +1285,62 @@ static void assign_and_init_hc(dwc_otg_hcd_t * hcd, dwc_otg_qh_t * qh)
 	hc->qh = qh;
 }
 
+/*
+** Check the transaction to see if the port / hub has already been assigned for
+** a split transaction
+**
+** Return 1 - Port is already in use (0 - port allocated, or split fix disabled)
+*/
+int dwc_otg_hcd_allocate_port(dwc_otg_hcd_t * hcd, dwc_otg_qh_t *qh)
+{
+	uint32_t hub_addr, port_addr;
+
+	if(!fiq_split_enable)
+		return 0;
+
+	hcd->fops->hub_info(hcd, DWC_CIRCLEQ_FIRST(&qh->qtd_list)->urb->priv, &hub_addr, &port_addr);
+
+	if(hcd->hub_port[hub_addr] & (1 << port_addr))
+	{
+		fiq_print(FIQDBG_PORTHUB, "H%dP%d:S%02d", hub_addr, port_addr, qh->skip_count);
+
+		qh->skip_count++;
+
+		if(qh->skip_count > 40000)
+		{
+			printk_once(KERN_ERR "Error: Having to skip port allocation");
+			local_fiq_disable();
+			BUG();
+			return 0;
+		}
+		return 1;
+	}
+	else
+	{
+		qh->skip_count = 0;
+		hcd->hub_port[hub_addr] |= 1 << port_addr;
+		fiq_print(FIQDBG_PORTHUB, "H%dP%d:A %d", hub_addr, port_addr, DWC_CIRCLEQ_FIRST(&qh->qtd_list)->urb->pipe_info.ep_num);
+		hcd->hub_port_alloc[hub_addr * 16 + port_addr] = dwc_otg_hcd_get_frame_number(hcd);
+		return 0;
+	}
+}
+void dwc_otg_hcd_release_port(dwc_otg_hcd_t * hcd, dwc_otg_qh_t *qh)
+{
+	uint32_t hub_addr, port_addr;
+
+	if(!fiq_split_enable)
+		return;
+
+	hcd->fops->hub_info(hcd, DWC_CIRCLEQ_FIRST(&qh->qtd_list)->urb->priv, &hub_addr, &port_addr);
+
+	hcd->hub_port[hub_addr] &= ~(1 << port_addr);
+	hcd->hub_port_alloc[hub_addr * 16 + port_addr] = -1;
+
+	fiq_print(FIQDBG_PORTHUB, "H%dP%d:RO%d", hub_addr, port_addr, DWC_CIRCLEQ_FIRST(&qh->qtd_list)->urb->pipe_info.ep_num);
+
+}
+
+
 /**
  * This function selects transactions from the HCD transfer schedule and
  * assigns them to available host channels. It is called from HCD interrupt
@@ -1304,11 +1374,22 @@ dwc_otg_transaction_type_e dwc_otg_hcd_select_transactions(dwc_otg_hcd_t * hcd)
 
 	while (qh_ptr != &hcd->periodic_sched_ready &&
 	       !DWC_CIRCLEQ_EMPTY(&hcd->free_hc_list)) {
+
+		qh = DWC_LIST_ENTRY(qh_ptr, dwc_otg_qh_t, qh_list_entry);
+
+		if(qh->do_split && dwc_otg_hcd_allocate_port(hcd, qh))
+		{
+			qh_ptr = DWC_LIST_NEXT(qh_ptr);
+			g_next_sched_frame = dwc_frame_num_inc(dwc_otg_hcd_get_frame_number(hcd), 1);
+			continue;
+		}
+
 		if (microframe_schedule) {
 			// Make sure we leave one channel for non periodic transactions.
 			DWC_SPINLOCK_IRQSAVE(channel_lock, &flags);
 			if (hcd->available_host_channels <= 1) {
 				DWC_SPINUNLOCK_IRQRESTORE(channel_lock, flags);
+				if(qh->do_split) dwc_otg_hcd_release_port(hcd, qh);
 				break;
 			}
 			hcd->available_host_channels--;
@@ -1329,8 +1410,6 @@ dwc_otg_transaction_type_e dwc_otg_hcd_select_transactions(dwc_otg_hcd_t * hcd)
 		DWC_LIST_MOVE_HEAD(&hcd->periodic_sched_assigned,
 				   &qh->qh_list_entry);
 		DWC_SPINUNLOCK_IRQRESTORE(channel_lock, flags);
-
-		ret_val = DWC_OTG_TRANSACTION_PERIODIC;
 	}
 
 	/*
@@ -1369,10 +1448,19 @@ dwc_otg_transaction_type_e dwc_otg_hcd_select_transactions(dwc_otg_hcd_t * hcd)
 				qh->nak_frame = 0xffff;
 			}
 		}
+
+		if (qh->do_split && dwc_otg_hcd_allocate_port(hcd, qh))
+		{
+			g_next_sched_frame = dwc_frame_num_inc(dwc_otg_hcd_get_frame_number(hcd), 1);
+			qh_ptr = DWC_LIST_NEXT(qh_ptr);
+			continue;
+		}
+
 		if (microframe_schedule) {
 				DWC_SPINLOCK_IRQSAVE(channel_lock, &flags);
 				if (hcd->available_host_channels < 1) {
 					DWC_SPINUNLOCK_IRQRESTORE(channel_lock, flags);
+					if(qh->do_split) dwc_otg_hcd_release_port(hcd, qh);
 					break;
 				}
 				hcd->available_host_channels--;
@@ -1396,16 +1484,17 @@ dwc_otg_transaction_type_e dwc_otg_hcd_select_transactions(dwc_otg_hcd_t * hcd)
 
 		g_np_sent++;
 
-		if (ret_val == DWC_OTG_TRANSACTION_NONE) {
-			ret_val = DWC_OTG_TRANSACTION_NON_PERIODIC;
-		} else {
-			ret_val = DWC_OTG_TRANSACTION_ALL;
-		}
-
 		if (!microframe_schedule)
 			hcd->non_periodic_channels++;
 	}
 
+	if(!DWC_LIST_EMPTY(&hcd->periodic_sched_assigned))
+		ret_val |= DWC_OTG_TRANSACTION_PERIODIC;
+
+	if(!DWC_LIST_EMPTY(&hcd->non_periodic_sched_active))
+		ret_val |= DWC_OTG_TRANSACTION_NON_PERIODIC;
+
+
 #ifdef DEBUG_HOST_CHANNELS
 	last_sel_trans_num_avail_hc_at_end = hcd->available_host_channels;
 #endif /* DEBUG_HOST_CHANNELS */
@@ -1522,6 +1611,15 @@ static void process_periodic_channels(dwc_otg_hcd_t * hcd)
 
 		qh = DWC_LIST_ENTRY(qh_ptr, dwc_otg_qh_t, qh_list_entry);
 
+		// Do not send a split start transaction any later than frame .6
+		// Note, we have to schedule a periodic in .5 to make it go in .6
+		if(fiq_split_enable && qh->do_split && ((dwc_otg_hcd_get_frame_number(hcd) + 1) & 7) > 6)
+		{
+			qh_ptr = qh_ptr->next;
+			g_next_sched_frame = dwc_otg_hcd_get_frame_number(hcd) | 7;
+			continue;
+		}
+
 		/*
 		 * Set a flag if we're queuing high-bandwidth in slave mode.
 		 * The flag prevents any halts to get into the request queue in
@@ -1651,6 +1749,15 @@ static void process_non_periodic_channels(dwc_otg_hcd_t * hcd)
 
 		qh = DWC_LIST_ENTRY(hcd->non_periodic_qh_ptr, dwc_otg_qh_t,
 				    qh_list_entry);
+
+		// Do not send a split start transaction any later than frame .5
+		// non periodic transactions will start immediately in this uframe
+		if(fiq_split_enable && qh->do_split && ((dwc_otg_hcd_get_frame_number(hcd) + 1) & 7) > 6)
+		{
+			g_next_sched_frame = dwc_otg_hcd_get_frame_number(hcd) | 7;
+			break;
+		}
+
 		status =
 		    queue_transaction(hcd, qh->channel,
 				      tx_status.b.nptxfspcavail);
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd.h b/drivers/usb/host/dwc_otg/dwc_otg_hcd.h
index 0493dbf..d3d6e997 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd.h
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd.h
@@ -168,10 +168,10 @@ typedef enum dwc_otg_control_phase {
 
 /** Transaction types. */
 typedef enum dwc_otg_transaction_type {
-	DWC_OTG_TRANSACTION_NONE,
-	DWC_OTG_TRANSACTION_PERIODIC,
-	DWC_OTG_TRANSACTION_NON_PERIODIC,
-	DWC_OTG_TRANSACTION_ALL
+	DWC_OTG_TRANSACTION_NONE          = 0,
+	DWC_OTG_TRANSACTION_PERIODIC      = 1,
+	DWC_OTG_TRANSACTION_NON_PERIODIC  = 2,
+	DWC_OTG_TRANSACTION_ALL           = DWC_OTG_TRANSACTION_PERIODIC + DWC_OTG_TRANSACTION_NON_PERIODIC
 } dwc_otg_transaction_type_e;
 
 struct dwc_otg_qh;
@@ -370,6 +370,8 @@ typedef struct dwc_otg_qh {
 
 	uint16_t speed;
 	uint16_t frame_usecs[8];
+
+	uint32_t skip_count;
 } dwc_otg_qh_t;
 
 DWC_CIRCLEQ_HEAD(hc_list, dwc_hc);
@@ -574,6 +576,10 @@ struct dwc_otg_hcd {
 	/** Frame List */
 	uint32_t *frame_list;
 
+	/** Hub - Port assignment */
+	int hub_port[16];
+	int hub_port_alloc[256];
+
 	/** Frame List DMA address */
 	dma_addr_t frame_list_dma;
 
@@ -604,12 +610,16 @@ extern dwc_otg_transaction_type_e dwc_otg_hcd_select_transactions(dwc_otg_hcd_t
 extern void dwc_otg_hcd_queue_transactions(dwc_otg_hcd_t * hcd,
 					   dwc_otg_transaction_type_e tr_type);
 
+int dwc_otg_hcd_allocate_port(dwc_otg_hcd_t * hcd, dwc_otg_qh_t *qh);
+void dwc_otg_hcd_release_port(dwc_otg_hcd_t * dwc_otg_hcd, dwc_otg_qh_t *qh);
+
+
 /** @} */
 
 /** @name Interrupt Handler Functions */
 /** @{ */
 extern int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd);
-extern int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * dwc_otg_hcd, int32_t);
+extern int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * dwc_otg_hcd);
 extern int32_t dwc_otg_hcd_handle_rx_status_q_level_intr(dwc_otg_hcd_t *
 							 dwc_otg_hcd);
 extern int32_t dwc_otg_hcd_handle_np_tx_fifo_empty_intr(dwc_otg_hcd_t *
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c
index 16e8c6c..e8b4d35 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c
@@ -38,6 +38,7 @@
 
 #include <linux/jiffies.h>
 #include <mach/hardware.h>
+#include <asm/fiq.h>
 
 
 extern bool microframe_schedule;
@@ -52,21 +53,295 @@ extern bool microframe_schedule;
 
 void * dummy_send;
 mphi_regs_t c_mphi_regs;
+volatile void *dwc_regs_base;
 int fiq_done, int_done;
-int g_next_sched_frame, g_np_count, g_np_sent, g_work_expected;
-static int mphi_int_count = 0 ;
 
-extern bool fiq_fix_enable, nak_holdoff_enable;
+gintsts_data_t  gintsts_saved = {.d32 = 0};
+hcint_data_t    hcint_saved[MAX_EPS_CHANNELS];
+hcintmsk_data_t hcintmsk_saved[MAX_EPS_CHANNELS];
+int             split_out_xfersize[MAX_EPS_CHANNELS];
+haint_data_t    haint_saved;
+
+int g_next_sched_frame, g_np_count, g_np_sent;
+static int mphi_int_count = 0 ;
 
 hcchar_data_t nak_hcchar;
 hctsiz_data_t nak_hctsiz;
 hcsplt_data_t nak_hcsplt;
 int nak_count;
 
+int complete_sched[MAX_EPS_CHANNELS] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+int split_start_frame[MAX_EPS_CHANNELS];
+int queued_port[MAX_EPS_CHANNELS];
+
+#ifdef FIQ_DEBUG
+char buffer[1000*16];
+int wptr;
+void _fiq_print(FIQDBG_T dbg_lvl, char *fmt, ...)
+{
+	FIQDBG_T dbg_lvl_req = FIQDBG_PORTHUB;
+	va_list args;
+	char text[17];
+	hfnum_data_t hfnum = { .d32 = FIQ_READ(dwc_regs_base + 0x408) };
+	unsigned long flags;
+
+	local_irq_save(flags);
+	local_fiq_disable();
+	if(dbg_lvl & dbg_lvl_req || dbg_lvl == FIQDBG_ERR)
+	{
+		snprintf(text, 9, "%4d%d:%d ", hfnum.b.frnum/8, hfnum.b.frnum%8, 8 - hfnum.b.frrem/937);
+		va_start(args, fmt);
+		vsnprintf(text+8, 9, fmt, args);
+		va_end(args);
+
+		memcpy(buffer + wptr, text, 16);
+		wptr = (wptr + 16) % sizeof(buffer);
+	}
+	local_irq_restore(flags);
+}
+#endif
+
+void fiq_queue_request(int channel, int odd_frame)
+{
+	hcchar_data_t   hcchar   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x0)  };
+	hcsplt_data_t   hcsplt   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x4)  };
+	hctsiz_data_t   hctsiz   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x10) };
+
+	if(hcsplt.b.spltena	== 0)
+	{
+		fiq_print(FIQDBG_ERR, "SPLTENA ");
+		BUG();
+	}
+
+	if(hcchar.b.epdir == 1)
+	{
+		fiq_print(FIQDBG_SCHED, "IN  Ch %d", channel);
+	}
+	else
+	{
+		hctsiz.b.xfersize = 0;
+		fiq_print(FIQDBG_SCHED, "OUT Ch %d", channel);
+	}
+	FIQ_WRITE((dwc_regs_base + 0x500 + (channel * 0x20) + 0x10), hctsiz.d32);
+
+	hcsplt.b.compsplt = 1;
+	FIQ_WRITE((dwc_regs_base + 0x500 + (channel * 0x20) + 0x4), hcsplt.d32);
+
+	// Send the Split complete
+	hcchar.b.chen = 1;
+	hcchar.b.oddfrm = odd_frame ? 1 : 0;
+
+	// Post this for transmit on the next frame for periodic or this frame for non-periodic
+	fiq_print(FIQDBG_SCHED, "SND_%s", odd_frame ? "ODD " : "EVEN");
+
+	FIQ_WRITE((dwc_regs_base + 0x500 + (channel * 0x20) + 0x0), hcchar.d32);
+}
+
+static int last_sof = -1;
+
+/*
+** Function to handle the start of frame interrupt, choose whether we need to do anything and
+** therefore trigger the main interrupt
+**
+** returns int != 0 - interrupt has been handled
+*/
+int diff;
+
+int fiq_sof_handle(hfnum_data_t hfnum)
+{
+	int handled = 0;
+	int i;
+
+	// Just check that once we're running we don't miss a SOF
+	/*if(last_sof != -1 && (hfnum.b.frnum != ((last_sof + 1) & 0x3fff)))
+	{
+		fiq_print(FIQDBG_ERR, "LASTSOF ");
+		fiq_print(FIQDBG_ERR, "%4d%d   ", last_sof / 8, last_sof & 7);
+		fiq_print(FIQDBG_ERR, "%4d%d   ", hfnum.b.frnum / 8, hfnum.b.frnum & 7);
+		BUG();
+	}*/
+
+	// Only start remembering the last sof when the interrupt has been
+	// enabled (we don't check the mask to come in here...)
+	if(last_sof != -1 || FIQ_READ(dwc_regs_base + 0x18) & (1<<3))
+		last_sof = hfnum.b.frnum;
+
+	for(i = 0; i < MAX_EPS_CHANNELS; i++)
+	{
+		if(complete_sched[i] != -1)
+		{
+			if(complete_sched[i] <= hfnum.b.frnum || (complete_sched[i] > 0x3f00 && hfnum.b.frnum < 0xf0))
+			{
+				fiq_queue_request(i, hfnum.b.frnum & 1);
+				complete_sched[i] = -1;
+			}
+		}
+
+		if(complete_sched[i] != -1)
+		{
+			// This is because we've seen a split complete occur with no start...
+			// most likely because we missed the complete 0x3fff frames ago!
+
+			diff = (hfnum.b.frnum + 0x3fff - complete_sched[i]) & 0x3fff ;
+			if(diff > 32 && diff < 0x3f00)
+			{
+				fiq_print(FIQDBG_ERR, "SPLTMISS");
+				BUG();
+			}
+		}
+	}
+
+	if(g_np_count == g_np_sent && dwc_frame_num_gt(g_next_sched_frame, hfnum.b.frnum))
+	{
+		/*
+		 * If np_count != np_sent that means we need to queue non-periodic (bulk) packets this frame
+		 * g_next_sched_frame is the next frame we have periodic packets for
+		 *
+		 * if neither of these are required for this frame then just clear the interrupt
+		 */
+		handled = 1;
+
+	}
+
+	return handled;
+}
+
+int port_id(hcsplt_data_t hcsplt)
+{
+	return hcsplt.b.prtaddr + (hcsplt.b.hubaddr << 8);
+}
+
+int fiq_hcintr_handle(int channel, hfnum_data_t hfnum)
+{
+	hcchar_data_t   hcchar   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x0) };
+	hcsplt_data_t   hcsplt   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x4) };
+	hcint_data_t    hcint    = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x8) };
+	hcintmsk_data_t hcintmsk = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0xc) };
+	hctsiz_data_t   hctsiz   = { .d32 = FIQ_READ(dwc_regs_base + 0x500 + (channel * 0x20) + 0x10)};
+
+	hcint_saved[channel].d32 |= hcint.d32;
+	hcintmsk_saved[channel].d32 =  hcintmsk.d32;
+
+	if(hcsplt.b.spltena)
+	{
+		fiq_print(FIQDBG_PORTHUB, "ph: %4x", port_id(hcsplt));
+		if(hcint.b.chhltd)
+		{
+			fiq_print(FIQDBG_SCHED, "CH HLT %d", channel);
+			fiq_print(FIQDBG_SCHED, "%08x", hcint_saved[channel]);
+		}
+		if(hcint.b.stall || hcint.b.xacterr || hcint.b.bblerr || hcint.b.frmovrun || hcint.b.datatglerr)
+		{
+			queued_port[channel] = 0;
+			fiq_print(FIQDBG_ERR, "CHAN ERR");
+		}
+		if(hcint.b.xfercomp)
+		{
+			// Clear the port allocation and transmit anything also on this port
+			queued_port[channel] = 0;
+			fiq_print(FIQDBG_SCHED, "XFERCOMP");
+		}
+		if(hcint.b.nak)
+		{
+			queued_port[channel] = 0;
+			fiq_print(FIQDBG_SCHED, "NAK");
+		}
+		if(hcint.b.ack && !hcsplt.b.compsplt)
+		{
+			int i;
+
+			// Do not complete isochronous out transactions
+			if(hcchar.b.eptype == 1 && hcchar.b.epdir == 0)
+			{
+				queued_port[channel] = 0;
+				fiq_print(FIQDBG_SCHED, "ISOC_OUT");
+			}
+			else
+			{
+				// Make sure we check the port / hub combination that we sent this split on.
+				// Do not queue a second request to the same port
+				for(i = 0; i < MAX_EPS_CHANNELS; i++)
+				{
+					if(port_id(hcsplt) == queued_port[i])
+					{
+						fiq_print(FIQDBG_ERR, "PORTERR ");
+						//BUG();
+					}
+				}
+
+				split_start_frame[channel] = (hfnum.b.frnum + 1) & ~7;
+
+				// Note, the size of an OUT is in the start split phase, not
+				// the complete split
+				split_out_xfersize[channel] = hctsiz.b.xfersize;
+
+				hcint_saved[channel].b.chhltd = 0;
+				hcint_saved[channel].b.ack = 0;
+
+				queued_port[channel] = port_id(hcsplt);
+
+				if(hcchar.b.eptype & 1)
+				{
+					// Send the periodic complete in the same oddness frame as the ACK went...
+					fiq_queue_request(channel, !(hfnum.b.frnum & 1));
+	//				complete_sched[channel] = dwc_frame_num_inc(hfnum.b.frnum, 1);
+				}
+				else
+				{
+					// Schedule the split complete to occur later
+					complete_sched[channel] = dwc_frame_num_inc(hfnum.b.frnum, 2);
+					fiq_print(FIQDBG_SCHED, "ACK%04d%d", complete_sched[channel]/8, complete_sched[channel]%8);
+				}
+			}
+		}
+		if(hcint.b.nyet)
+		{
+			fiq_print(FIQDBG_ERR, "NYETERR1");
+			//BUG();
+			// Can transmit a split complete up to uframe .0 of the next frame
+			if(hfnum.b.frnum <= dwc_frame_num_inc(split_start_frame[channel], 8))
+			{
+				// Send it next frame
+				if(hcchar.b.eptype & 1) // type 1 & 3 are interrupt & isoc
+				{
+					fiq_print(FIQDBG_SCHED, "NYT:SEND");
+					fiq_queue_request(channel, !(hfnum.b.frnum & 1));
+				}
+				else
+				{
+					// Schedule non-periodic access for next frame (the odd-even bit doesn't affect NP)
+					complete_sched[channel] = dwc_frame_num_inc(hfnum.b.frnum, 1);
+					fiq_print(FIQDBG_SCHED, "NYT%04d%d", complete_sched[channel]/8, complete_sched[channel]%8);
+				}
+				hcint_saved[channel].b.chhltd = 0;
+				hcint_saved[channel].b.nyet = 0;
+			}
+			else
+			{
+				queued_port[channel] = 0;
+				fiq_print(FIQDBG_ERR, "NYETERR2");
+				//BUG();
+			}
+		}
+	}
+
+	// Clear the interrupt, this will also clear the HAINT bit
+	FIQ_WRITE((dwc_regs_base + 0x500 + (channel * 0x20) + 0x8), hcint.d32);
+	return hcint_saved[channel].d32 == 0;
+}
+
+gintsts_data_t gintsts;
+gintmsk_data_t gintmsk;
+// triggered: The set of interrupts that were triggered
+// handled:   The set of interrupts that have been handled (no IRQ is
+//            required)
+// keep:      The set of interrupts we want to keep unmasked even though we
+//            want to trigger an IRQ to handle it (SOF and HCINTR)
+gintsts_data_t triggered, handled, keep;
+hfnum_data_t hfnum;
+
 void __attribute__ ((naked)) dwc_otg_hcd_handle_fiq(void)
 {
-	gintsts_data_t gintsts;
-	hfnum_data_t hfnum;
 
 	/* entry takes care to store registers we will be treading on here */
 	asm __volatile__ (
@@ -74,43 +349,112 @@ void __attribute__ ((naked)) dwc_otg_hcd_handle_fiq(void)
 		/* stash FIQ and normal regs */
 		"stmdb	sp!, {r0-r12,  lr};"
 		/* !! THIS SETS THE FRAME, adjust to > sizeof locals */
-		"sub     fp, ip, #256 ;"
+		"sub     fp, ip, #512 ;"
 		);
 
-	fiq_done++;
-	gintsts.d32 = FIQ_READ_IO_ADDRESS(USB_BASE + 0x14) & FIQ_READ_IO_ADDRESS(USB_BASE + 0x18);
-	hfnum.d32 =   FIQ_READ_IO_ADDRESS(USB_BASE + 0x408);
-
-	if(gintsts.d32)
+	// Cannot put local variables at the beginning of the function
+	// because otherwise 'C' will play with the stack pointer. any locals
+	// need to be inside the following block
+	do
 	{
-		if(gintsts.b.sofintr && g_np_count == g_np_sent && dwc_frame_num_gt(g_next_sched_frame, hfnum.b.frnum))
+		fiq_done++;
+		gintsts.d32 = FIQ_READ(dwc_regs_base + 0x14);
+		gintmsk.d32 = FIQ_READ(dwc_regs_base + 0x18);
+		hfnum.d32 =   FIQ_READ(dwc_regs_base + 0x408);
+		triggered.d32 = gintsts.d32 & gintmsk.d32;
+		handled.d32 = 0;
+		keep.d32 = 0;
+		fiq_print(FIQDBG_INT, "FIQ     ");
+		fiq_print(FIQDBG_INT, "%08x", gintsts.d32);
+		fiq_print(FIQDBG_INT, "%08x", gintmsk.d32);
+		if(gintsts.d32)
 		{
-			/*
-			 * If np_count != np_sent that means we need to queue non-periodic (bulk) packets this packet
-			 * g_next_sched_frame is the next frame we have periodic packets for
-			 *
-			 * if neither of these are required for this frame then just clear the interrupt
-			 */
-			gintsts.d32 = 0;
-			gintsts.b.sofintr = 1;
-			FIQ_WRITE_IO_ADDRESS((USB_BASE + 0x14), gintsts.d32);
+			// If port enabled
+			if((FIQ_READ(dwc_regs_base + 0x440) & 0xf) == 0x5)
+			{
+				if(gintsts.b.sofintr)
+				{
+					if(fiq_sof_handle(hfnum))
+					{
+						handled.b.sofintr = 1; /* Handled in FIQ */
+					}
+					else
+					{
+						/* Keep interrupt unmasked */
+						keep.b.sofintr = 1;
+					}
+					{
+						// Need to make sure the read and clearing of the SOF interrupt is as close as possible to avoid the possibility of missing
+						// a start of frame interrupt
+						gintsts_data_t gintsts = { .b.sofintr = 1 };
+						FIQ_WRITE((dwc_regs_base + 0x14), gintsts.d32);
+					}
+				}
+
+				if(fiq_split_enable && gintsts.b.hcintr)
+				{
+					int i;
+					haint_data_t    haint;
+					haintmsk_data_t haintmsk;
+
+					haint.d32 = FIQ_READ(dwc_regs_base + 0x414);
+					haintmsk.d32 = FIQ_READ(dwc_regs_base + 0x418);
+					haint.d32 &= haintmsk.d32;
+					haint_saved.d32 |= haint.d32;
+
+					fiq_print(FIQDBG_INT, "hcintr");
+					fiq_print(FIQDBG_INT, "%08x", FIQ_READ(dwc_regs_base + 0x414));
+
+					// Go through each channel that has an enabled interrupt
+					for(i = 0; i < 16; i++)
+						if((haint.d32 >> i) & 1)
+							if(fiq_hcintr_handle(i, hfnum))
+								haint_saved.d32 &= ~(1 << i); /* this was handled */
+
+					/* If we've handled all host channel interrupts then don't trigger the interrupt */
+					if(haint_saved.d32 == 0)
+					{
+						handled.b.hcintr = 1;
+					}
+					else
+					{
+						/* Make sure we keep the channel interrupt unmasked when triggering the IRQ */
+						keep.b.hcintr = 1;
+					}
 
-			g_work_expected = 0;
+					{
+						gintsts_data_t gintsts = { .b.hcintr = 1 };
+
+						// Always clear the channel interrupt
+						FIQ_WRITE((dwc_regs_base + 0x14), gintsts.d32);
+					}
+				}
+			}
+			else
+			{
+				last_sof = -1;
+			}
 		}
-		else
+
+		// Mask out the interrupts triggered - those handled - don't mask out the ones we want to keep
+		gintmsk.d32 = keep.d32 | (gintmsk.d32 & ~(triggered.d32 & ~handled.d32));
+		// Save those that were triggered but not handled
+		gintsts_saved.d32 |= triggered.d32 & ~handled.d32;
+		FIQ_WRITE(dwc_regs_base + 0x18, gintmsk.d32);
+
+		// Clear and save any unhandled interrupts and trigger the interrupt
+		if(gintsts_saved.d32)
 		{
-			g_work_expected = 1;
 			/* To enable the MPHI interrupt  (INT 32)
 			 */
-			FIQ_WRITE(  c_mphi_regs.outdda, (int) dummy_send);
+			FIQ_WRITE( c_mphi_regs.outdda, (int) dummy_send);
 			FIQ_WRITE( c_mphi_regs.outddb, (1 << 29));
 
 			mphi_int_count++;
-			/* Clear the USB global interrupt so we don't just sit in the FIQ */
-			FIQ_MODIFY_IO_ADDRESS((USB_BASE + 0x8),1,0);
-
 		}
 	}
+	while(0);
+
 	mb();
 
 	/* exit back to normal mode restoring everything */
@@ -133,6 +477,7 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 
 	dwc_otg_core_if_t *core_if = dwc_otg_hcd->core_if;
 	gintsts_data_t gintsts;
+	gintmsk_data_t gintmsk;
 	hfnum_data_t hfnum;
 
 #ifdef DEBUG
@@ -140,6 +485,9 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 
 #endif
 
+	gintsts.d32 = DWC_READ_REG32(&core_if->core_global_regs->gintsts);
+	gintmsk.d32 = DWC_READ_REG32(&core_if->core_global_regs->gintmsk);
+
 	/* Exit from ISR if core is hibernated */
 	if (core_if->hibernation_suspend == 1) {
 		goto exit_handler_routine;
@@ -147,11 +495,18 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 	DWC_SPINLOCK(dwc_otg_hcd->lock);
 	/* Check if HOST Mode */
 	if (dwc_otg_is_host_mode(core_if)) {
-		gintsts.d32 = dwc_otg_read_core_intr(core_if);
+		local_fiq_disable();
+		gintmsk.d32 |= gintsts_saved.d32;
+		gintsts.d32 |= gintsts_saved.d32;
+		gintsts_saved.d32 = 0;
+		local_fiq_enable();
 		if (!gintsts.d32) {
 			goto exit_handler_routine;
 		}
+		gintsts.d32 &= gintmsk.d32;
+
 #ifdef DEBUG
+		// We should be OK doing this because the common interrupts should already have been serviced
 		/* Don't print debug message in the interrupt handler on SOF */
 #ifndef DEBUG_SOF
 		if (gintsts.d32 != DWC_SOF_INTR_MASK)
@@ -171,11 +526,12 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 		if (gintsts.b.sofintr && g_np_count == g_np_sent && dwc_frame_num_gt(g_next_sched_frame, hfnum.b.frnum))
 		{
 			/* Note, we should never get here if the FIQ is doing it's job properly*/
-			retval |= dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd, g_work_expected);
+			retval |= dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd);
 		}
 		else if (gintsts.b.sofintr) {
-			retval |= dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd, g_work_expected);
+			retval |= dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd);
 		}
+
 		if (gintsts.b.rxstsqlvl) {
 			retval |=
 			    dwc_otg_hcd_handle_rx_status_q_level_intr
@@ -190,7 +546,10 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 			/** @todo Implement i2cintr handler. */
 		}
 		if (gintsts.b.portintr) {
+
+			gintmsk_data_t gintmsk = { .b.portintr = 1};
 			retval |= dwc_otg_hcd_handle_port_intr(dwc_otg_hcd);
+			DWC_MODIFY_REG32(&core_if->core_global_regs->gintmsk, 0, gintmsk.d32);
 		}
 		if (gintsts.b.hcintr) {
 			retval |= dwc_otg_hcd_handle_hc_intr(dwc_otg_hcd);
@@ -227,26 +586,35 @@ exit_handler_routine:
 
 	if (fiq_fix_enable)
 	{
-		/* Clear the MPHI interrupt */
-		DWC_WRITE_REG32(c_mphi_regs.intstat, (1<<16));
-		if (mphi_int_count >= 60)
+		local_fiq_disable();
+		// Make sure that we don't clear the interrupt if we've still got pending work to do
+		if(gintsts_saved.d32 == 0)
 		{
-			DWC_WRITE_REG32(c_mphi_regs.ctrl, ((1<<31) + (1<<16)));
-			while(!(DWC_READ_REG32(c_mphi_regs.ctrl) & (1 << 17)))
-				;
-			DWC_WRITE_REG32(c_mphi_regs.ctrl, (1<<31));
-			mphi_int_count = 0;
+			/* Clear the MPHI interrupt */
+			DWC_WRITE_REG32(c_mphi_regs.intstat, (1<<16));
+			if (mphi_int_count >= 60)
+			{
+				DWC_WRITE_REG32(c_mphi_regs.ctrl, ((1<<31) + (1<<16)));
+				while(!(DWC_READ_REG32(c_mphi_regs.ctrl) & (1 << 17)))
+					;
+				DWC_WRITE_REG32(c_mphi_regs.ctrl, (1<<31));
+				mphi_int_count = 0;
+			}
+			int_done++;
 		}
-		int_done++;
+
+		// Unmask handled interrupts
+		FIQ_WRITE(dwc_regs_base + 0x18, gintmsk.d32);
+		//DWC_MODIFY_REG32((uint32_t *)IO_ADDRESS(USB_BASE + 0x8), 0 , 1);
+
+		local_fiq_enable();
+
 		if((jiffies / HZ) > last_time)
 		{
 			/* Once a second output the fiq and irq numbers, useful for debug */
 			last_time = jiffies / HZ;
 			DWC_DEBUGPL(DBG_USER, "int_done = %d fiq_done = %d\n", int_done, fiq_done);
 		}
-
-		/* Re-Enable FIQ interrupt from USB peripheral */
-		DWC_MODIFY_REG32((uint32_t *)IO_ADDRESS(USB_BASE + 0x8), 0 , 1);
 	}
 
 	DWC_SPINUNLOCK(dwc_otg_hcd->lock);
@@ -294,13 +662,12 @@ static inline void track_missed_sofs(uint16_t curr_frame_number)
  * (micro)frame. Periodic transactions may be queued to the controller for the
  * next (micro)frame.
  */
-int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * hcd, int32_t work_expected)
+int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * hcd)
 {
 	hfnum_data_t hfnum;
 	dwc_list_link_t *qh_entry;
 	dwc_otg_qh_t *qh;
 	dwc_otg_transaction_type_e tr_type;
-	gintsts_data_t gintsts = {.d32 = 0 };
 	int did_something = 0;
 	int32_t next_sched_frame = -1;
 
@@ -326,6 +693,7 @@ int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * hcd, int32_t work_expected)
 		qh = DWC_LIST_ENTRY(qh_entry, dwc_otg_qh_t, qh_list_entry);
 		qh_entry = qh_entry->next;
 		if (dwc_frame_num_le(qh->sched_frame, hcd->frame_number)) {
+
 			/*
 			 * Move QH to the ready list to be executed next
 			 * (micro)frame.
@@ -351,15 +719,10 @@ int32_t dwc_otg_hcd_handle_sof_intr(dwc_otg_hcd_t * hcd, int32_t work_expected)
 		dwc_otg_hcd_queue_transactions(hcd, tr_type);
 		did_something = 1;
 	}
-	if(work_expected && !did_something)
-		DWC_DEBUGPL(DBG_USER, "Nothing to do !! frame = %x, g_next_sched_frame = %x\n", (int) hfnum.b.frnum, g_next_sched_frame);
-	if(!work_expected && did_something)
-		DWC_DEBUGPL(DBG_USER, "Unexpected work done !! frame = %x, g_next_sched_frame = %x\n", (int) hfnum.b.frnum, g_next_sched_frame);
-
 
 	/* Clear interrupt */
-	gintsts.b.sofintr = 1;
-	DWC_WRITE_REG32(&hcd->core_if->core_global_regs->gintsts, gintsts.d32);
+	//gintsts.b.sofintr = 1;
+	//DWC_WRITE_REG32(&hcd->core_if->core_global_regs->gintsts, gintsts.d32);
 
 	return 1;
 }
@@ -643,6 +1006,15 @@ int32_t dwc_otg_hcd_handle_hc_intr(dwc_otg_hcd_t * dwc_otg_hcd)
 
 	haint.d32 = dwc_otg_read_host_all_channels_intr(dwc_otg_hcd->core_if);
 
+	// Overwrite with saved interrupts from fiq handler
+	if(fiq_split_enable)
+	{
+		local_fiq_disable();
+		haint.d32 = haint_saved.d32;
+		haint_saved.d32 = 0;
+		local_fiq_enable();
+	}
+
 	for (i = 0; i < dwc_otg_hcd->core_if->core_params->host_channels; i++) {
 		if (haint.b2.chint & (1 << i)) {
 			retval |= dwc_otg_hcd_handle_hc_n_intr(dwc_otg_hcd, i);
@@ -683,7 +1055,10 @@ static uint32_t get_actual_xfer_length(dwc_hc_t * hc,
 				*short_read = (hctsiz.b.xfersize != 0);
 			}
 		} else if (hc->qh->do_split) {
-			length = qtd->ssplit_out_xfer_count;
+			if(fiq_split_enable)
+				length = split_out_xfersize[hc->hc_num];
+			else
+				length = qtd->ssplit_out_xfer_count;
 		} else {
 			length = hc->xfer_len;
 		}
@@ -727,7 +1102,6 @@ static int update_urb_state_xfer_comp(dwc_hc_t * hc,
 					     DWC_OTG_HC_XFER_COMPLETE,
 					     &short_read);
 
-
 	/* non DWORD-aligned buffer case handling. */
 	if (hc->align_buff && xfer_length && hc->ep_is_in) {
 		dwc_memcpy(urb->buf + urb->actual_length, hc->qh->dw_align_buf,
@@ -930,6 +1304,9 @@ static void release_channel(dwc_otg_hcd_t * hcd,
 	int free_qtd;
 	dwc_irqflags_t flags;
 	dwc_spinlock_t *channel_lock = hcd->channel_lock;
+#ifdef FIQ_DEBUG
+	int endp = qtd->urb ? qtd->urb->pipe_info.ep_num : 0;
+#endif
 
 	DWC_DEBUGPL(DBG_HCDV, "  %s: channel %d, halt_status %d, xfer_len %d\n",
 		    __func__, hc->hc_num, halt_status, hc->xfer_len);
@@ -1008,9 +1385,24 @@ cleanup:
 
 		DWC_SPINLOCK_IRQSAVE(channel_lock, &flags);
 		hcd->available_host_channels++;
+		fiq_print(FIQDBG_PORTHUB, "AHC = %d ", hcd->available_host_channels);
 		DWC_SPINUNLOCK_IRQRESTORE(channel_lock, flags);
 	}
 
+	if(fiq_split_enable && hc->do_split)
+	{
+		if(!(hcd->hub_port[hc->hub_addr] & (1 << hc->port_addr)))
+		{
+			fiq_print(FIQDBG_ERR, "PRTNOTAL");
+			//BUG();
+		}
+
+		hcd->hub_port[hc->hub_addr] &= ~(1 << hc->port_addr);
+		hcd->hub_port_alloc[hc->hub_addr * 16 + hc->port_addr] = -1;
+
+		fiq_print(FIQDBG_PORTHUB, "H%dP%d:RR%d", hc->hub_addr, hc->port_addr, endp);
+	}
+
 	/* Try to queue more transfers now that there's a free channel. */
 	tr_type = dwc_otg_hcd_select_transactions(hcd);
 	if (tr_type != DWC_OTG_TRANSACTION_NONE) {
@@ -1633,8 +2025,10 @@ static int32_t handle_hc_nyet_intr(dwc_otg_hcd_t * hcd,
 		    hc->ep_type == DWC_OTG_EP_TYPE_ISOC) {
 			int frnum = dwc_otg_hcd_get_frame_number(hcd);
 
+			// With the FIQ running we only ever see the failed NYET
 			if (dwc_full_frame_num(frnum) !=
-			    dwc_full_frame_num(hc->qh->sched_frame)) {
+			    dwc_full_frame_num(hc->qh->sched_frame) ||
+			    fiq_split_enable) {
 				/*
 				 * No longer in the same full speed frame.
 				 * Treat this as a transaction error.
@@ -2012,10 +2406,10 @@ static inline int halt_status_ok(dwc_otg_hcd_t * hcd,
 static void handle_hc_chhltd_intr_dma(dwc_otg_hcd_t * hcd,
 				      dwc_hc_t * hc,
 				      dwc_otg_hc_regs_t * hc_regs,
-				      dwc_otg_qtd_t * qtd)
+				      dwc_otg_qtd_t * qtd,
+				      hcint_data_t hcint,
+				      hcintmsk_data_t hcintmsk)
 {
-	hcint_data_t hcint;
-	hcintmsk_data_t hcintmsk;
 	int out_nak_enh = 0;
 
 	/* For core with OUT NAK enhancement, the flow for high-
@@ -2047,8 +2441,11 @@ static void handle_hc_chhltd_intr_dma(dwc_otg_hcd_t * hcd,
 	}
 
 	/* Read the HCINTn register to determine the cause for the halt. */
-	hcint.d32 = DWC_READ_REG32(&hc_regs->hcint);
-	hcintmsk.d32 = DWC_READ_REG32(&hc_regs->hcintmsk);
+	if(!fiq_split_enable)
+	{
+		hcint.d32 = DWC_READ_REG32(&hc_regs->hcint);
+		hcintmsk.d32 = DWC_READ_REG32(&hc_regs->hcintmsk);
+	}
 
 	if (hcint.b.xfercomp) {
 		/** @todo This is here because of a possible hardware bug.  Spec
@@ -2161,13 +2558,15 @@ static void handle_hc_chhltd_intr_dma(dwc_otg_hcd_t * hcd,
 static int32_t handle_hc_chhltd_intr(dwc_otg_hcd_t * hcd,
 				     dwc_hc_t * hc,
 				     dwc_otg_hc_regs_t * hc_regs,
-				     dwc_otg_qtd_t * qtd)
+				     dwc_otg_qtd_t * qtd,
+				     hcint_data_t hcint,
+				     hcintmsk_data_t hcintmsk)
 {
 	DWC_DEBUGPL(DBG_HCDI, "--Host Channel %d Interrupt: "
 		    "Channel Halted--\n", hc->hc_num);
 
 	if (hcd->core_if->dma_enable) {
-		handle_hc_chhltd_intr_dma(hcd, hc, hc_regs, qtd);
+		handle_hc_chhltd_intr_dma(hcd, hc, hc_regs, qtd, hcint, hcintmsk);
 	} else {
 #ifdef DEBUG
 		if (!halt_status_ok(hcd, hc, hc_regs, qtd)) {
@@ -2184,7 +2583,7 @@ static int32_t handle_hc_chhltd_intr(dwc_otg_hcd_t * hcd,
 int32_t dwc_otg_hcd_handle_hc_n_intr(dwc_otg_hcd_t * dwc_otg_hcd, uint32_t num)
 {
 	int retval = 0;
-	hcint_data_t hcint;
+	hcint_data_t hcint, hcint_orig;
 	hcintmsk_data_t hcintmsk;
 	dwc_hc_t *hc;
 	dwc_otg_hc_regs_t *hc_regs;
@@ -2197,12 +2596,23 @@ int32_t dwc_otg_hcd_handle_hc_n_intr(dwc_otg_hcd_t * dwc_otg_hcd, uint32_t num)
 	qtd = DWC_CIRCLEQ_FIRST(&hc->qh->qtd_list);
 
 	hcint.d32 = DWC_READ_REG32(&hc_regs->hcint);
+	hcint_orig = hcint;
 	hcintmsk.d32 = DWC_READ_REG32(&hc_regs->hcintmsk);
 	DWC_DEBUGPL(DBG_HCDV,
 		    "  hcint 0x%08x, hcintmsk 0x%08x, hcint&hcintmsk 0x%08x\n",
 		    hcint.d32, hcintmsk.d32, (hcint.d32 & hcintmsk.d32));
 	hcint.d32 = hcint.d32 & hcintmsk.d32;
 
+	if(fiq_split_enable)
+	{
+		// replace with the saved interrupts from the fiq handler
+		local_fiq_disable();
+		hcint_orig.d32 = hcint_saved[num].d32;
+		hcint.d32 = hcint_orig.d32 & hcintmsk_saved[num].d32;
+		hcint_saved[num].d32 = 0;
+		local_fiq_enable();
+	}
+
 	if (!dwc_otg_hcd->core_if->dma_enable) {
 		if (hcint.b.chhltd && hcint.d32 != 0x2) {
 			hcint.b.chhltd = 0;
@@ -2220,7 +2630,7 @@ int32_t dwc_otg_hcd_handle_hc_n_intr(dwc_otg_hcd_t * dwc_otg_hcd, uint32_t num)
 		hcint.b.nyet = 0;
 	}
 	if (hcint.b.chhltd) {
-		retval |= handle_hc_chhltd_intr(dwc_otg_hcd, hc, hc_regs, qtd);
+		retval |= handle_hc_chhltd_intr(dwc_otg_hcd, hc, hc_regs, qtd, hcint_orig, hcintmsk_saved[num]);
 	}
 	if (hcint.b.ahberr) {
 		retval |= handle_hc_ahberr_intr(dwc_otg_hcd, hc, hc_regs, qtd);
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c
index fef557d..0d6f5f4 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c
@@ -392,7 +392,11 @@ static struct dwc_otg_hcd_function_ops hcd_fops = {
 static struct fiq_handler fh = {
   .name = "usb_fiq",
 };
-static uint8_t fiqStack[1024];
+struct fiq_stack_s {
+	int magic1;
+	uint8_t stack[2048];
+	int magic2;
+} fiq_stack;
 
 extern mphi_regs_t c_mphi_regs;
 /**
@@ -434,9 +438,11 @@ int hcd_init(dwc_bus_dev_t *_dev)
 		memset(&regs,0,sizeof(regs));
 		regs.ARM_r8 = (long)dwc_otg_hcd_handle_fiq;
 		regs.ARM_r9 = (long)0;
-		regs.ARM_sp = (long)fiqStack + sizeof(fiqStack) - 4;
+		regs.ARM_sp = (long)fiq_stack.stack + sizeof(fiq_stack.stack) - 4;
 		set_fiq_regs(&regs);
-		}
+		fiq_stack.magic1 = 0xdeadbeef;
+		fiq_stack.magic2 = 0xaa995566;
+	}
 
 	/*
 	 * Allocate memory for the base HCD plus the DWC OTG HCD.
@@ -459,6 +465,8 @@ int hcd_init(dwc_bus_dev_t *_dev)
 
 	if (fiq_fix_enable)
 	{
+		volatile extern void *dwc_regs_base;
+
 		//Set the mphi periph to  the required registers
 		c_mphi_regs.base    = otg_dev->os_dep.mphi_base;
 		c_mphi_regs.ctrl    = otg_dev->os_dep.mphi_base + 0x4c;
@@ -466,6 +474,8 @@ int hcd_init(dwc_bus_dev_t *_dev)
 		c_mphi_regs.outddb  = otg_dev->os_dep.mphi_base + 0x2c;
 		c_mphi_regs.intstat = otg_dev->os_dep.mphi_base + 0x50;
 
+		dwc_regs_base = otg_dev->os_dep.base;
+
 		//Enable mphi peripheral
 		writel((1<<31),c_mphi_regs.ctrl);
 #ifdef DEBUG
@@ -839,6 +849,8 @@ static int dwc_otg_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                 usb_hcd_unlink_urb_from_ep(hcd, urb);
 #endif
         	DWC_SPINUNLOCK_IRQRESTORE(dwc_otg_hcd->lock, flags);
+
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
                 usb_hcd_giveback_urb(hcd, urb);
 #else
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd_queue.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd_queue.c
index b3e6e52..8125307 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_queue.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_queue.c
@@ -41,6 +41,7 @@
 
 #include "dwc_otg_hcd.h"
 #include "dwc_otg_regs.h"
+#include "dwc_otg_mphi_fix.h"
 
 extern bool microframe_schedule;
 
@@ -191,6 +192,7 @@ void qh_init(dwc_otg_hcd_t * hcd, dwc_otg_qh_t * qh, dwc_otg_hcd_urb_t * urb)
 			    dwc_otg_hcd_get_ep_num(&urb->pipe_info), hub_addr,
 			    hub_port);
 		qh->do_split = 1;
+		qh->skip_count = 0;
 	}
 
 	if (qh->ep_type == UE_INTERRUPT || qh->ep_type == UE_ISOCHRONOUS) {
@@ -737,6 +739,9 @@ void dwc_otg_hcd_qh_remove(dwc_otg_hcd_t * hcd, dwc_otg_qh_t * qh)
 			    hcd->non_periodic_qh_ptr->next;
 		}
 		DWC_LIST_REMOVE_INIT(&qh->qh_list_entry);
+
+		// If we've removed the last non-periodic entry then there are none left!
+		g_np_count = g_np_sent;
 	} else {
 		deschedule_periodic(hcd, qh);
 		hcd->periodic_qh_count--;
@@ -766,21 +771,21 @@ void dwc_otg_hcd_qh_deactivate(dwc_otg_hcd_t * hcd, dwc_otg_qh_t * qh,
 {	
 	if (dwc_qh_is_non_per(qh)) {
 
-	dwc_otg_qh_t *qh_tmp;
-	dwc_list_link_t *qh_list;
-	DWC_LIST_FOREACH(qh_list, &hcd->non_periodic_sched_inactive)
-	{
-		qh_tmp = DWC_LIST_ENTRY(qh_list, struct dwc_otg_qh, qh_list_entry);
-		if(qh_tmp == qh)
+		dwc_otg_qh_t *qh_tmp;
+		dwc_list_link_t *qh_list;
+		DWC_LIST_FOREACH(qh_list, &hcd->non_periodic_sched_inactive)
 		{
-			/*
-			 *  FIQ is being disabled because this one nevers gets a np_count increment
-			 *  This is still not absolutely correct, but it should fix itself with
-			 *  just an unnecessary extra interrupt
-			 */
-			g_np_sent = g_np_count;
+			qh_tmp = DWC_LIST_ENTRY(qh_list, struct dwc_otg_qh, qh_list_entry);
+			if(qh_tmp == qh)
+			{
+				/*
+				 *  FIQ is being disabled because this one never gets an np_count increment
+				 *  This is still not absolutely correct, but it should fix itself with
+				 *  just an unnecessary extra interrupt
+				 */
+				g_np_sent = g_np_count;
+			}
 		}
-	}
 
 
 		dwc_otg_hcd_qh_remove(hcd, qh);
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_mphi_fix.h b/drivers/usb/host/dwc_otg/dwc_otg_mphi_fix.h
index 22f28e1..ca17379 100755
--- a/drivers/usb/host/dwc_otg/dwc_otg_mphi_fix.h
+++ b/drivers/usb/host/dwc_otg/dwc_otg_mphi_fix.h
@@ -1,10 +1,7 @@
 #ifndef __DWC_OTG_MPHI_FIX_H__
 #define __DWC_OTG_MPHI_FIX_H__
-
-#define FIQ_WRITE_IO_ADDRESS(_addr_,_data_) *(volatile uint32_t *) IO_ADDRESS(_addr_) = _data_
-#define FIQ_READ_IO_ADDRESS(_addr_) *(volatile uint32_t *) IO_ADDRESS(_addr_) 
-#define FIQ_MODIFY_IO_ADDRESS(_addr_,_clear_,_set_) FIQ_WRITE_IO_ADDRESS(_addr_ , (FIQ_READ_IO_ADDRESS(_addr_)&~_clear_)|_set_)
-#define FIQ_WRITE(_addr_,_data_) *(volatile uint32_t *) _addr_ = _data_
+#define FIQ_WRITE(_addr_,_data_) (*(volatile uint32_t *) (_addr_) = (_data_))
+#define FIQ_READ(_addr_) (*(volatile uint32_t *) (_addr_))
 
 typedef struct {
 	volatile void* base;
@@ -12,13 +9,13 @@ typedef struct {
 	volatile void* outdda;
 	volatile void* outddb;
 	volatile void* intstat;
-} mphi_regs_t; 
+} mphi_regs_t;
 
 void dwc_debug_print_core_int_reg(gintsts_data_t gintsts, const char* function_name);
 void dwc_debug_core_int_mask(gintsts_data_t gintmsk, const char* function_name);
 void dwc_debug_otg_int(gotgint_data_t gotgint, const char* function_name);
 
-
+extern gintsts_data_t gintsts_saved;
 
 #ifdef DEBUG
 #define DWC_DBG_PRINT_CORE_INT(_arg_) dwc_debug_print_core_int_reg(_arg_,__func__)
@@ -30,7 +27,22 @@ void dwc_debug_otg_int(gotgint_data_t gotgint, const char* function_name);
 #define DWC_DBG_PRINT_CORE_INT_MASK(_arg_)
 #define DWC_DBG_PRINT_OTG_INT(_arg_)
 
+#endif
+
+typedef enum {
+	FIQDBG_SCHED = (1 << 0),
+	FIQDBG_INT   = (1 << 1),
+	FIQDBG_ERR   = (1 << 2),
+	FIQDBG_PORTHUB = (1 << 3),
+} FIQDBG_T;
 
+void _fiq_print(FIQDBG_T dbg_lvl, char *fmt, ...);
+#ifdef FIQ_DEBUG
+#define fiq_print _fiq_print
+#else
+#define fiq_print(x, y, ...)
 #endif
 
+extern bool fiq_fix_enable, nak_holdoff_enable, fiq_split_enable;
+
 #endif
diff --git a/drivers/usb/host/dwc_otg/dwc_otg_pcd_intr.c b/drivers/usb/host/dwc_otg/dwc_otg_pcd_intr.c
index 27061d3..9720937 100644
--- a/drivers/usb/host/dwc_otg/dwc_otg_pcd_intr.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_pcd_intr.c
@@ -4276,7 +4276,7 @@ do { \
 									&& (pcd->ep0state == EP0_OUT_DATA_PHASE))
 									status.d32 = core_if->dev_if->out_desc_addr->status.d32;
 								if (pcd->ep0state == EP0_OUT_STATUS_PHASE)
-									status.d32 = status.d32 = core_if->dev_if->
+									status.d32 = core_if->dev_if->
 									out_desc_addr->status.d32;
 
 								if (status.b.sr) {
-- 
1.9.1