From 11a3d270436137fa485257eab8153ce5d307cfa7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 12 Mar 2018 20:51:55 +0100 Subject: kernel: add hardware offload patch for flow tables support Supports offloading through VLAN, bridge and PPPoE devices as well Signed-off-by: Felix Fietkau --- ...f_flow_table-add-hardware-offload-support.patch | 580 +++++++++++++++++++++ 1 file changed, 580 insertions(+) create mode 100644 target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch (limited to 'target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch') diff --git a/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch b/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch new file mode 100644 index 0000000..7dc4a01 --- /dev/null +++ b/target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch @@ -0,0 +1,580 @@ +From: Pablo Neira Ayuso +Date: Thu, 11 Jan 2018 16:32:00 +0100 +Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support + +This patch adds the infrastructure to offload flows to hardware, in case +the nic/switch comes with built-in flow tables capabilities. + +If the hardware comes with no hardware flow tables or they have +limitations in terms of features, the existing infrastructure falls back +to the software flow table implementation. + +The software flow table garbage collector skips entries that resides in +the hardware, so the hardware will be responsible for releasing this +flow table entry too via flow_offload_dead(). + +Hardware configuration, either to add or to delete entries, is done from +the hardware offload workqueue, to ensure this is done from user context +given that we may sleep when grabbing the mdio mutex. + +Signed-off-by: Pablo Neira Ayuso +--- + create mode 100644 net/netfilter/nf_flow_table_hw.c + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -826,6 +826,13 @@ struct xfrmdev_ops { + }; + #endif + ++struct flow_offload; ++ ++enum flow_offload_type { ++ FLOW_OFFLOAD_ADD = 0, ++ FLOW_OFFLOAD_DEL, ++}; ++ + /* + * This structure defines the management hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are +@@ -1057,6 +1064,10 @@ struct xfrmdev_ops { + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); + * ++ * int (*ndo_flow_offload)(enum flow_offload_type type, ++ * struct flow_offload *flow); ++ * Adds/deletes flow entry to/from net device flowtable. ++ * + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); + * Called to change device carrier. Soft-devices (like dummy, team, etc) + * which do not represent real hardware may define this to allow their +@@ -1281,6 +1292,8 @@ struct net_device_ops { + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags); ++ int (*ndo_flow_offload)(enum flow_offload_type type, ++ struct flow_offload *flow); + int (*ndo_change_carrier)(struct net_device *dev, + bool new_carrier); + int (*ndo_get_phys_port_id)(struct net_device *dev, +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -20,11 +20,17 @@ struct nf_flowtable_type { + struct module *owner; + }; + ++enum nf_flowtable_flags { ++ NF_FLOWTABLE_F_HW = 0x1, ++}; ++ + struct nf_flowtable { + struct list_head list; + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; ++ u32 flags; + struct delayed_work gc_work; ++ possible_net_t ft_net; + }; + + enum flow_offload_tuple_dir { +@@ -69,6 +75,7 @@ struct flow_offload_tuple_rhash { + #define FLOW_OFFLOAD_DNAT 0x2 + #define FLOW_OFFLOAD_DYING 0x4 + #define FLOW_OFFLOAD_TEARDOWN 0x8 ++#define FLOW_OFFLOAD_HW 0x10 + + struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; +@@ -126,6 +133,22 @@ unsigned int nf_flow_offload_ip_hook(voi + unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow); ++ ++struct nf_flow_table_hw { ++ struct module *owner; ++ void (*add)(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++ void (*del)(struct net *net, struct flow_offload *flow); ++}; ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload); ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload); ++ ++extern struct work_struct nf_flow_offload_hw_work; ++ + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -1341,6 +1341,7 @@ enum nft_object_attributes { + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) ++ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) + */ + enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, +@@ -1350,6 +1351,7 @@ enum nft_flowtable_attributes { + NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, ++ NFTA_FLOWTABLE_FLAGS, + __NFTA_FLOWTABLE_MAX + }; + #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -686,6 +686,15 @@ config NF_FLOW_TABLE + + To compile it as a module, choose M here. + ++config NF_FLOW_TABLE_HW ++ tristate "Netfilter flow table hardware offload module" ++ depends on NF_FLOW_TABLE ++ help ++ This option adds hardware offload support for the flow table core ++ infrastructure. ++ ++ To compile it as a module, choose M here. ++ + config NETFILTER_XTABLES + tristate "Netfilter Xtables support (required for ip_tables)" + default m if NETFILTER_ADVANCED=n +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -116,6 +116,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t + nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o + + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o ++obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o + + # generic X tables + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_add); + ++static inline bool nf_flow_in_hw(const struct flow_offload *flow) ++{ ++ return flow->flags & FLOW_OFFLOAD_HW; ++} ++ + static void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) + { + struct flow_offload_entry *e; ++ struct net *net = read_pnet(&flow_table->ft_net); + + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f + e = container_of(flow, struct flow_offload_entry, flow); + clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); + ++ if (nf_flow_in_hw(flow)) ++ nf_flow_offload_hw_del(net, flow); ++ + flow_offload_free(flow); + } + +@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc + rhashtable_walk_start(&hti); + + while ((tuplehash = rhashtable_walk_next(&hti))) { ++ bool teardown; + if (IS_ERR(tuplehash)) { + err = PTR_ERR(tuplehash); + if (err != -EAGAIN) +@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + +- if (nf_flow_has_expired(flow) || +- (flow->flags & (FLOW_OFFLOAD_DYING | +- FLOW_OFFLOAD_TEARDOWN))) ++ teardown = flow->flags & (FLOW_OFFLOAD_DYING | ++ FLOW_OFFLOAD_TEARDOWN); ++ ++ if (nf_flow_in_hw(flow) && !teardown) ++ continue; ++ ++ if (nf_flow_has_expired(flow) || teardown) + flow_offload_del(flow_table, flow); + } + out: +@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_ + } + EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + ++static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly; ++ ++static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ if (!rcu_access_pointer(nf_flow_table_hw_hook)) ++ request_module("nf-flow-table-hw"); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) ++ goto err_no_hw_offload; ++ ++ if (!try_module_get(offload->owner)) ++ goto err_no_hw_offload; ++ ++ rcu_read_unlock(); ++ ++ return 0; ++ ++err_no_hw_offload: ++ rcu_read_unlock(); ++ ++ return -EOPNOTSUPP; ++} ++ + int nf_flow_table_init(struct nf_flowtable *flowtable) + { + int err; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) { ++ err = nf_flow_offload_hw_init(flowtable); ++ if (err) ++ return err; ++ } ++ + INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + + err = rhashtable_init(&flowtable->rhashtable, +@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu + { + nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); + flush_delayed_work(&flowtable->gc_work); ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ flush_work(&nf_flow_offload_hw_work); + } + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n + } + EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + ++struct work_struct nf_flow_offload_hw_work; ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work); ++ ++/* Give the hardware workqueue the chance to remove entries from hardware.*/ ++static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ flush_work(&nf_flow_offload_hw_work); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) { ++ rcu_read_unlock(); ++ return; ++ } ++ module_put(offload->owner); ++ rcu_read_unlock(); ++} ++ + void nf_flow_table_free(struct nf_flowtable *flow_table) + { + mutex_lock(&flowtable_lock); +@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); + rhashtable_destroy(&flow_table->rhashtable); ++ if (flow_table->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_free(flow_table); + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + ++/* Must be called from user context. */ ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->add(net, flow, ct); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add); ++ ++/* Must be called from user context. */ ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->del(net, flow); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del); ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload) ++{ ++ if (rcu_access_pointer(nf_flow_table_hw_hook)) ++ return -EBUSY; ++ ++ rcu_assign_pointer(nf_flow_table_hw_hook, offload); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_register); ++ ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload) ++{ ++ WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload); ++ rcu_assign_pointer(nf_flow_table_hw_hook, NULL); ++ ++ synchronize_rcu(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister); ++ + static int nf_flow_table_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) + { +--- /dev/null ++++ b/net/netfilter/nf_flow_table_hw.c +@@ -0,0 +1,169 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock); ++static LIST_HEAD(flow_offload_hw_pending_list); ++ ++static DEFINE_MUTEX(nf_flow_offload_hw_mutex); ++ ++struct flow_offload_hw { ++ struct list_head list; ++ enum flow_offload_type type; ++ struct flow_offload *flow; ++ struct nf_conn *ct; ++ possible_net_t flow_hw_net; ++}; ++ ++static int do_flow_offload_hw(struct net *net, struct flow_offload *flow, ++ int type) ++{ ++ struct net_device *indev; ++ int ret, ifindex; ++ ++ ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx; ++ indev = dev_get_by_index(net, ifindex); ++ if (WARN_ON(!indev)) ++ return 0; ++ ++ mutex_lock(&nf_flow_offload_hw_mutex); ++ ret = indev->netdev_ops->ndo_flow_offload(type, flow); ++ mutex_unlock(&nf_flow_offload_hw_mutex); ++ ++ dev_put(indev); ++ ++ return ret; ++} ++ ++static void flow_offload_hw_work_add(struct flow_offload_hw *offload) ++{ ++ struct net *net; ++ int ret; ++ ++ if (nf_ct_is_dying(offload->ct)) ++ return; ++ ++ net = read_pnet(&offload->flow_hw_net); ++ ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD); ++ if (ret >= 0) ++ offload->flow->flags |= FLOW_OFFLOAD_HW; ++} ++ ++static void flow_offload_hw_work_del(struct flow_offload_hw *offload) ++{ ++ struct net *net = read_pnet(&offload->flow_hw_net); ++ ++ do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL); ++} ++ ++static void flow_offload_hw_work(struct work_struct *work) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ switch (offload->type) { ++ case FLOW_OFFLOAD_ADD: ++ flow_offload_hw_work_add(offload); ++ break; ++ case FLOW_OFFLOAD_DEL: ++ flow_offload_hw_work_del(offload); ++ break; ++ } ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++static void flow_offload_queue_work(struct flow_offload_hw *offload) ++{ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_add_tail(&offload->list, &flow_offload_hw_pending_list); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ schedule_work(&nf_flow_offload_hw_work); ++} ++ ++static void flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ nf_conntrack_get(&ct->ct_general); ++ offload->type = FLOW_OFFLOAD_ADD; ++ offload->ct = ct; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static void flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ offload->type = FLOW_OFFLOAD_DEL; ++ offload->ct = NULL; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static const struct nf_flow_table_hw flow_offload_hw = { ++ .add = flow_offload_hw_add, ++ .del = flow_offload_hw_del, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_table_hw_module_init(void) ++{ ++ INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work); ++ nf_flow_table_hw_register(&flow_offload_hw); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_table_hw_module_exit(void) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ nf_flow_table_hw_unregister(&flow_offload_hw); ++ cancel_work_sync(&nf_flow_offload_hw_work); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++module_init(nf_flow_table_hw_module_init); ++module_exit(nf_flow_table_hw_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso "); ++MODULE_ALIAS("nf-flow-table-hw"); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4866,6 +4866,14 @@ static int nf_tables_flowtable_parse_hoo + if (err < 0) + goto err1; + ++ for (i = 0; i < n; i++) { ++ if (flowtable->data.flags & NF_FLOWTABLE_F_HW && ++ !dev_array[i]->netdev_ops->ndo_flow_offload) { ++ err = -EOPNOTSUPP; ++ goto err1; ++ } ++ } ++ + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); + if (!ops) { + err = -ENOMEM; +@@ -4996,10 +5004,19 @@ static int nf_tables_newflowtable(struct + } + + flowtable->data.type = type; ++ write_pnet(&flowtable->data.ft_net, net); ++ + err = type->init(&flowtable->data); + if (err < 0) + goto err3; + ++ if (nla[NFTA_FLOWTABLE_FLAGS]) { ++ flowtable->data.flags = ++ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); ++ if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW) ++ goto err4; ++ } ++ + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) +@@ -5097,7 +5114,8 @@ static int nf_tables_fill_flowtable_info + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || + nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), +- NFTA_FLOWTABLE_PAD)) ++ NFTA_FLOWTABLE_PAD) || ++ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -121,6 +121,9 @@ static void nft_flow_offload_eval(const + if (ret < 0) + goto err_flow_add; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_add(nft_net(pkt), flow, ct); ++ + return; + + err_flow_add: -- cgit v1.1