diff --git a/.gitignore b/.gitignore
index cbe03d8..0c0e452 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ modules.order
 Module.symvers
 compile_commands.json
+
+.vscode/
diff --git a/hwmon/xmc.c b/hwmon/xmc.c
index 8e19db2..1c418ac 100644
--- a/hwmon/xmc.c
+++ b/hwmon/xmc.c
@@ -542,6 +542,9 @@ struct xocl_xmc {
 	struct xocl_xmc_privdata *priv_data;
 };
 
+int xmc_probe(struct platform_device *pdev);
+int xocl_init_xmc(void);
+void xocl_fini_xmc(void);
 #ifdef XMC_MB
 static int load_xmc(struct xocl_xmc *xmc);
diff --git a/hwmon/xocl_ctx.c b/hwmon/xocl_ctx.c
index ba3bdf6..0692241 100644
--- a/hwmon/xocl_ctx.c
+++ b/hwmon/xocl_ctx.c
@@ -22,7 +22,7 @@
  */
 DEFINE_MUTEX(xocl_drvinst_lock);
 struct xocl_drvinst *xocl_drvinst_array[XOCL_MAX_DEVICES * 64];
-
+void xocl_drvinst_close(void *data);
 void *xocl_drvinst_alloc(struct device *dev, u32 size)
 {
 	struct xocl_drvinst *drvinstp = NULL;
diff --git a/onic.h b/onic.h
index 2b11c11..1fcc057 100644
--- a/onic.h
+++ b/onic.h
@@ -19,6 +19,9 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
+#include <linux/bpf.h>
+#include <net/xdp.h>
+#include <net/page_pool.h>
 
 #include "onic_hardware.h"
@@ -31,8 +34,24 @@
 /* flag bits */
 #define ONIC_FLAG_MASTER_PF	0
 
+/* XDP */
+#define ONIC_XDP_PASS		BIT(0)
+#define ONIC_XDP_CONSUMED	BIT(1)
+#define ONIC_XDP_TX		BIT(2)
+#define ONIC_XDP_REDIR		BIT(3)
+
+enum onic_tx_buf_type {
+	ONIC_TX_SKB = BIT(0),
+	ONIC_TX_XDPF = BIT(1),
+	ONIC_TX_XDPF_XMIT = BIT(2),
+};
+
 struct onic_tx_buffer {
-	struct sk_buff *skb;
+	enum onic_tx_buf_type type;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
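+	/* type tags the union: exactly one of skb/xdpf is valid at a time.
+	 * ONIC_TX_XDPF frames come from this device's own page pool (no
+	 * unmap needed), while ONIC_TX_XDPF_XMIT frames were DMA-mapped in
+	 * ndo_xdp_xmit; see onic_tx_clean() for how each type is released. */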
 	dma_addr_t dma_addr;
 	u32 len;
 	u64 time_stamp;
@@ -66,6 +85,11 @@ struct onic_tx_queue {
 	struct onic_tx_buffer *buffer;
 	struct onic_ring ring;
 	struct onic_q_vector *vector;
+
+	struct {
+		u64 xdp_xmit;
+		u64 xdp_xmit_err;
+	} xdp_tx_stats;
 };
 
 struct onic_rx_queue {
@@ -78,6 +102,18 @@ struct onic_rx_queue {
 	struct onic_q_vector *vector;
 	struct napi_struct napi;
 
+	struct bpf_prog *xdp_prog;
+	struct xdp_rxq_info xdp_rxq;
+	struct page_pool *page_pool;
+
+	struct {
+		u64 xdp_redirect;
+		u64 xdp_pass;
+		u64 xdp_drop;
+		u64 xdp_tx;
+		u64 xdp_tx_err;
+	} xdp_rx_stats;
+
 };
 
 struct onic_q_vector {
@@ -87,6 +123,7 @@ struct onic_q_vector {
 	int numa_node;
 };
 
+
 /**
  * struct onic_private - OpenNIC driver private data
 **/
@@ -104,7 +141,8 @@ struct onic_private {
 	u16 num_rx_queues;
 
 	struct net_device *netdev;
-	struct rtnl_link_stats64 netdev_stats;
+	struct bpf_prog *xdp_prog;
+	struct rtnl_link_stats64 *netdev_stats;
 
 	spinlock_t tx_lock;
 	spinlock_t rx_lock;
diff --git a/onic_common.c b/onic_common.c
index 6943d87..7a1bbc9 100644
--- a/onic_common.c
+++ b/onic_common.c
@@ -16,6 +16,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
 
+#include
 #include "onic_common.h"
 
 void print_raw_data(const u8 *data, u32 len)
diff --git a/onic_ethtool.c b/onic_ethtool.c
index 20cf5f9..a9ad108 100644
--- a/onic_ethtool.c
+++ b/onic_ethtool.c
@@ -14,17 +14,31 @@
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
 */
+#include <linux/version.h>
 #include
 #include
 #include
+#include
 
 #include "onic.h"
 #include "onic_register.h"
 
 extern const char onic_drv_name[];
 extern const char onic_drv_ver[];
 
+void onic_set_ethtool_ops(struct net_device *netdev);
+
+// NETDEV_STATS are counters kept by the driver itself (currently the XDP
+// counters); ONIC_STATS are kept in the NIC and read via on-board registers.
+enum { NETDEV_STATS, ONIC_STATS };
+
+enum {
+	ETHTOOL_XDP_REDIRECT,
+	ETHTOOL_XDP_PASS,
+	ETHTOOL_XDP_DROP,
+	ETHTOOL_XDP_TX,
+	ETHTOOL_XDP_TX_ERR,
+	ETHTOOL_XDP_XMIT,
+	ETHTOOL_XDP_XMIT_ERR,
+};
 
-enum {NETDEV_STATS, ONIC_STATS};
 
 struct onic_stats {
 	char stat_string[ETH_GSTRING_LEN];
@@ -34,7 +48,7 @@ struct onic_stats {
 	int stat1_offset;
 };
 
-#define _STAT(_name, _stat0, _stat1) { \
+#define _STAT_ONIC(_name, _stat0, _stat1) { \
 	.stat_string = _name, \
 	.type = ONIC_STATS, \
 	.sizeof_stat = sizeof(u32), \
@@ -42,184 +56,220 @@ struct onic_stats {
 	.stat1_offset = _stat1, \
 }
+
+#define _STAT_NETDEV(_name, _stat) { \
+	.stat_string = _name, \
+	.type = NETDEV_STATS, \
+	.sizeof_stat = sizeof(u64), \
+	.stat0_offset = _stat, \
+	.stat1_offset = _stat, \
+}
+
 static const struct onic_stats onic_gstrings_stats[] = {
-	_STAT("stat_tx_total_pkts",
+	_STAT_ONIC("stat_tx_total_pkts",
 	      CMAC_OFFSET_STAT_TX_TOTAL_PKTS(0),
 	      CMAC_OFFSET_STAT_TX_TOTAL_PKTS(1)),
-	_STAT("stat_tx_total_good_pkts",
+	_STAT_ONIC("stat_tx_total_good_pkts",
 	      CMAC_OFFSET_STAT_TX_TOTAL_GOOD_PKTS(0),
 	      CMAC_OFFSET_STAT_TX_TOTAL_GOOD_PKTS(1)),
-	_STAT("stat_tx_total_bytes",
+	_STAT_ONIC("stat_tx_total_bytes",
 	      CMAC_OFFSET_STAT_TX_TOTAL_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_TOTAL_BYTES(1)),
-	_STAT("stat_tx_total_good_bytes",
+	_STAT_ONIC("stat_tx_total_good_bytes",
 	      CMAC_OFFSET_STAT_TX_TOTAL_GOOD_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_TOTAL_GOOD_BYTES(1)),
-	_STAT("stat_tx_pkt_64_bytes",
+	_STAT_ONIC("stat_tx_pkt_64_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_64_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_64_BYTES(1)),
-	_STAT("stat_tx_pkt_65_127_bytes",
+	_STAT_ONIC("stat_tx_pkt_65_127_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_65_127_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_65_127_BYTES(1)),
-	_STAT("stat_tx_pkt_128_255_bytes",
+	_STAT_ONIC("stat_tx_pkt_128_255_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_128_255_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_128_255_BYTES(1)),
-	_STAT("stat_tx_pkt_256_511_bytes",
+	_STAT_ONIC("stat_tx_pkt_256_511_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_256_511_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_256_511_BYTES(1)),
-	_STAT("stat_tx_pkt_512_1023_bytes",
+	_STAT_ONIC("stat_tx_pkt_512_1023_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_512_1023_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_512_1023_BYTES(1)),
-	_STAT("stat_tx_pkt_1024_1518_bytes",
+	_STAT_ONIC("stat_tx_pkt_1024_1518_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_1024_1518_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_1024_1518_BYTES(1)),
-	_STAT("stat_tx_pkt_1519_1522_bytes",
+	_STAT_ONIC("stat_tx_pkt_1519_1522_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_1519_1522_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_1519_1522_BYTES(1)),
-	_STAT("stat_tx_pkt_1523_1548_bytes",
+	_STAT_ONIC("stat_tx_pkt_1523_1548_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_1523_1548_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_1523_1548_BYTES(1)),
-	_STAT("stat_tx_pkt_1549_2047_bytes",
+	_STAT_ONIC("stat_tx_pkt_1549_2047_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_1549_2047_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_1549_2047_BYTES(1)),
-	_STAT("stat_tx_pkt_2048_4095_bytes",
+	_STAT_ONIC("stat_tx_pkt_2048_4095_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_2048_4095_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_2048_4095_BYTES(1)),
-	_STAT("stat_tx_pkt_4096_8191_bytes",
+	_STAT_ONIC("stat_tx_pkt_4096_8191_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_4096_8191_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_4096_8191_BYTES(1)),
-	_STAT("stat_tx_pkt_8192_9215_bytes",
+	_STAT_ONIC("stat_tx_pkt_8192_9215_bytes",
 	      CMAC_OFFSET_STAT_TX_PKT_8192_9215_BYTES(0),
 	      CMAC_OFFSET_STAT_TX_PKT_8192_9215_BYTES(1)),
-	_STAT("stat_tx_pkt_large",
+	_STAT_ONIC("stat_tx_pkt_large",
 	      CMAC_OFFSET_STAT_TX_PKT_LARGE(0),
 	      CMAC_OFFSET_STAT_TX_PKT_LARGE(1)),
-	_STAT("stat_tx_pkt_small",
+	_STAT_ONIC("stat_tx_pkt_small",
 	      CMAC_OFFSET_STAT_TX_PKT_SMALL(0),
 	      CMAC_OFFSET_STAT_TX_PKT_SMALL(1)),
-	_STAT("stat_tx_bad_fcs",
+	_STAT_ONIC("stat_tx_bad_fcs",
 	      CMAC_OFFSET_STAT_TX_BAD_FCS(0),
 	      CMAC_OFFSET_STAT_TX_BAD_FCS(1)),
-	_STAT("stat_tx_unicast",
+	_STAT_ONIC("stat_tx_unicast",
 	      CMAC_OFFSET_STAT_TX_UNICAST(0),
 	      CMAC_OFFSET_STAT_TX_UNICAST(1)),
-	_STAT("stat_tx_multicast",
+	_STAT_ONIC("stat_tx_multicast",
 	      CMAC_OFFSET_STAT_TX_MULTICAST(0),
 	      CMAC_OFFSET_STAT_TX_MULTICAST(1)),
-	_STAT("stat_tx_broadcast",
+	_STAT_ONIC("stat_tx_broadcast",
 	      CMAC_OFFSET_STAT_TX_BROADCAST(0),
 	      CMAC_OFFSET_STAT_TX_BROADCAST(1)),
-	_STAT("stat_tx_vlan",
+	_STAT_ONIC("stat_tx_vlan",
 	      CMAC_OFFSET_STAT_TX_VLAN(0),
 	      CMAC_OFFSET_STAT_TX_VLAN(1)),
-	_STAT("stat_tx_pause",
+	_STAT_ONIC("stat_tx_pause",
 	      CMAC_OFFSET_STAT_TX_PAUSE(0),
 	      CMAC_OFFSET_STAT_TX_PAUSE(1)),
-	_STAT("stat_tx_user_pause",
+	_STAT_ONIC("stat_tx_user_pause",
 	      CMAC_OFFSET_STAT_TX_USER_PAUSE(0),
 	      CMAC_OFFSET_STAT_TX_USER_PAUSE(1)),
-	_STAT("stat_rx_total_pkts",
+	_STAT_ONIC("stat_rx_total_pkts",
 	      CMAC_OFFSET_STAT_RX_TOTAL_PKTS(0),
 	      CMAC_OFFSET_STAT_RX_TOTAL_PKTS(1)),
-	_STAT("stat_rx_total_good_pkts",
+	_STAT_ONIC("stat_rx_total_good_pkts",
 	      CMAC_OFFSET_STAT_RX_TOTAL_GOOD_PKTS(0),
 	      CMAC_OFFSET_STAT_RX_TOTAL_GOOD_PKTS(1)),
-	_STAT("stat_rx_total_bytes",
+	_STAT_ONIC("stat_rx_total_bytes",
 	      CMAC_OFFSET_STAT_RX_TOTAL_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_TOTAL_BYTES(1)),
-	_STAT("stat_rx_total_good_bytes",
+	_STAT_ONIC("stat_rx_total_good_bytes",
 	      CMAC_OFFSET_STAT_RX_TOTAL_GOOD_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_TOTAL_GOOD_BYTES(1)),
-	_STAT("stat_rx_pkt_64_bytes",
+	_STAT_ONIC("stat_rx_pkt_64_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_64_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_64_BYTES(1)),
-	_STAT("stat_rx_pkt_65_127_bytes",
+	_STAT_ONIC("stat_rx_pkt_65_127_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_65_127_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_65_127_BYTES(1)),
-	_STAT("stat_rx_pkt_128_255_bytes",
+	_STAT_ONIC("stat_rx_pkt_128_255_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_128_255_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_128_255_BYTES(1)),
-	_STAT("stat_rx_pkt_256_511_bytes",
+	_STAT_ONIC("stat_rx_pkt_256_511_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_256_511_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_256_511_BYTES(1)),
-	_STAT("stat_rx_pkt_512_1023_bytes",
+	_STAT_ONIC("stat_rx_pkt_512_1023_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_512_1023_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_512_1023_BYTES(1)),
-	_STAT("stat_rx_pkt_1024_1518_bytes",
+	_STAT_ONIC("stat_rx_pkt_1024_1518_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_1024_1518_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_1024_1518_BYTES(1)),
-	_STAT("stat_rx_pkt_1519_1522_bytes",
+	_STAT_ONIC("stat_rx_pkt_1519_1522_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_1519_1522_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_1519_1522_BYTES(1)),
-	_STAT("stat_rx_pkt_1523_1548_bytes",
+	_STAT_ONIC("stat_rx_pkt_1523_1548_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_1523_1548_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_1523_1548_BYTES(1)),
-	_STAT("stat_rx_pkt_1549_2047_bytes",
+	_STAT_ONIC("stat_rx_pkt_1549_2047_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_1549_2047_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_1549_2047_BYTES(1)),
-	_STAT("stat_rx_pkt_2048_4095_bytes",
+	_STAT_ONIC("stat_rx_pkt_2048_4095_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_2048_4095_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_2048_4095_BYTES(1)),
-	_STAT("stat_rx_pkt_4096_8191_bytes",
+	_STAT_ONIC("stat_rx_pkt_4096_8191_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_4096_8191_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_4096_8191_BYTES(1)),
-	_STAT("stat_rx_pkt_8192_9215_bytes",
+	_STAT_ONIC("stat_rx_pkt_8192_9215_bytes",
 	      CMAC_OFFSET_STAT_RX_PKT_8192_9215_BYTES(0),
 	      CMAC_OFFSET_STAT_RX_PKT_8192_9215_BYTES(1)),
-	_STAT("stat_rx_pkt_large",
+	_STAT_ONIC("stat_rx_pkt_large",
 	      CMAC_OFFSET_STAT_RX_PKT_LARGE(0),
 	      CMAC_OFFSET_STAT_RX_PKT_LARGE(1)),
-	_STAT("stat_rx_pkt_small",
+	_STAT_ONIC("stat_rx_pkt_small",
 	      CMAC_OFFSET_STAT_RX_PKT_SMALL(0),
 	      CMAC_OFFSET_STAT_RX_PKT_SMALL(1)),
-	_STAT("stat_rx_undersize",
+	_STAT_ONIC("stat_rx_undersize",
 	      CMAC_OFFSET_STAT_RX_UNDERSIZE(0),
 	      CMAC_OFFSET_STAT_RX_UNDERSIZE(1)),
-	_STAT("stat_rx_fragment",
+	_STAT_ONIC("stat_rx_fragment",
 	      CMAC_OFFSET_STAT_RX_FRAGMENT(0),
 	      CMAC_OFFSET_STAT_RX_FRAGMENT(1)),
-	_STAT("stat_rx_oversize",
+	_STAT_ONIC("stat_rx_oversize",
 	      CMAC_OFFSET_STAT_RX_OVERSIZE(0),
 	      CMAC_OFFSET_STAT_RX_OVERSIZE(1)),
-	_STAT("stat_rx_toolong",
+	_STAT_ONIC("stat_rx_toolong",
 	      CMAC_OFFSET_STAT_RX_TOOLONG(0),
 	      CMAC_OFFSET_STAT_RX_TOOLONG(1)),
-	_STAT("stat_rx_jabber",
+	_STAT_ONIC("stat_rx_jabber",
 	      CMAC_OFFSET_STAT_RX_JABBER(0),
 	      CMAC_OFFSET_STAT_RX_JABBER(1)),
-	_STAT("stat_rx_bad_fcs",
+	_STAT_ONIC("stat_rx_bad_fcs",
 	      CMAC_OFFSET_STAT_RX_BAD_FCS(0),
 	      CMAC_OFFSET_STAT_RX_BAD_FCS(1)),
-	_STAT("stat_rx_pkt_bad_fcs",
+	_STAT_ONIC("stat_rx_pkt_bad_fcs",
 	      CMAC_OFFSET_STAT_RX_PKT_BAD_FCS(0),
 	      CMAC_OFFSET_STAT_RX_PKT_BAD_FCS(1)),
-	_STAT("stat_rx_stomped_fcs",
+	_STAT_ONIC("stat_rx_stomped_fcs",
 	      CMAC_OFFSET_STAT_RX_STOMPED_FCS(0),
 	      CMAC_OFFSET_STAT_RX_STOMPED_FCS(1)),
-	_STAT("stat_rx_unicast",
+	_STAT_ONIC("stat_rx_unicast",
 	      CMAC_OFFSET_STAT_RX_UNICAST(0),
 	      CMAC_OFFSET_STAT_RX_UNICAST(1)),
-	_STAT("stat_rx_multicast",
+	_STAT_ONIC("stat_rx_multicast",
 	      CMAC_OFFSET_STAT_RX_MULTICAST(0),
 	      CMAC_OFFSET_STAT_RX_MULTICAST(1)),
-	_STAT("stat_rx_broadcast",
+	_STAT_ONIC("stat_rx_broadcast",
 	      CMAC_OFFSET_STAT_RX_BROADCAST(0),
 	      CMAC_OFFSET_STAT_RX_BROADCAST(1)),
-	_STAT("stat_rx_vlan",
+	_STAT_ONIC("stat_rx_vlan",
 	      CMAC_OFFSET_STAT_RX_VLAN(0),
 	      CMAC_OFFSET_STAT_RX_VLAN(1)),
-	_STAT("stat_rx_pause",
+	_STAT_ONIC("stat_rx_pause",
 	      CMAC_OFFSET_STAT_RX_PAUSE(0),
 	      CMAC_OFFSET_STAT_RX_PAUSE(1)),
-	_STAT("stat_rx_user_pause",
+	_STAT_ONIC("stat_rx_user_pause",
 	      CMAC_OFFSET_STAT_RX_USER_PAUSE(0),
 	      CMAC_OFFSET_STAT_RX_USER_PAUSE(1)),
-	_STAT("stat_rx_inrangeerr",
+	_STAT_ONIC("stat_rx_inrangeerr",
 	      CMAC_OFFSET_STAT_RX_INRANGEERR(0),
 	      CMAC_OFFSET_STAT_RX_INRANGEERR(1)),
-	_STAT("stat_rx_truncated",
+	_STAT_ONIC("stat_rx_truncated",
 	      CMAC_OFFSET_STAT_RX_TRUNCATED(0),
 	      CMAC_OFFSET_STAT_RX_TRUNCATED(1)),
+	_STAT_ONIC("stat_adapt_tx_sent",
+	      CMAC_ADPT_OFFSET_TX_PKT_RECV(0),
+	      CMAC_ADPT_OFFSET_TX_PKT_RECV(1)),
+	_STAT_ONIC("stat_adapt_tx_drop",
+	      CMAC_ADPT_OFFSET_TX_PKT_DROP(0),
+	      CMAC_ADPT_OFFSET_TX_PKT_DROP(1)),
+	_STAT_ONIC("stat_adapt_rx_recv",
+	      CMAC_ADPT_OFFSET_RX_PKT_RECV(0),
+	      CMAC_ADPT_OFFSET_RX_PKT_RECV(1)),
+	_STAT_ONIC("stat_adapt_rx_drop",
+	      CMAC_ADPT_OFFSET_RX_PKT_DROP(0),
+	      CMAC_ADPT_OFFSET_RX_PKT_DROP(1)),
+	_STAT_ONIC("stat_adapt_rx_error",
+	      CMAC_ADPT_OFFSET_RX_PKT_ERROR(0),
+	      CMAC_ADPT_OFFSET_RX_PKT_ERROR(1)),
+
+	_STAT_NETDEV("rx_xdp_redirect", ETHTOOL_XDP_REDIRECT),
+	_STAT_NETDEV("rx_xdp_pass", ETHTOOL_XDP_PASS),
+	_STAT_NETDEV("rx_xdp_drop", ETHTOOL_XDP_DROP),
+	_STAT_NETDEV("rx_xdp_tx", ETHTOOL_XDP_TX),
+	_STAT_NETDEV("rx_xdp_tx_errors", ETHTOOL_XDP_TX_ERR),
+	_STAT_NETDEV("tx_xdp_xmit", ETHTOOL_XDP_XMIT),
+	_STAT_NETDEV("tx_xdp_xmit_errors", ETHTOOL_XDP_XMIT_ERR),
 };
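+
+/* The NETDEV_STATS entries above surface in `ethtool -S <iface>` alongside
+ * the CMAC register counters, e.g. (illustrative values only):
+ *
+ *     rx_xdp_redirect: 1024
+ *     tx_xdp_xmit:     512
+ *
+ * For these entries stat0_offset/stat1_offset carry an ETHTOOL_XDP_*
+ * selector rather than a register offset; see onic_get_ethtool_stats(). */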
+ _STAT_NETDEV("tx_xdp_xmit", ETHTOOL_XDP_XMIT ), + _STAT_NETDEV("tx_xdp_xmit_errors", ETHTOOL_XDP_XMIT_ERR ), }; #define ONIC_QUEUE_STATS_LEN 0 @@ -231,10 +281,10 @@ static void onic_get_drvinfo(struct net_device *netdev, { struct onic_private *priv = netdev_priv(netdev); - strlcpy(drvinfo->driver, onic_drv_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, onic_drv_ver, + strscpy(drvinfo->driver, onic_drv_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->version, onic_drv_ver, sizeof(drvinfo->version)); - strlcpy(drvinfo->bus_info, pci_name(priv->pdev), + strscpy(drvinfo->bus_info, pci_name(priv->pdev), sizeof(drvinfo->bus_info)); } @@ -268,10 +318,34 @@ static void onic_get_ethtool_stats(struct net_device *netdev, struct onic_private *priv = netdev_priv(netdev); struct onic_hardware *hw = &priv->hw; struct pci_dev *pdev = priv->pdev; - int i; + int i,j; u16 func_id; u32 off; + struct { + u64 xdp_redirect; + u64 xdp_pass; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_err; + u64 xdp_xmit; + u64 xdp_xmit_err; + } global_xdp_stats = {0}; + + + for (j =0; j < priv->num_rx_queues; j++) { + global_xdp_stats.xdp_redirect += priv->rx_queue[j]->xdp_rx_stats.xdp_redirect; + global_xdp_stats.xdp_pass += priv->rx_queue[j]->xdp_rx_stats.xdp_pass; + global_xdp_stats.xdp_drop += priv->rx_queue[j]->xdp_rx_stats.xdp_drop; + global_xdp_stats.xdp_tx += priv->rx_queue[j]->xdp_rx_stats.xdp_tx; + global_xdp_stats.xdp_tx_err += priv->rx_queue[j]->xdp_rx_stats.xdp_tx_err; + + } + for (j =0; j < priv->num_tx_queues; j++) { + global_xdp_stats.xdp_xmit += priv->tx_queue[j]->xdp_tx_stats.xdp_xmit; + global_xdp_stats.xdp_xmit_err += priv->tx_queue[j]->xdp_tx_stats.xdp_xmit_err; + } + func_id = PCI_FUNC(pdev->devfn); // Note : @@ -283,10 +357,38 @@ static void onic_get_ethtool_stats(struct net_device *netdev, else onic_write_reg(hw, CMAC_OFFSET_TICK(1), 1); for (i = 0; i < ONIC_GLOBAL_STATS_LEN; i++) { + if (onic_gstrings_stats[i].type == ONIC_STATS) { if (func_id == 0) off = onic_gstrings_stats[i].stat0_offset; - else off = onic_gstrings_stats[i].stat1_offset; - data[i] = onic_read_reg(hw,off); + else + off = onic_gstrings_stats[i].stat1_offset; + data[i] = onic_read_reg(hw, off); + } else { + switch (onic_gstrings_stats[i].stat0_offset) { + + case ETHTOOL_XDP_REDIRECT: + data[i] = global_xdp_stats.xdp_redirect; + break; + case ETHTOOL_XDP_PASS: + data[i] = global_xdp_stats.xdp_pass; + break; + case ETHTOOL_XDP_DROP: + data[i] = global_xdp_stats.xdp_drop; + break; + case ETHTOOL_XDP_TX: + data[i] = global_xdp_stats.xdp_tx; + break; + case ETHTOOL_XDP_TX_ERR: + data[i] = global_xdp_stats.xdp_tx_err; + break; + case ETHTOOL_XDP_XMIT: + data[i] = global_xdp_stats.xdp_xmit; + break; + case ETHTOOL_XDP_XMIT_ERR: + data[i] = global_xdp_stats.xdp_xmit_err; + break; + } + } } return; @@ -310,12 +412,126 @@ static int onic_get_sset_count(struct net_device *netdev, int sset) return ONIC_STATS_LEN; } +static u32 onic_get_rxfh_indir_size(struct net_device *dev) +{ + return INDIRECTION_TABLE_SIZE; +} + +static u32 onic_get_rxfh_key_size(struct net_device *netdev) +{ + return ONIC_EN_RSS_KEY_SIZE; +} + + +static int onic_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, + u8 *hfunc) +{ + struct onic_private *priv = netdev_priv(dev); + u32 n = onic_get_rxfh_indir_size(dev); + u16 func_id = PCI_FUNC(priv->pdev->devfn); + u32 i; + + if (ring_index) { + for (i = 0; i < n; i++) { + ring_index[i] = 0xFFFF & onic_read_reg(&priv->hw, QDMA_FUNC_OFFSET_INDIR_TABLE(func_id,i)); + + } + } + + if (key) { + for (i = 0; i < 
ONIC_EN_RSS_KEY_SIZE/4; i++) { + u32 val = onic_read_reg(&priv->hw, QDMA_FUNC_OFFSET_HASH_KEY(func_id,i)); + memcpy(&key[i*4],&val, 4); + } + } + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + + + +static int onic_set_rxfh(struct net_device *dev, const u32 *ring_index, + const u8 *key, const u8 hfunc) +{ + + + struct onic_private *priv = netdev_priv(dev); + int n = onic_get_rxfh_indir_size(dev); + u16 func_id = PCI_FUNC(priv->pdev->devfn); + int i=0; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (ring_index) { + for (i = 0; i < n; i++) { + // Check that the ring index is within the number of rx queues + if (ring_index[i] >= priv->num_rx_queues) { + printk("error in onic_set_rxfh: ring_index >= priv->num_rx_queues\n"); + return -EINVAL; + } + onic_write_reg(&priv->hw, QDMA_FUNC_OFFSET_INDIR_TABLE(func_id,i), ring_index[i]); + } + } + + if (key) { + for (i = 0; i < ONIC_EN_RSS_KEY_SIZE/4; i++) { + u32 val; + memcpy(&val, &key[i*4], 4); + onic_write_reg(&priv->hw, QDMA_FUNC_OFFSET_HASH_KEY(func_id,i), val); + } + } + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) +static int onic_set_rxfh_new(struct net_device *dev, + struct ethtool_rxfh_param *rxfh_param, + struct netlink_ext_ack *extack) { + return onic_set_rxfh(dev, rxfh_param->indir, rxfh_param->key, + rxfh_param->hfunc); +} + +static int onic_get_rxfh_new(struct net_device *dev, + struct ethtool_rxfh_param *rxfh) { + return onic_get_rxfh(dev, rxfh->indir, rxfh->key, &rxfh->hfunc); +} +#endif + +static int onic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { + struct onic_private *priv = netdev_priv(dev); + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = priv->num_rx_queues; + return 0; + case ETHTOOL_GRXFH: + return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } +} + + static const struct ethtool_ops onic_ethtool_ops = { - .get_drvinfo = onic_get_drvinfo, - .get_link = onic_get_link, - .get_ethtool_stats = onic_get_ethtool_stats, - .get_strings = onic_get_strings, - .get_sset_count = onic_get_sset_count, + .get_drvinfo = onic_get_drvinfo, + .get_link = onic_get_link, + .get_ethtool_stats = onic_get_ethtool_stats, + .get_strings = onic_get_strings, + .get_sset_count = onic_get_sset_count, + .get_rxfh_indir_size = onic_get_rxfh_indir_size, + .get_rxfh_key_size = onic_get_rxfh_key_size, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) + .set_rxfh = onic_set_rxfh_new, + .get_rxfh = onic_get_rxfh_new, +#else + .get_rxfh = onic_get_rxfh, + .set_rxfh = onic_set_rxfh, +#endif + .get_rxnfc = onic_get_rxnfc, }; void onic_set_ethtool_ops(struct net_device *netdev) diff --git a/onic_main.c b/onic_main.c index b0eb3b1..5c4a5be 100644 --- a/onic_main.c +++ b/onic_main.c @@ -15,6 +15,7 @@ * the file called "COPYING". 
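+
+/* Usage sketch (hypothetical interface name; 128-entry table, 40-byte key):
+ *
+ *     ethtool -X onic0s0f0 equal 4    # spread flows over RX queues 0-3
+ *     ethtool -x onic0s0f0            # read back the table and hash key
+ *
+ * Each indirection entry is a single register write; the Toeplitz key is
+ * transferred one 32-bit word at a time via QDMA_FUNC_OFFSET_HASH_KEY. */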
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0)
+static int onic_set_rxfh_new(struct net_device *dev,
+			     struct ethtool_rxfh_param *rxfh_param,
+			     struct netlink_ext_ack *extack)
+{
+	return onic_set_rxfh(dev, rxfh_param->indir, rxfh_param->key,
+			     rxfh_param->hfunc);
+}
+
+static int onic_get_rxfh_new(struct net_device *dev,
+			     struct ethtool_rxfh_param *rxfh)
+{
+	return onic_get_rxfh(dev, rxfh->indir, rxfh->key, &rxfh->hfunc);
+}
+#endif
+
+static int onic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+			  u32 *rule_locs)
+{
+	struct onic_private *priv = netdev_priv(dev);
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = priv->num_rx_queues;
+		return 0;
+	case ETHTOOL_GRXFH:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct ethtool_ops onic_ethtool_ops = {
-	.get_drvinfo = onic_get_drvinfo,
-	.get_link = onic_get_link,
-	.get_ethtool_stats = onic_get_ethtool_stats,
-	.get_strings = onic_get_strings,
-	.get_sset_count = onic_get_sset_count,
+	.get_drvinfo = onic_get_drvinfo,
+	.get_link = onic_get_link,
+	.get_ethtool_stats = onic_get_ethtool_stats,
+	.get_strings = onic_get_strings,
+	.get_sset_count = onic_get_sset_count,
+	.get_rxfh_indir_size = onic_get_rxfh_indir_size,
+	.get_rxfh_key_size = onic_get_rxfh_key_size,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0)
+	.set_rxfh = onic_set_rxfh_new,
+	.get_rxfh = onic_get_rxfh_new,
+#else
+	.get_rxfh = onic_get_rxfh,
+	.set_rxfh = onic_set_rxfh,
+#endif
+	.get_rxnfc = onic_get_rxnfc,
 };
 
 void onic_set_ethtool_ops(struct net_device *netdev)
diff --git a/onic_main.c b/onic_main.c
index b0eb3b1..5c4a5be 100644
--- a/onic_main.c
+++ b/onic_main.c
@@ -15,6 +15,7 @@
  * the file called "COPYING".
 */
 #include
+#include <linux/version.h>
 #include
 #include
 #include
@@ -22,6 +23,7 @@
 #include
 #include
+#include <net/xdp.h>
 
 #include "onic.h"
 #include "onic_hardware.h"
@@ -131,6 +133,15 @@ static const struct net_device_ops onic_netdev_ops = {
 	.ndo_do_ioctl = onic_do_ioctl,
 	.ndo_change_mtu = onic_change_mtu,
 	.ndo_get_stats64 = onic_get_stats64,
+	.ndo_bpf = onic_xdp,
+// For why we gate this on the kernel version, see onic_netdev.c:onic_run_xdp
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
+	.ndo_xdp_xmit = onic_xdp_xmit,
+#elif defined(RHEL_RELEASE_CODE)
+#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)
+	.ndo_xdp_xmit = onic_xdp_xmit,
+#endif
+#endif
 };
 
 extern void onic_set_ethtool_ops(struct net_device *netdev);
@@ -194,12 +205,14 @@ static int onic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	netdev->netdev_ops = &onic_netdev_ops;
 	onic_set_ethtool_ops(netdev);
-
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0)
+	xdp_set_features_flag(netdev, NETDEV_XDP_ACT_BASIC |
+				      NETDEV_XDP_ACT_REDIRECT);
+#endif
 	snprintf(dev_name, IFNAMSIZ, "onic%ds%df%d", pdev->bus->number,
 		 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-	strlcpy(netdev->name, dev_name, sizeof(netdev->name));
+	strscpy(netdev->name, dev_name, sizeof(netdev->name));
 
 	memset(&saddr, 0, sizeof(struct sockaddr));
 	memcpy(saddr.sa_data, onic_default_dev_addr, 6);
@@ -220,6 +233,12 @@ static int onic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	spin_lock_init(&priv->tx_lock);
 	spin_lock_init(&priv->rx_lock);
 
+	priv->netdev_stats = alloc_percpu(struct rtnl_link_stats64);
+	if (!priv->netdev_stats) {
+		dev_err(&pdev->dev, "failed to allocate netdev_stats");
+		goto free_netdev;
+	}
+
 	rv = onic_init_capacity(priv);
 	if (rv < 0) {
 		dev_err(&pdev->dev, "onic_init_capacity, err = %d", rv);
diff --git a/onic_netdev.c b/onic_netdev.c
index e477cf5..69e89f9 100644
--- a/onic_netdev.c
+++ b/onic_netdev.c
@@ -14,12 +14,25 @@
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
 */
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include
 #include
 #include
 #include
+#include <linux/filter.h>
+#include <net/xdp.h>
+#include
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0)
+#include <net/page_pool/helpers.h>
+#include <net/page_pool/types.h>
+#else
+#include <net/page_pool.h>
+#endif
 
 #include "onic_netdev.h"
+#include "onic_hardware.h"
 #include "qdma_access/qdma_register.h"
 #include "onic.h"
@@ -56,6 +69,8 @@ static void onic_tx_clean(struct onic_tx_queue *q)
 	struct qdma_wb_stat wb;
 	int work, i;
 
+	// Locking: only one context may clean the ring at a time;
+	// test_and_set_bit is atomic, so it doubles as a try-lock.
 	if (test_and_set_bit(0, q->state))
 		return;
 
@@ -72,11 +87,27 @@ static void onic_tx_clean(struct onic_tx_queue *q)
 
 	for (i = 0; i < work; ++i) {
 		struct onic_tx_buffer *buf = &q->buffer[ring->next_to_clean];
 
-		dma_unmap_single(&priv->pdev->dev, buf->dma_addr, buf->len,
-				 DMA_TO_DEVICE);
-		dev_kfree_skb_any(skb);
+		if (buf->type == ONIC_TX_SKB) {
+			// The packet came from the kernel network stack
+			dma_unmap_single(&priv->pdev->dev, buf->dma_addr,
+					 buf->len, DMA_TO_DEVICE);
+			dev_kfree_skb_any(buf->skb);
+			buf->skb = NULL;
+		} else if (buf->type == ONIC_TX_XDPF) {
+			// The packet came from XDP_TX: it lives in our page
+			// pool, so there is no DMA mapping to undo
+			xdp_return_frame(buf->xdpf);
+			buf->xdpf = NULL;
+		} else if (buf->type == ONIC_TX_XDPF_XMIT) {
+			// The packet came from another driver's XDP program
+			// via ndo_xdp_xmit; it was DMA-mapped and must be
+			// unmapped here.
+			dma_unmap_single(&priv->pdev->dev, buf->dma_addr,
+					 buf->len, DMA_TO_DEVICE);
+			xdp_return_frame(buf->xdpf);
+			buf->xdpf = NULL;
+		} else {
+			netdev_err(priv->netdev, "unknown buffer type %d\n",
+				   buf->type);
+		}
 
 		onic_ring_increment_tail(ring);
 	}
@@ -105,6 +136,206 @@ static void onic_rx_refill(struct onic_rx_queue *q)
 	ring->next_to_use %= onic_ring_get_real_count(ring);
 
 	onic_set_rx_head(priv->hw.qdma, q->qid, ring->next_to_use);
+
+}
+
+static void onic_rx_page_refill(struct onic_rx_queue *q)
+{
+	struct onic_ring *desc_ring = &q->desc_ring;
+	struct qdma_c2h_st_desc desc;
+	struct page *pg;
+	u8 *desc_ptr = desc_ring->desc +
+		       QDMA_C2H_ST_DESC_SIZE * desc_ring->next_to_clean;
+
+	pg = page_pool_dev_alloc_pages(q->page_pool);
+	if (unlikely(!pg))
+		return;
+
+	q->buffer[desc_ring->next_to_clean].pg = pg;
+	q->buffer[desc_ring->next_to_clean].offset = XDP_PACKET_HEADROOM;
+
+	desc.dst_addr = page_pool_get_dma_addr(pg) + XDP_PACKET_HEADROOM;
+	qdma_pack_c2h_st_desc(desc_ptr, &desc);
+}
+
+static struct onic_tx_queue *onic_xdp_tx_queue_mapping(struct onic_private *priv)
+{
+	unsigned int r_idx = smp_processor_id();
+
+	if (r_idx >= priv->num_tx_queues)
+		r_idx = r_idx % priv->num_tx_queues;
+
+	return priv->tx_queue[r_idx];
+}
+
+static int onic_xmit_xdp_ring(struct onic_private *priv,
+			      struct onic_tx_queue *tx_queue,
+			      struct xdp_frame *xdpf, bool dma_map)
+{
+	u8 *desc_ptr;
+	dma_addr_t dma_addr;
+	struct onic_ring *ring;
+	struct qdma_h2c_st_desc desc;
+	bool debug = true;
+	struct rtnl_link_stats64 *pcpu_stats_pointer;
+	enum onic_tx_buf_type type;
+
+	ring = &tx_queue->ring;
+
+	onic_tx_clean(tx_queue);
+
+	if (onic_ring_full(ring)) {
+		if (debug)
+			netdev_info(priv->netdev, "ring is full");
+		return NETDEV_TX_BUSY;
+	}
+
+	if (dma_map) {
+		/* ndo_xdp_xmit: the frame comes from another device, map it */
+		dma_addr = dma_map_single(&priv->pdev->dev, xdpf->data,
+					  xdpf->len, DMA_TO_DEVICE);
+		type = ONIC_TX_XDPF_XMIT;
+		if (unlikely(dma_mapping_error(&priv->pdev->dev, dma_addr)))
+			return ONIC_XDP_CONSUMED;
+	} else {
+		/* ONIC_XDP_TX: the page is ours, so the DMA address is the
+		 * page pool mapping plus the xdp_frame struct (stored at the
+		 * page head) plus the headroom, as mvneta does. */
+		struct page *page = virt_to_page(xdpf->data);
+
+		dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) +
+			   xdpf->headroom;
+		dma_sync_single_for_device(&priv->pdev->dev, dma_addr,
+					   xdpf->len, DMA_BIDIRECTIONAL);
+		type = ONIC_TX_XDPF;
+	}
+
+	desc_ptr = ring->desc + QDMA_H2C_ST_DESC_SIZE * ring->next_to_use;
+	desc.len = xdpf->len;
+	desc.src_addr = dma_addr;
+	desc.metadata = xdpf->len;
+	qdma_pack_h2c_st_desc(desc_ptr, &desc);
+
+	tx_queue->buffer[ring->next_to_use].xdpf = xdpf;
+	tx_queue->buffer[ring->next_to_use].type = type;
+	tx_queue->buffer[ring->next_to_use].dma_addr = dma_addr;
+	tx_queue->buffer[ring->next_to_use].len = xdpf->len;
+
+	pcpu_stats_pointer = this_cpu_ptr(priv->netdev_stats);
+	pcpu_stats_pointer->tx_packets++;
+	pcpu_stats_pointer->tx_bytes += xdpf->len;
+
+	onic_ring_increment_head(ring);
+
+	return ONIC_XDP_TX;
+}
+
+static int onic_xdp_xmit_back(struct onic_rx_queue *q, struct xdp_buff *xdp_buff)
+{
+	struct onic_private *priv = netdev_priv(q->netdev);
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp_buff);
+	struct onic_tx_queue *tx_queue;
+	struct netdev_queue *nq;
+	int ret = 0;
+	u32 cpu = smp_processor_id();
+
+	if (unlikely(!xdpf)) {
+		q->xdp_rx_stats.xdp_tx_err++;
+		return ONIC_XDP_CONSUMED;
+	}
+
+	tx_queue = q->xdp_prog ? priv->tx_queue[q->qid] : NULL;
+	if (unlikely(!tx_queue)) {
+		q->xdp_rx_stats.xdp_tx_err++;
+		return -ENXIO;
+	}
+
+	nq = netdev_get_tx_queue(tx_queue->netdev, tx_queue->qid);
+
+	__netif_tx_lock(nq, cpu);
+	ret = onic_xmit_xdp_ring(priv, tx_queue, xdpf, false);
+	if (ret == ONIC_XDP_TX)
+		q->xdp_rx_stats.xdp_tx++;
+	else
+		q->xdp_rx_stats.xdp_tx_err++;
+
+	/* make the descriptor visible before moving the head pointer */
+	wmb();
+	onic_set_tx_head(priv->hw.qdma, tx_queue->qid,
+			 tx_queue->ring.next_to_use);
+
+	__netif_tx_unlock(nq);
+
+	return ret;
+}
+
+static void *onic_run_xdp(struct onic_rx_queue *rx_queue,
+			  struct xdp_buff *xdp_buff, struct onic_private *priv)
+{
+	int err, result = ONIC_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	u32 act;
+	struct page *page = virt_to_page(xdp_buff->data_hard_start);
+
+	xdp_prog = rx_queue->xdp_prog;
+	if (!xdp_prog)
+		goto out;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp_buff);
+
+	switch (act) {
+	case XDP_PASS:
+		rx_queue->xdp_rx_stats.xdp_pass++;
+		break;
+// Before 5.3.0 the xmit_more hint was tied to skbs, and XDP runs before any
+// skb is allocated, so we cannot correctly implement onic_xdp_xmit_frame and
+// thus XDP_TX and XDP_REDIRECT (RHEL 8.1 backported the hint).
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
+	case XDP_TX:
+		result = onic_xdp_xmit_back(rx_queue, xdp_buff);
+		if (result == ONIC_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_queue->netdev, xdp_buff, xdp_prog);
+		if (err) {
+			result = ONIC_XDP_CONSUMED;
+			goto out_failure;
+		}
+		result = ONIC_XDP_REDIR;
+		rx_queue->xdp_rx_stats.xdp_redirect++;
+		break;
+#elif defined(RHEL_RELEASE_CODE)
+#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)
+	case XDP_TX:
+		result = onic_xdp_xmit_back(rx_queue, xdp_buff);
+		if (result == ONIC_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_queue->netdev, xdp_buff, xdp_prog);
+		if (err) {
+			result = ONIC_XDP_CONSUMED;
+			goto out_failure;
+		}
+		result = ONIC_XDP_REDIR;
+		rx_queue->xdp_rx_stats.xdp_redirect++;
+		break;
+#endif
+#else
+	case XDP_TX:
+		fallthrough;
+	case XDP_REDIRECT:
+		fallthrough;
+#endif
+	default:
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0)
+		bpf_warn_invalid_xdp_action(priv->netdev, xdp_prog, act);
+#else
+		bpf_warn_invalid_xdp_action(act);
+#endif
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_queue->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		result = ONIC_XDP_CONSUMED;
+		rx_queue->xdp_rx_stats.xdp_drop++;
+		page_pool_recycle_direct(rx_queue->page_pool, page);
+		break;
+	}
+
+out:
+	return ERR_PTR(-result);
 }
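+
+/* Note on the convention above: onic_run_xdp() returns its ONIC_XDP_*
+ * verdict encoded as ERR_PTR(-result), and callers recover it with
+ * xdp_res = -PTR_ERR(res). Because ONIC_XDP_PASS is BIT(0) (non-zero),
+ * even a plain pass arrives through the IS_ERR() path in onic_rx_poll(). */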
 
 static int onic_rx_poll(struct napi_struct *napi, int budget)
@@ -125,6 +356,12 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 	bool napi_cmpl_rval = 0;
 	bool flipped = 0;
 	bool debug = 0;
+	void *res;
+	struct xdp_buff xdp;
+	unsigned int xdp_xmit = 0;
+	struct rtnl_link_stats64 *pcpu_stats_pointer;
+
+	pcpu_stats_pointer = this_cpu_ptr(priv->netdev_stats);
 
 	for (i = 0; i < priv->num_tx_queues; i++)
 		onic_tx_clean(priv->tx_queue[i]);
@@ -170,8 +407,7 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 
 	if (cmpl.err == 1) {
 		if (debug)
-			netdev_info(q->netdev,
-				    "completion error detected in cmpl entry!");
+			netdev_info(q->netdev, "completion error detected in cmpl entry!");
 		// todo: need to handle the error ...
 		onic_qdma_clear_error_interrupt(priv->hw.qdma);
 	}
@@ -181,26 +417,61 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 		struct onic_rx_buffer *buf =
 			&q->buffer[desc_ring->next_to_clean];
 		struct sk_buff *skb;
+
 		int len = cmpl.pkt_len;
-
-		u8 *data;
-		skb = napi_alloc_skb(napi, len);
-		if (!skb) {
-			rv = -ENOMEM;
-			break;
-		}
-		/* maximum packet size is 1514, less than the page size */
-		data = (u8 *)(page_address(buf->pg) + buf->offset);
-		skb_put_data(skb, data, len);
-		skb->protocol = eth_type_trans(skb, q->netdev);
-		skb->ip_summed = CHECKSUM_NONE;
-		skb_record_rx_queue(skb, qid);
-		rv = napi_gro_receive(napi, skb);
-		if (rv < 0) {
-			netdev_err(q->netdev, "napi_gro_receive, err = %d", rv);
-			break;
+
+		xdp_init_buff(&xdp, PAGE_SIZE, &q->xdp_rxq);
+
+		dma_sync_single_for_cpu(&priv->pdev->dev,
+					page_pool_get_dma_addr(buf->pg) +
+					buf->offset,
+					len, DMA_FROM_DEVICE);
+
+		xdp_prepare_buff(&xdp, page_address(buf->pg), buf->offset,
+				 len, false);
+
+		res = onic_run_xdp(q, &xdp, priv);
+		if (IS_ERR(res)) {
+			unsigned int xdp_res = -PTR_ERR(res);
+
+			if (xdp_res & (ONIC_XDP_TX | ONIC_XDP_REDIR))
+				xdp_xmit |= xdp_res;
+
+			// Allocate an skb only if the packet continues up the stack
+			if (xdp_res & ONIC_XDP_PASS) {
+				// build the skb around the existing data
+				skb = napi_build_skb(xdp.data_hard_start, PAGE_SIZE);
+				if (!skb) {
+					rv = -ENOMEM;
+					break;
+				}
+
+				// mark the skb for page_pool recycling
+				skb_mark_for_recycle(skb);
+				// reserve the XDP headroom in the skb ...
+				skb_reserve(skb, xdp.data - xdp.data_hard_start);
+				// ... and cover the payload
+				skb_put(skb, xdp.data_end - xdp.data);
+
+				skb->protocol = eth_type_trans(skb, q->netdev);
+				skb->ip_summed = CHECKSUM_NONE;
+				skb_record_rx_queue(skb, qid);
+				rv = napi_gro_receive(napi, skb);
+				if (rv < 0) {
+					netdev_err(q->netdev, "napi_gro_receive, err = %d", rv);
+					break;
+				}
+			}
 		}
-		priv->netdev_stats.rx_packets++;
-		priv->netdev_stats.rx_bytes += len;
+
+		// The packet's page has now either been recycled or marked
+		// for recycling; hand a fresh page to the descriptor.
+		onic_rx_page_refill(q);
+
+		pcpu_stats_pointer->rx_packets++;
+		pcpu_stats_pointer->rx_bytes += len;
 
 		onic_ring_increment_tail(desc_ring);
@@ -249,12 +520,14 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 		(QDMA_C2H_CMPL_SIZE * cmpl_ring->next_to_clean);
 
 	if ((++work) >= budget) {
+		if (xdp_xmit & ONIC_XDP_REDIR)
+			xdp_do_flush();
 		if (debug)
 			netdev_info(q->netdev, "watchdog work %u, budget %u",
 				    work, budget);
 		napi_complete(napi);
-		napi_reschedule(napi);
+		napi_schedule(napi);
 		goto out_of_budget;
 	}
@@ -268,6 +541,9 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 			cmpl.color);
 	}
 
+	if (xdp_xmit & ONIC_XDP_REDIR)
+		xdp_do_flush();
+
 	if (cmpl_ring->next_to_clean == cmpl_stat.pidx) {
 		if (debug)
 			netdev_info(
@@ -306,8 +582,8 @@ static int onic_rx_poll(struct napi_struct *napi, int budget)
 		netdev_info(
 			q->netdev,
 			"rx_poll returning work %u, rx_packets %lld, rx_bytes %lld",
-			work, priv->netdev_stats.rx_packets,
-			priv->netdev_stats.rx_bytes);
+			work, pcpu_stats_pointer->rx_packets,
+			pcpu_stats_pointer->rx_bytes);
 
 	return work;
 }
@@ -317,10 +593,13 @@ static void onic_clear_tx_queue(struct onic_private *priv, u16 qid)
 	struct onic_ring *ring;
 	u32 size;
 	int real_count;
+	int i;
 
 	if (!q)
 		return;
 
+	onic_tx_clean(q);
+
 	onic_qdma_clear_tx_queue(priv->hw.qdma, qid);
 
 	ring = &q->ring;
@@ -328,10 +607,18 @@ static void onic_clear_tx_queue(struct onic_private *priv, u16 qid)
 	size = QDMA_H2C_ST_DESC_SIZE * real_count + QDMA_WB_STAT_SIZE;
 	size = ALIGN(size, PAGE_SIZE);
 
+	for (i = 0; i < real_count; ++i) {
+		if ((q->buffer[i].type & ONIC_TX_SKB) && q->buffer[i].skb) {
+			netdev_err(priv->netdev, "Weird, skb is not NULL\n");
+		} else if ((q->buffer[i].type & (ONIC_TX_XDPF | ONIC_TX_XDPF_XMIT)) &&
+			   q->buffer[i].xdpf) {
+			netdev_err(priv->netdev, "Weird, xdpf is not NULL\n");
+		}
+	}
+
 	if (ring->desc)
 		dma_free_coherent(&priv->pdev->dev, size, ring->desc,
 				  ring->dma_addr);
 	kfree(q->buffer);
 	kfree(q);
 	priv->tx_queue[qid] = NULL;
 }
@@ -384,6 +671,9 @@ static int onic_init_tx_queue(struct onic_private *priv, u16 qid)
 	ring->next_to_clean = 0;
 	ring->color = 0;
 
+	netdev_info(dev, "TX queue %d, ring count %d, ring size %d, real_count %d",
+		    qid, ring->count, size, real_count);
+
 	/* initialize TX buffers */
 	q->buffer =
 		kcalloc(real_count, sizeof(struct onic_tx_buffer), GFP_KERNEL);
@@ -443,20 +733,66 @@ static void onic_clear_rx_queue(struct onic_private *priv, u16 qid)
 
 	for (i = 0; i < real_count; ++i) {
 		struct page *pg = q->buffer[i].pg;
-		__free_pages(pg, 0);
+		page_pool_put_full_page(q->page_pool, pg, false);
 	}
 	kfree(q->buffer);
+
+	if (xdp_rxq_info_is_reg(&q->xdp_rxq))
+		xdp_rxq_info_unreg(&q->xdp_rxq);
+	page_pool_destroy(q->page_pool);
+	q->page_pool = NULL;
+
 	kfree(q);
 	priv->rx_queue[qid] = NULL;
 }
+
+static int onic_create_page_pool(struct onic_private *priv,
+				 struct onic_rx_queue *q, int size)
+{
+	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.pool_size = size,
+		.nid = dev_to_node(&priv->pdev->dev),
+		.dev = &priv->pdev->dev,
+		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+		.offset = XDP_PACKET_HEADROOM,
+		.max_len = priv->netdev->mtu + ETH_HLEN,
+	};
+	int err;
+
+	q->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(q->page_pool)) {
+		err = PTR_ERR(q->page_pool);
+		q->page_pool = NULL;
+		return err;
+	}
+
+	err = xdp_rxq_info_reg(&q->xdp_rxq, priv->netdev, q->qid, 0);
+	if (err < 0)
+		goto err_free_pp;
+
+	err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_POOL,
+					 q->page_pool);
+	if (err)
+		goto err_unregister_rxq;
+
+	return 0;
+
+err_unregister_rxq:
+	xdp_rxq_info_unreg(&q->xdp_rxq);
+err_free_pp:
+	page_pool_destroy(q->page_pool);
+	q->page_pool = NULL;
+	return err;
+}
+
 static int onic_init_rx_queue(struct onic_private *priv, u16 qid)
 {
-	const u8 bufsz_idx = 13;
-	const u8 desc_rngcnt_idx = 13;
+	// TODO: make these configurable via ethtool
+	const u8 bufsz_idx = 8;
+	const u8 desc_rngcnt_idx = 8;
 	//const u8 cmpl_rngcnt_idx = 15;
-	const u8 cmpl_rngcnt_idx = 13;
+	const u8 cmpl_rngcnt_idx = 8;
 	struct net_device *dev = priv->netdev;
 	struct onic_rx_queue *q;
 	struct onic_ring *ring;
@@ -483,6 +819,8 @@ static int onic_init_rx_queue(struct onic_private *priv, u16 qid)
 	q->vector = priv->q_vector[vid];
 	q->qid = qid;
 
+	q->xdp_prog = priv->xdp_prog;
+
 	/* allocate DMA memory for RX descriptor ring */
 	ring = &q->desc_ring;
 	ring->count = onic_ring_count(desc_rngcnt_idx);
@@ -510,16 +848,22 @@ static int onic_init_rx_queue(struct onic_private *priv, u16 qid)
 		goto clear_rx_queue;
 	}
 
+	rv = onic_create_page_pool(priv, q, real_count);
+	if (rv < 0)
+		goto clear_rx_queue;
+
 	for (i = 0; i < real_count; ++i) {
-		struct page *pg = dev_alloc_pages(0);
+		struct page *pg = page_pool_dev_alloc_pages(q->page_pool);
+
 		if (!pg) {
+			netdev_err(dev, "page_pool_dev_alloc_pages failed at %d", i);
 			rv = -ENOMEM;
 			goto clear_rx_queue;
 		}
 
 		q->buffer[i].pg = pg;
-		q->buffer[i].offset = 0;
+		q->buffer[i].offset = XDP_PACKET_HEADROOM;
 	}
 
 	/* map pages and initialize descriptors */
@@ -529,10 +873,7 @@ static int onic_init_rx_queue(struct onic_private *priv, u16 qid)
 		struct page *pg = q->buffer[i].pg;
 		unsigned int offset = q->buffer[i].offset;
 
-		desc.dst_addr = dma_map_page(&priv->pdev->dev, pg, 0, PAGE_SIZE,
-					     DMA_FROM_DEVICE);
-		desc.dst_addr += offset;
-
+		desc.dst_addr = page_pool_get_dma_addr(pg) + offset;
 		qdma_pack_c2h_st_desc(desc_ptr, &desc);
 	}
@@ -688,10 +1029,11 @@ netdev_tx_t onic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	u8 *desc_ptr;
 	int rv;
 	bool debug = 0;
-
+	struct rtnl_link_stats64 *pcpu_stats_pointer;
+
+	pcpu_stats_pointer = this_cpu_ptr(priv->netdev_stats);
 	q = priv->tx_queue[qid];
 	ring = &q->ring;
-
+
 	onic_tx_clean(q);
 
 	if (onic_ring_full(ring)) {
@@ -711,8 +1053,8 @@ netdev_tx_t onic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(dma_mapping_error(&priv->pdev->dev, dma_addr))) {
 		dev_kfree_skb(skb);
-		priv->netdev_stats.tx_dropped++;
-		priv->netdev_stats.tx_errors++;
+		pcpu_stats_pointer->tx_dropped++;
+		pcpu_stats_pointer->tx_errors++;
 		return NETDEV_TX_OK;
 	}
@@ -722,18 +1064,19 @@ netdev_tx_t onic_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	desc.metadata = skb->len;
 	qdma_pack_h2c_st_desc(desc_ptr, &desc);
 
+	q->buffer[ring->next_to_use].type = ONIC_TX_SKB;
 	q->buffer[ring->next_to_use].skb = skb;
 	q->buffer[ring->next_to_use].dma_addr = dma_addr;
 	q->buffer[ring->next_to_use].len = skb->len;
 
-	priv->netdev_stats.tx_packets++;
-	priv->netdev_stats.tx_bytes += skb->len;
+	pcpu_stats_pointer->tx_packets++;
+	pcpu_stats_pointer->tx_bytes += skb->len;
 
 	onic_ring_increment_head(ring);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0)
 	if (onic_ring_full(ring) || !netdev_xmit_more()) {
-#elif defined(RHEL_RELEASE_CODE) 
+#elif defined(RHEL_RELEASE_CODE)
 #if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)
 	if (onic_ring_full(ring) || !netdev_xmit_more()) {
 #endif
@@ -772,15 +1115,126 @@ int onic_change_mtu(struct net_device *dev, int mtu)
 	return 0;
 }
 
-inline void onic_get_stats64(struct net_device *dev,
-			     struct rtnl_link_stats64 *stats)
+inline void onic_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct onic_private *priv = netdev_priv(dev);
+	struct rtnl_link_stats64 *pcpu_ptr;
+	struct rtnl_link_stats64 total_stats = { };
+	unsigned int cpu;
 
+	for_each_possible_cpu(cpu) {
+		pcpu_ptr = per_cpu_ptr(priv->netdev_stats, cpu);
+
+		total_stats.rx_packets += pcpu_ptr->rx_packets;
+		total_stats.rx_bytes += pcpu_ptr->rx_bytes;
+		total_stats.tx_packets += pcpu_ptr->tx_packets;
+		total_stats.tx_bytes += pcpu_ptr->tx_bytes;
+		total_stats.tx_errors += pcpu_ptr->tx_errors;
+		total_stats.tx_dropped += pcpu_ptr->tx_dropped;
+	}
+
+	stats->tx_packets = total_stats.tx_packets;
+	stats->tx_bytes = total_stats.tx_bytes;
+	stats->rx_packets = total_stats.rx_packets;
+	stats->rx_bytes = total_stats.rx_bytes;
+	stats->tx_dropped = total_stats.tx_dropped;
+	stats->tx_errors = total_stats.tx_errors;
+}
+
+static int onic_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct onic_private *priv = netdev_priv(dev);
+	bool running = netif_running(dev);
+	bool need_reset;
+	struct bpf_prog *old_prog = xchg(&priv->xdp_prog, prog);
+
+	need_reset = (!!prog != !!old_prog);
+
+	if (need_reset && running) {
+		onic_stop_netdev(dev);
+	} else {
+		int i;
+
+		for (i = 0; i < priv->num_rx_queues; i++)
+			xchg(&priv->rx_queue[i]->xdp_prog, priv->xdp_prog);
+	}
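+
+	/*
+	 * A sketch of the intent here: attaching or detaching a program
+	 * (need_reset) tears the netdev down so the RX queues and their page
+	 * pools are rebuilt with the right DMA direction (see
+	 * onic_create_page_pool()); merely swapping one program for another
+	 * only exchanges the per-queue prog pointers in place.
+	 */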
+	if (old_prog) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0)
+		xdp_features_clear_redirect_target(dev);
+#endif
+		bpf_prog_put(old_prog);
+	}
+
+	if (!need_reset)
+		return 0;
+
+	if (running)
+		onic_open_netdev(dev);
+
-	stats->tx_packets = priv->netdev_stats.tx_packets;
-	stats->tx_bytes = priv->netdev_stats.tx_bytes;
-	stats->rx_packets = priv->netdev_stats.rx_packets;
-	stats->rx_bytes = priv->netdev_stats.rx_bytes;
-	stats->tx_dropped = priv->netdev_stats.tx_dropped;
-	stats->tx_errors = priv->netdev_stats.tx_errors;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0)
+	if (need_reset && prog)
+		xdp_features_set_redirect_target(dev, false);
+#endif
+
+	return 0;
+}
+
+int onic_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return onic_setup_xdp_prog(dev, xdp->prog);
+	default:
+		return -EINVAL;
+	}
 }
+
+int onic_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags)
+{
+	struct onic_private *priv = netdev_priv(dev);
+	struct onic_tx_queue *tx_queue;
+	struct netdev_queue *nq;
+	int i, drops = 0, cpu;
+
+	cpu = smp_processor_id();
+
+	tx_queue = onic_xdp_tx_queue_mapping(priv);
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+		netdev_err(dev, "Invalid flags");
+		tx_queue->xdp_tx_stats.xdp_xmit_err++;
+		return -EINVAL;
+	}
+
+	nq = netdev_get_tx_queue(tx_queue->netdev, tx_queue->qid);
+
+	__netif_tx_lock(nq, cpu);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *frame = frames[i];
+		int err;
+
+		err = onic_xmit_xdp_ring(priv, tx_queue, frame, true);
+		if (err != ONIC_XDP_TX) {
+			xdp_return_frame_rx_napi(frame);
+			netdev_err(dev, "Failed to transmit frame");
+			tx_queue->xdp_tx_stats.xdp_xmit_err++;
+			drops++;
+		} else {
+			tx_queue->xdp_tx_stats.xdp_xmit++;
+		}
+	}
+
+	if (flags & XDP_XMIT_FLUSH) {
+		wmb();
+		onic_set_tx_head(priv->hw.qdma, tx_queue->qid,
+				 tx_queue->ring.next_to_use);
+	}
+	__netif_tx_unlock(nq);
+
+	return n - drops;
+}
diff --git a/onic_netdev.h b/onic_netdev.h
index eb2e997..6c45205 100644
--- a/onic_netdev.h
+++ b/onic_netdev.h
@@ -49,4 +49,8 @@ void onic_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
 int onic_poll(struct napi_struct *napi, int budget);
 
+int onic_xdp(struct net_device *dev, struct netdev_bpf *xdp);
+
+int onic_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags);
 #endif
diff --git a/onic_register.h b/onic_register.h
index 368f74e..3cf8248 100644
--- a/onic_register.h
+++ b/onic_register.h
@@ -260,9 +260,15 @@ static inline void onic_write_reg(struct onic_hardware *hw, u32 offset, u32 val)
 #define CMAC_OFFSET_STAT_RX_RSFEC_CW_INC(i)	(CMAC_OFFSET(i) + 0x103C)
 
 #define CMAC_ADPT_OFFSET_TX_PKT_RECV(i)		(CMAC_ADPT_OFFSET(i) + 0x0)
-#define CMAC_ADPT_OFFSET_TX_PKT_DROP(i)		(CMAC_ADPT_OFFSET(i) + 0x4)
-#define CMAC_ADPT_OFFSET_RX_PKT_RECV(i)		(CMAC_ADPT_OFFSET(i) + 0x10)
-#define CMAC_ADPT_OFFSET_RX_PKT_DROP(i)		(CMAC_ADPT_OFFSET(i) + 0x14)
-#define CMAC_ADPT_OFFSET_RX_PKT_ERROR(i)	(CMAC_ADPT_OFFSET(i) + 0x18)
+#define CMAC_ADPT_OFFSET_TX_PKT_DROP(i)		(CMAC_ADPT_OFFSET(i) + 0x10)
+#define CMAC_ADPT_OFFSET_RX_PKT_RECV(i)		(CMAC_ADPT_OFFSET(i) + 0x20)
+#define CMAC_ADPT_OFFSET_RX_PKT_DROP(i)		(CMAC_ADPT_OFFSET(i) + 0x30)
+#define CMAC_ADPT_OFFSET_RX_PKT_ERROR(i)	(CMAC_ADPT_OFFSET(i) + 0x40)
+
+/* INDIRECTION TABLE */
+#define INDIRECTION_TABLE_BASE_ADDR	QDMA_FUNC_OFFSET_INDIR_TABLE(0, 0)
+#define INDIRECTION_TABLE_SIZE		0x80
+#define ONIC_EN_RSS_KEY_SIZE		40
 
 #endif
diff --git a/qdma_access/qdma_export.h b/qdma_access/qdma_export.h
index ac4742b..2c8d23d 100644
--- a/qdma_access/qdma_export.h
+++ b/qdma_access/qdma_export.h
@@ -76,6 +76,10 @@ struct qdma_c2h_st_desc {
 #define QDMA_WB_STAT_DW_PIDX_MASK GENMASK_ULL(15, 0)
 #define QDMA_WB_STAT_DW_CIDX_MASK GENMASK_ULL(31, 16)
 
+/*
+ * pidx is the producer index
+ * cidx is the consumer index
+ */
 struct qdma_wb_stat {
 	u16 pidx;
 	u16 cidx;
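As context for the comment above: a minimal sketch of how the two indices can
be unpacked from a raw writeback word using the masks defined in this header.
This helper is illustrative only and not part of the patch; it assumes
<linux/bitfield.h> for FIELD_GET.

	/* Extract producer/consumer indices from a raw QDMA writeback word. */
	static inline void qdma_unpack_wb_stat(u64 raw, struct qdma_wb_stat *wb)
	{
		wb->pidx = FIELD_GET(QDMA_WB_STAT_DW_PIDX_MASK, raw);
		wb->cidx = FIELD_GET(QDMA_WB_STAT_DW_CIDX_MASK, raw);
	}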