Linux数据报文接收发送总结7
2.4 網卡驅動初始化
每一個驅動程序(不僅僅只是網卡驅動)會使用 module_init 向內核注冊一個初始化函數,當驅動被加載時,內核會調用這個函數。比如igb網卡驅動的代碼位于drivers/net/ethernet/intel/igb/igb_main.c
//file: drivers/net/ethernet/intel/igb/igb_main.c
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl, // 根據此ID匹配,來綁定驅動
	.probe    = igb_probe,   // 重點函數,硬件的初始化函數都在這里做
	.remove   = igb_remove,
	......
};

static int __init igb_init_module(void)
{
	......
	ret = pci_register_driver(&igb_driver);
	return ret;
}

驅動的pci_register_driver調用完成后,Linux內核就知道了該驅動的相關信息,比如igb網卡驅動的igb_driver_name和igb_probe函數地址等等。當網卡設備被識別以后,內核會調用其驅動的probe方法(igb_driver的probe方法是igb_probe)。驅動probe方法執行的目的就是讓設備ready,對于igb網卡,其igb_probe位于drivers/net/ethernet/intel/igb/igb_main.c下。主要執行的操作如下:
/*** igb_probe - Device Initialization Routine* @pdev: PCI device information struct* @ent: entry in igb_pci_tbl** Returns 0 on success, negative on failure** igb_probe initializes an adapter identified by a pci_dev structure.* The OS initialization, configuring of the adapter private structure,* and a hardware reset occur.**/// 當insmod 驅動ko 或 有pci設備枚舉上來時,通過ID進行匹配和綁定驅動,即會調用此函數,。//此過程是由pci總線來調用來。 static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) {struct net_device *netdev;struct igb_adapter *adapter;struct e1000_hw *hw;u16 eeprom_data = 0;s32 ret_val;static int global_quad_port_a; /* global quad port a indication */const struct e1000_info *ei = igb_info_tbl[ent->driver_data];int err, pci_using_dac;u8 part_str[E1000_PBANUM_LENGTH];/* Catch broken hardware that put the wrong VF device ID in* the PCIe SR-IOV capability.*/if (pdev->is_virtfn) {WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",pci_name(pdev), pdev->vendor, pdev->device);return -EINVAL;}err = pci_enable_device_mem(pdev);if (err)return err;pci_using_dac = 0;err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));if (!err) {pci_using_dac = 1;} else {err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));if (err) {dev_err(&pdev->dev,"No usable DMA configuration, aborting\n");goto err_dma;}}err = pci_request_selected_regions(pdev, pci_select_bars(pdev,IORESOURCE_MEM),igb_driver_name);if (err)goto err_pci_reg;pci_enable_pcie_error_reporting(pdev);pci_set_master(pdev);pci_save_state(pdev);err = -ENOMEM;netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), //申請網絡設備,此參數包括了申請priv成員的大小IGB_MAX_TX_QUEUES);if (!netdev)goto err_alloc_etherdev;SET_NETDEV_DEV(netdev, &pdev->dev);pci_set_drvdata(pdev, netdev);adapter = netdev_priv(netdev);adapter->netdev = netdev;adapter->pdev = pdev;hw = &adapter->hw;hw->back = adapter;adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);err = -EIO;adapter->io_addr = pci_iomap(pdev, 0, 0);if (!adapter->io_addr)goto err_ioremap;/* 
hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */hw->hw_addr = adapter->io_addr;netdev->netdev_ops = &igb_netdev_ops; // 設備ops操作函數igb_set_ethtool_ops(netdev); // 注冊ethtool操作,即ethtool_opsnetdev->watchdog_timeo = 5 * HZ;strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);netdev->mem_start = pci_resource_start(pdev, 0);netdev->mem_end = pci_resource_end(pdev, 0);/* PCI config space info */hw->vendor_id = pdev->vendor;hw->device_id = pdev->device;hw->revision_id = pdev->revision;hw->subsystem_vendor_id = pdev->subsystem_vendor;hw->subsystem_device_id = pdev->subsystem_device;/* Copy the default MAC, PHY and NVM function pointers */memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));/* Initialize skew-specific constants */err = ei->get_invariants(hw);if (err)goto err_sw_init;/* setup the private structure */err = igb_sw_init(adapter);if (err)goto err_sw_init;igb_get_bus_info_pcie(hw); //獲取設備的硬件信息,從后面處理來看,這個設備特性還挺多。hw->phy.autoneg_wait_to_complete = false;/* Copper options */if (hw->phy.media_type == e1000_media_type_copper) {hw->phy.mdix = AUTO_ALL_MODES;hw->phy.disable_polarity_correction = false;hw->phy.ms_type = e1000_ms_hw_default;}if (igb_check_reset_block(hw))dev_info(&pdev->dev,"PHY reset is blocked due to SOL/IDER session.\n");/* features is initialized to 0 in allocation, it might have bits* set by igb_sw_init so we should use an or instead of an* assignment.*/netdev->features |= NETIF_F_SG |NETIF_F_IP_CSUM |NETIF_F_IPV6_CSUM |NETIF_F_TSO |NETIF_F_TSO6 |NETIF_F_RXHASH |NETIF_F_RXCSUM |NETIF_F_HW_VLAN_CTAG_RX |NETIF_F_HW_VLAN_CTAG_TX;/* copy netdev features into list of user selectable features */netdev->hw_features |= netdev->features;netdev->hw_features |= NETIF_F_RXALL;/* set this bit last since it cannot be part of hw_features */netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;netdev->vlan_features |= NETIF_F_TSO 
|NETIF_F_TSO6 |NETIF_F_IP_CSUM |NETIF_F_IPV6_CSUM |NETIF_F_SG;netdev->priv_flags |= IFF_SUPP_NOFCS;if (pci_using_dac) {netdev->features |= NETIF_F_HIGHDMA;netdev->vlan_features |= NETIF_F_HIGHDMA;}if (hw->mac.type >= e1000_82576) {netdev->hw_features |= NETIF_F_SCTP_CSUM;netdev->features |= NETIF_F_SCTP_CSUM;}netdev->priv_flags |= IFF_UNICAST_FLT;adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);/* before reading the NVM, reset the controller to put the device in a* known good starting state*/hw->mac.ops.reset_hw(hw);/* make sure the NVM is good , i211/i210 parts can have special NVM* that doesn't contain a checksum*/switch (hw->mac.type) {case e1000_i210:case e1000_i211:if (igb_get_flash_presence_i210(hw)) {if (hw->nvm.ops.validate(hw) < 0) {dev_err(&pdev->dev,"The NVM Checksum Is Not Valid\n");err = -EIO;goto err_eeprom;}}break;default:if (hw->nvm.ops.validate(hw) < 0) {dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");err = -EIO;goto err_eeprom;}break;}/* copy the MAC address out of the NVM */if (hw->mac.ops.read_mac_addr(hw))dev_err(&pdev->dev, "NVM Read Error\n");memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);if (!is_valid_ether_addr(netdev->dev_addr)) {dev_err(&pdev->dev, "Invalid MAC Address\n");err = -EIO;goto err_eeprom;}/* get firmware version for ethtool -i */igb_set_fw_version(adapter);/* configure RXPBSIZE and TXPBSIZE */if (hw->mac.type == e1000_i210) {wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);}setup_timer(&adapter->watchdog_timer, igb_watchdog,(unsigned long) adapter);setup_timer(&adapter->phy_info_timer, igb_update_phy_info,(unsigned long) adapter); //設備驅動的功能,在異常時進行恢復INIT_WORK(&adapter->reset_task, igb_reset_task);INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);/* Initialize link properties that are user-changeable */adapter->fc_autoneg = true;hw->mac.autoneg = true;hw->phy.autoneg_advertised = 0x2f;hw->fc.requested_mode = e1000_fc_default;hw->fc.current_mode = 
e1000_fc_default;igb_validate_mdi_setting(hw);/* By default, support wake on port A */if (hw->bus.func == 0)adapter->flags |= IGB_FLAG_WOL_SUPPORTED;/* Check the NVM for wake support on non-port A ports */if (hw->mac.type >= e1000_82580)hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,&eeprom_data);else if (hw->bus.func == 1)hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);if (eeprom_data & IGB_EEPROM_APME)adapter->flags |= IGB_FLAG_WOL_SUPPORTED;/* now that we have the eeprom settings, apply the special cases where* the eeprom may be wrong or the board simply won't support wake on* lan on a particular port*/switch (pdev->device) {case E1000_DEV_ID_82575GB_QUAD_COPPER:adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;break;case E1000_DEV_ID_82575EB_FIBER_SERDES:case E1000_DEV_ID_82576_FIBER:case E1000_DEV_ID_82576_SERDES:/* Wake events only supported on port A for dual fiber* regardless of eeprom setting*/if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;break;case E1000_DEV_ID_82576_QUAD_COPPER:case E1000_DEV_ID_82576_QUAD_COPPER_ET2:/* if quad port adapter, disable WoL on all but port A */if (global_quad_port_a != 0)adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;elseadapter->flags |= IGB_FLAG_QUAD_PORT_A;/* Reset for multiple quad port adapters */if (++global_quad_port_a == 4)global_quad_port_a = 0;break;default:/* If the device can't wake, don't set software support */if (!device_can_wakeup(&adapter->pdev->dev))adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;}/* initialize the wol settings based on the eeprom settings */if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)adapter->wol |= E1000_WUFC_MAG;/* Some vendors want WoL disabled by default, but still supported */if ((hw->mac.type == e1000_i350) &&(pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {adapter->flags |= IGB_FLAG_WOL_SUPPORTED;adapter->wol = 0;}device_set_wakeup_enable(&adapter->pdev->dev,adapter->flags & IGB_FLAG_WOL_SUPPORTED);/* 
reset the hardware with the new settings */igb_reset(adapter);/* Init the I2C interface 還有個I2C的控制接口*/err = igb_init_i2c(adapter);if (err) {dev_err(&pdev->dev, "failed to init i2c interface\n");goto err_eeprom;}/* let the f/w know that the h/w is now under the control of the* driver.*/igb_get_hw_control(adapter);strcpy(netdev->name, "eth%d");err = register_netdev(netdev); // 注冊網絡設備if (err)goto err_register;/* carrier off reporting is important to ethtool even BEFORE open */netif_carrier_off(netdev);#ifdef CONFIG_IGB_DCAif (dca_add_requester(&pdev->dev) == 0) {adapter->flags |= IGB_FLAG_DCA_ENABLED;dev_info(&pdev->dev, "DCA enabled\n");igb_setup_dca(adapter);}#endif #ifdef CONFIG_IGB_HWMON/* Initialize the thermal sensor on i350 devices. */if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {u16 ets_word;/* Read the NVM to determine if this i350 device supports an* external thermal sensor.*/hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word);if (ets_word != 0x0000 && ets_word != 0xFFFF)adapter->ets = true;elseadapter->ets = false;if (igb_sysfs_init(adapter))dev_err(&pdev->dev,"failed to allocate sysfs resources\n");} else {adapter->ets = false;} #endif/* Check if Media Autosense is enabled */adapter->ei = *ei;if (hw->dev_spec._82575.mas_capable)igb_init_mas(adapter);/* do hw tstamp init after resetting */igb_ptp_init(adapter);dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");/* print bus type/speed/width info, not applicable to i354 */if (hw->mac.type != e1000_i354) {dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",netdev->name,((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :(hw->bus.speed == e1000_bus_speed_5000) ? 
"5.0Gb/s" :"unknown"),((hw->bus.width == e1000_bus_width_pcie_x4) ?"Width x4" :(hw->bus.width == e1000_bus_width_pcie_x2) ?"Width x2" :(hw->bus.width == e1000_bus_width_pcie_x1) ?"Width x1" : "unknown"), netdev->dev_addr);}if ((hw->mac.type >= e1000_i210 ||igb_get_flash_presence_i210(hw))) {ret_val = igb_read_part_string(hw, part_str,E1000_PBANUM_LENGTH);} else {ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND;}if (ret_val)strcpy(part_str, "Unknown");dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);dev_info(&pdev->dev,"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",(adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" :(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",adapter->num_rx_queues, adapter->num_tx_queues);if (hw->phy.media_type == e1000_media_type_copper) {switch (hw->mac.type) {case e1000_i350:case e1000_i210:case e1000_i211:/* Enable EEE for internal copper PHY devices */err = igb_set_eee_i350(hw, true, true);if ((!err) &&(!hw->dev_spec._82575.eee_disable)) {adapter->eee_advert =MDIO_EEE_100TX | MDIO_EEE_1000T;adapter->flags |= IGB_FLAG_EEE;}break;case e1000_i354:if ((rd32(E1000_CTRL_EXT) &E1000_CTRL_EXT_LINK_MODE_SGMII)) {err = igb_set_eee_i354(hw, true, true);if ((!err) &&(!hw->dev_spec._82575.eee_disable)) {adapter->eee_advert =MDIO_EEE_100TX | MDIO_EEE_1000T;adapter->flags |= IGB_FLAG_EEE;}}break;default:break;}}pm_runtime_put_noidle(&pdev->dev);return 0;err_register:igb_release_hw_control(adapter);memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); err_eeprom:if (!igb_check_reset_block(hw))igb_reset_phy(hw);if (hw->flash_address)iounmap(hw->flash_address); err_sw_init:kfree(adapter->shadow_vfta);igb_clear_interrupt_scheme(adapter); #ifdef CONFIG_PCI_IOVigb_disable_sriov(pdev); #endifpci_iounmap(pdev, adapter->io_addr); err_ioremap:free_netdev(netdev); err_alloc_etherdev:pci_release_selected_regions(pdev,pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma:pci_disable_device(pdev);return err; }?
第5步中我們看到,網卡驅動實現了ethtool所需要的接口,也在這里完成了函數地址的注冊。當 ethtool 發起一個系統調用之后,內核會找到對應操作的回調函數。對于igb網卡來說,其實現函數都在drivers/net/ethernet/intel/igb/igb_ethtool.c下。相信你這次能徹底理解ethtool的工作原理了吧?這個命令之所以能查看網卡收發包統計、能修改網卡自適應模式、能調整RX 隊列的數量和大小,是因為ethtool命令最終調用到了網卡驅動的相應方法,而不是ethtool本身有這個超能力。
第6步注冊的igb_netdev_ops中包含的是igb_open等函數,該函數在網卡被啟動的時候會被調用。
//file: drivers/net/ethernet/intel/igb/igb_main.c
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open            = igb_open,
	.ndo_stop            = igb_close,
	.ndo_start_xmit      = igb_xmit_frame,
	.ndo_get_stats64     = igb_get_stats64,
	.ndo_set_rx_mode     = igb_set_rx_mode,
	.ndo_set_mac_address = igb_set_mac,
	.ndo_change_mtu      = igb_change_mtu,
	.ndo_do_ioctl        = igb_ioctl,
	......
};

第7步中,在igb_probe初始化過程中,還調用到了igb_alloc_q_vector。它注冊了一個NAPI機制所必須的poll函數,對于igb網卡驅動來說,這個函數就是igb_poll,如下代碼所示。
static int igb_alloc_q_vector(struct igb_adapter *adapter,
			      int v_count, int v_idx,
			      int txr_count, int txr_idx,
			      int rxr_count, int rxr_idx)
{
	......
	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       igb_poll, 64);
}
總結
以上是生活随笔為你收集整理的Linux数据报文接收发送总结7的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Linux数据报文接收发送总结6
- 下一篇: 7.沟通管理