A Discrete-Event Network Simulator
API
dpdk-net-device.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 NITK Surathkal
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation;
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16  *
17  * Author: Harsh Patel <thadodaharsh10@gmail.com>
18  * Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
19  * Mohit P. Tahiliani <tahiliani@nitk.edu.in>
20  */
21 
22 #include "dpdk-net-device.h"
23 
24 #include "ns3/log.h"
25 #include "ns3/net-device-queue-interface.h"
26 #include "ns3/simulator.h"
27 #include "ns3/uinteger.h"
28 
29 #include <mutex>
30 #include <poll.h>
31 #include <rte_common.h>
32 #include <rte_cycles.h>
33 #include <rte_eal.h>
34 #include <rte_ethdev.h>
35 #include <rte_malloc.h>
36 #include <rte_mbuf.h>
37 #include <rte_mempool.h>
38 #include <rte_port.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/signal.h>
42 #include <unistd.h>
43 
44 namespace ns3
45 {
46 
47 NS_LOG_COMPONENT_DEFINE("DpdkNetDevice"); // Define the log component used by the NS_LOG_* calls in this file.
48 
49 NS_OBJECT_ENSURE_REGISTERED(DpdkNetDevice); // Register DpdkNetDevice with the TypeId system.
50 
51 volatile bool DpdkNetDevice::m_forceQuit = false; // Stop flag: set by SignalHandler and the destructor, polled by LaunchCore and CheckAllPortsLinkStatus.
52 
53 TypeId
55 {
56  static TypeId tid =
57  TypeId("ns3::DpdkNetDevice")
59  .SetGroupName("FdNetDevice")
60  .AddConstructor<DpdkNetDevice>()
61  .AddAttribute("TxTimeout",
62  "The time to wait before transmitting burst from Tx buffer.",
63  TimeValue(MicroSeconds(2000)),
66  .AddAttribute("MaxRxBurst",
67  "Size of Rx Burst.",
68  UintegerValue(64),
70  MakeUintegerChecker<uint32_t>())
71  .AddAttribute("MaxTxBurst",
72  "Size of Tx Burst.",
73  UintegerValue(64),
75  MakeUintegerChecker<uint32_t>())
76  .AddAttribute("MempoolCacheSize",
77  "Size of mempool cache.",
78  UintegerValue(256),
80  MakeUintegerChecker<uint32_t>())
81  .AddAttribute("NbRxDesc",
82  "Number of Rx descriptors.",
83  UintegerValue(1024),
85  MakeUintegerChecker<uint16_t>())
86  .AddAttribute("NbTxDesc",
87  "Number of Tx descriptors.",
88  UintegerValue(1024),
90  MakeUintegerChecker<uint16_t>());
91  return tid;
92 }
93 
95  : m_mempool(nullptr)
96 {
97  NS_LOG_FUNCTION(this);
98 }
99 
101 {
102  NS_LOG_FUNCTION(this);
104  m_forceQuit = true;
105 
106  rte_eal_wait_lcore(1);
107  rte_eth_dev_stop(m_portId);
108  rte_eth_dev_close(m_portId);
109 }
110 
111 void
112 DpdkNetDevice::SetDeviceName(std::string deviceName)
113 {
114  NS_LOG_FUNCTION(this);
115 
116  m_deviceName = deviceName;
117 }
118 
119 void
121 {
122  NS_LOG_FUNCTION(this);
123 
124 #define CHECK_INTERVAL 100 /* 100ms */
125 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
126 
127  uint8_t printFlag = 0;
128  struct rte_eth_link link;
129 
130  for (uint8_t count = 0; count <= MAX_CHECK_TIME; count++)
131  {
132  uint8_t allPortsUp = 1;
133 
134  if (m_forceQuit)
135  {
136  return;
137  }
138  if ((1 << m_portId) == 0)
139  {
140  continue;
141  }
142  memset(&link, 0, sizeof(link));
143  rte_eth_link_get(m_portId, &link);
144  /* print link status if flag set */
145  if (printFlag == 1)
146  {
147  if (link.link_status)
148  {
149  continue;
150  }
151  else
152  {
153  printf("Port %d Link Down\n", m_portId);
154  }
155  continue;
156  }
157  /* clear allPortsUp flag if any link down */
158  if (link.link_status == ETH_LINK_DOWN)
159  {
160  allPortsUp = 0;
161  break;
162  }
163 
164  /* after finally printing all link status, get out */
165  if (printFlag == 1)
166  {
167  break;
168  }
169 
170  if (allPortsUp == 0)
171  {
172  fflush(stdout);
173  rte_delay_ms(CHECK_INTERVAL);
174  }
175 
176  /* set the printFlag if all ports up or timeout */
177  if (allPortsUp == 1 || count == (MAX_CHECK_TIME - 1))
178  {
179  printFlag = 1;
180  }
181  }
182 }
183 
184 void
186 {
187  if (signum == SIGINT || signum == SIGTERM)
188  {
189  printf("\n\nSignal %d received, preparing to exit...\n", signum);
190  m_forceQuit = true;
191  }
192 }
193 
194 void
196 {
197  int queueId = 0;
198  rte_eth_tx_buffer_flush(m_portId, queueId, m_txBuffer);
199 }
200 
201 void
203 {
204  int queueId = 0;
205  m_rxBuffer->length = rte_eth_rx_burst(m_portId, queueId, m_rxBuffer->pkts, m_maxRxPktBurst);
206 
207  for (uint16_t i = 0; i < m_rxBuffer->length; i++)
208  {
209  struct rte_mbuf* pkt = nullptr;
210  pkt = m_rxBuffer->pkts[i];
211 
212  if (!pkt)
213  {
214  continue;
215  }
216 
217  uint8_t* buf = rte_pktmbuf_mtod(pkt, uint8_t*);
218  size_t length = pkt->data_len;
219  FdNetDevice::ReceiveCallback(buf, length);
220  }
221 
222  m_rxBuffer->length = 0;
223 }
224 
225 int
227 {
228  DpdkNetDevice* dpdkNetDevice = (DpdkNetDevice*)arg;
229  unsigned lcoreId;
230  lcoreId = rte_lcore_id();
231  if (lcoreId != 1)
232  {
233  return 0;
234  }
235 
236  while (!m_forceQuit)
237  {
238  dpdkNetDevice->HandleRx();
239  }
240 
241  return 0;
242 }
243 
244 bool
246 {
247  // Refer https://mails.dpdk.org/archives/users/2018-December/003822.html
248  return true;
249 }
250 
251 void
252 DpdkNetDevice::InitDpdk(int argc, char** argv, std::string dpdkDriver)
253 {
254  NS_LOG_FUNCTION(this << argc << argv);
255 
256  NS_LOG_INFO("Binding device to DPDK");
257  std::string command;
258  command.append("dpdk-devbind.py --force ");
259  command.append("--bind=");
260  command.append(dpdkDriver);
261  command.append(" ");
262  command.append(m_deviceName);
263  printf("Executing: %s\n", command.c_str());
264  if (system(command.c_str()))
265  {
266  rte_exit(EXIT_FAILURE, "Execution failed - bye\n");
267  }
268 
269  // wait for the device to bind to Dpdk
270  sleep(5); /* 5 seconds */
271 
272  NS_LOG_INFO("Initialize DPDK EAL");
273  int ret = rte_eal_init(argc, argv);
274  if (ret < 0)
275  {
276  rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
277  }
278 
279  m_forceQuit = false;
280  signal(SIGINT, SignalHandler);
281  signal(SIGTERM, SignalHandler);
282 
283  unsigned nbPorts = rte_eth_dev_count_avail();
284  if (nbPorts == 0)
285  {
286  rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
287  }
288 
289  NS_LOG_INFO("Get port id of the device");
290  if (rte_eth_dev_get_port_by_name(m_deviceName.c_str(), &m_portId) != 0)
291  {
292  rte_exit(EXIT_FAILURE, "Cannot get port id - bye\n");
293  }
294 
295  // Set number of logical cores to 2
296  unsigned int nbLcores = 2;
297 
298  unsigned int nbMbufs = RTE_MAX(nbPorts * (m_nbRxDesc + m_nbTxDesc + m_maxRxPktBurst +
299  m_maxTxPktBurst + nbLcores * m_mempoolCacheSize),
300  8192U);
301 
302  NS_LOG_INFO("Create the mbuf pool");
303  m_mempool = rte_pktmbuf_pool_create("mbuf_pool",
304  nbMbufs,
306  0,
307  RTE_MBUF_DEFAULT_BUF_SIZE,
308  rte_socket_id());
309 
310  if (!m_mempool)
311  {
312  rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
313  }
314 
315  NS_LOG_INFO("Initialize port");
316  static struct rte_eth_conf portConf = {};
317  portConf.rxmode = {};
318  portConf.rxmode.split_hdr_size = 0;
319  portConf.txmode = {};
320  portConf.txmode.mq_mode = ETH_MQ_TX_NONE;
321 
322  struct rte_eth_rxconf reqConf;
323  struct rte_eth_txconf txqConf;
324  struct rte_eth_conf localPortConf = portConf;
325  struct rte_eth_dev_info devInfo;
326 
327  fflush(stdout);
328  rte_eth_dev_info_get(m_portId, &devInfo);
329  if (devInfo.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
330  {
331  localPortConf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
332  }
333  ret = rte_eth_dev_configure(m_portId, 1, 1, &localPortConf);
334  if (ret < 0)
335  {
336  rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, m_portId);
337  }
338 
339  ret = rte_eth_dev_adjust_nb_rx_tx_desc(m_portId, &m_nbRxDesc, &m_nbTxDesc);
340  if (ret < 0)
341  {
342  rte_exit(EXIT_FAILURE,
343  "Cannot adjust number of descriptors: err=%d, port=%u\n",
344  ret,
345  m_portId);
346  }
347 
348  NS_LOG_INFO("Initialize one Rx queue");
349  fflush(stdout);
350  reqConf = devInfo.default_rxconf;
351  reqConf.offloads = localPortConf.rxmode.offloads;
352  ret = rte_eth_rx_queue_setup(m_portId,
353  0,
354  m_nbRxDesc,
355  rte_eth_dev_socket_id(m_portId),
356  &reqConf,
357  m_mempool);
358  if (ret < 0)
359  {
360  rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n", ret, m_portId);
361  }
362 
363  NS_LOG_INFO("Initialize one Tx queue per port");
364  fflush(stdout);
365  txqConf = devInfo.default_txconf;
366  txqConf.offloads = localPortConf.txmode.offloads;
367  ret =
368  rte_eth_tx_queue_setup(m_portId, 0, m_nbTxDesc, rte_eth_dev_socket_id(m_portId), &txqConf);
369  if (ret < 0)
370  {
371  rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n", ret, m_portId);
372  }
373 
374  NS_LOG_INFO("Initialize Tx buffers");
375  m_txBuffer = (rte_eth_dev_tx_buffer*)rte_zmalloc_socket("tx_buffer",
376  RTE_ETH_TX_BUFFER_SIZE(m_maxTxPktBurst),
377  0,
378  rte_eth_dev_socket_id(m_portId));
379  NS_LOG_INFO("Initialize Rx buffers");
380  m_rxBuffer = (rte_eth_dev_tx_buffer*)rte_zmalloc_socket("rx_buffer",
381  RTE_ETH_TX_BUFFER_SIZE(m_maxRxPktBurst),
382  0,
383  rte_eth_dev_socket_id(m_portId));
384  if (!m_txBuffer || !m_rxBuffer)
385  {
386  rte_exit(EXIT_FAILURE, "Cannot allocate buffer for rx/tx on port %u\n", m_portId);
387  }
388 
389  rte_eth_tx_buffer_init(m_txBuffer, m_maxTxPktBurst);
390  rte_eth_tx_buffer_init(m_rxBuffer, m_maxRxPktBurst);
391 
392  NS_LOG_INFO("Start the device");
393  ret = rte_eth_dev_start(m_portId);
394  if (ret < 0)
395  {
396  rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", ret, m_portId);
397  }
398 
399  rte_eth_promiscuous_enable(m_portId);
400 
402 
403  NS_LOG_INFO("Launching core threads");
404  rte_eal_mp_remote_launch(LaunchCore, this, CALL_MASTER);
405 }
406 
407 uint8_t*
409 {
410  struct rte_mbuf* pkt = rte_pktmbuf_alloc(m_mempool);
411  if (!pkt)
412  {
413  return nullptr;
414  }
415  uint8_t* buf = rte_pktmbuf_mtod(pkt, uint8_t*);
416  return buf;
417 }
418 
419 void
421 {
422  struct rte_mbuf* pkt;
423 
424  if (!buf)
425  {
426  return;
427  }
428  pkt = (struct rte_mbuf*)RTE_PTR_SUB(buf, sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
429 
430  rte_pktmbuf_free(pkt);
431 }
432 
433 ssize_t
434 DpdkNetDevice::Write(uint8_t* buffer, size_t length)
435 {
436  struct rte_mbuf** pkt = new struct rte_mbuf*[1];
437  int queueId = 0;
438 
439  if (!buffer || m_txBuffer->length == m_maxTxPktBurst)
440  {
441  NS_LOG_ERROR("Error allocating mbuf" << buffer);
442  return -1;
443  }
444 
445  pkt[0] = (struct rte_mbuf*)RTE_PTR_SUB(buffer, sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
446 
447  pkt[0]->pkt_len = length;
448  pkt[0]->data_len = length;
449  rte_eth_tx_buffer(m_portId, queueId, m_txBuffer, pkt[0]);
450 
451  if (m_txBuffer->length == 1)
452  {
453  // If this is a first packet in buffer, schedule a tx.
456  }
457 
458  return length;
459 }
460 
461 void
463 {
464  std::unique_lock lock{m_pendingReadMutex};
465 
466  while (!m_pendingQueue.empty())
467  {
468  std::pair<uint8_t*, ssize_t> next = m_pendingQueue.front();
469  m_pendingQueue.pop();
470 
471  FreeBuffer(next.first);
472  }
473 }
474 
475 } // namespace ns3
a NetDevice to read/write network traffic from/into a Dpdk enabled port.
static int LaunchCore(void *arg)
A function to handle rx & tx operations.
virtual void FreeBuffer(uint8_t *buf)
Free the given packet buffer.
bool IsLinkUp() const
Check the status of the link.
uint32_t m_maxRxPktBurst
Size of Rx burst.
void InitDpdk(int argc, char **argv, std::string dpdkDriver)
Initialize Dpdk.
void SetDeviceName(std::string deviceName)
Set device name.
void HandleTx()
Transmit packets in burst from the tx_buffer to the nic.
static void SignalHandler(int signum)
A signal handler for SIGINT and SIGTERM signals.
~DpdkNetDevice()
Destructor for the DpdkNetDevice.
struct rte_eth_dev_tx_buffer * m_txBuffer
Buffer to handle burst transmission.
struct rte_eth_dev_tx_buffer * m_rxBuffer
Buffer to handle burst reception.
uint32_t m_maxTxPktBurst
Size of Tx burst.
static TypeId GetTypeId()
Get the type ID.
EventId m_txEvent
Event for stale packet transmission.
std::string m_deviceName
The device name.
void DoFinishStoppingDevice()
Complete additional actions, if any, to tear down the device.
static volatile bool m_forceQuit
Condition variable for Dpdk to stop.
uint16_t m_nbTxDesc
Number of Tx descriptors.
uint16_t m_nbRxDesc
Number of Rx descriptors.
struct rte_mempool * m_mempool
Packet memory pool.
void CheckAllPortsLinkStatus()
Check the link status of all ports in up to 9s and print them finally.
uint16_t m_portId
The port number of the device to be used.
virtual uint8_t * AllocateBuffer(size_t len)
Allocate packet buffer.
DpdkNetDevice()
Constructor for the DpdkNetDevice.
void HandleRx()
Receive packets in burst from the nic to the rx_buffer.
Time m_txTimeout
The time to wait before transmitting burst from Tx buffer.
ssize_t Write(uint8_t *buffer, size_t length)
Write packet data to device.
uint32_t m_mempoolCacheSize
Mempool cache size.
a NetDevice to read/write network traffic from/into a file descriptor.
Definition: fd-net-device.h:84
std::mutex m_pendingReadMutex
Mutex to increase pending read counter.
std::queue< std::pair< uint8_t *, ssize_t > > m_pendingQueue
Number of packets that were received and scheduled for read but not yet read.
Callback< bool, Ptr< NetDevice >, Ptr< const Packet >, uint16_t, const Address & > ReceiveCallback
Definition: net-device.h:322
static EventId Schedule(const Time &delay, FUNC f, Ts &&... args)
Schedule an event to expire after delay.
Definition: simulator.h:568
static void Cancel(const EventId &id)
Set the cancel bit on this event: the event's associated function will not be invoked when it expires...
Definition: simulator.cc:276
AttributeValue implementation for Time.
Definition: nstime.h:1423
a unique identifier for an interface.
Definition: type-id.h:60
TypeId SetParent(TypeId tid)
Set the parent TypeId.
Definition: type-id.cc:935
Hold an unsigned integer type.
Definition: uinteger.h:45
#define MAX_CHECK_TIME
#define CHECK_INTERVAL
Ptr< const AttributeAccessor > MakeTimeAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method.
Definition: nstime.h:1424
Ptr< const AttributeAccessor > MakeUintegerAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method.
Definition: uinteger.h:46
#define NS_LOG_ERROR(msg)
Use NS_LOG to output a message of level LOG_ERROR.
Definition: log.h:254
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
#define NS_LOG_FUNCTION(parameters)
If log level LOG_FUNCTION is enabled, this macro will output all input parameters separated by ",...
#define NS_LOG_INFO(msg)
Use NS_LOG to output a message of level LOG_INFO.
Definition: log.h:275
#define NS_OBJECT_ENSURE_REGISTERED(type)
Register an Object subclass with the TypeId system.
Definition: object-base.h:46
Time MicroSeconds(uint64_t value)
Construct a Time in the indicated unit.
Definition: nstime.h:1360
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Ptr< const AttributeChecker > MakeTimeChecker(const Time min, const Time max)
Helper to make a Time checker with bounded range.
Definition: time.cc:535