FD.io VPP  v21.06-3-gbb25fbf28
Vector Packet Processing
mc_socket.c
Go to the documentation of this file.
1 /*
2  * mc_socket.c: socket based multicast for vlib mc
3  *
4  * Copyright (c) 2010 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #include <vlib/vlib.h>
19 #include <vlib/unix/mc_socket.h>
20 
21 #include <sys/ioctl.h> /* for FIONBIO */
22 #include <netinet/tcp.h> /* for TCP_NODELAY */
23 #include <net/if.h> /* for struct ifreq */
24 
/* format() callback (used with "%U") that renders a socket-transport peer id.
 * Consumes one u64 from args (the peer id in its as_u64 form) and appends
 * "<network address>:<port as 4 hex digits>" to s; returns the updated vector.
 * NOTE(review): the declaration of `a` (listing line 31) is missing from this
 * view; presumably it holds the peer's IPv4 address -- confirm against the
 * original file. */
25 static u8 *
26 format_socket_peer_id (u8 * s, va_list * args)
27 {
28  u64 peer_id_as_u64 = va_arg (*args, u64);
29  mc_peer_id_t peer_id;
30  peer_id.as_u64 = peer_id_as_u64;
32  u32 p = mc_socket_peer_id_get_port (peer_id);
33 
  /* The port is passed through ntohs() before being printed. */
34  s = format (s, "%U:%04x", format_network_address, AF_INET, &a, ntohs (p));
35 
36  return s;
37 }
38 
/* Signature shared by the message handlers dispatched through msg_handler():
 * the handler receives the mc main structure, a pointer to the message
 * payload and the index of the buffer holding it. */
39 typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
40  u32 buffer_index);
41 
42 always_inline void
43 msg_handler (mc_main_t * mcm,
44  u32 buffer_index, u32 handler_frees_buffer, void *_h)
45 {
46  vlib_main_t *vm = mcm->vlib_main;
47  mc_msg_handler_t *h = _h;
48  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
49  void *the_msg = vlib_buffer_get_current (b);
50 
51  h (mcm, the_msg, buffer_index);
52  if (!handler_frees_buffer)
53  vlib_buffer_free_one (vm, buffer_index);
54 }
55 
/* Append one iovec per buffer of the chain starting at buffer_index to
 * *iovs_return and return the total number of bytes described.
 * NOTE(review): listing lines 57 (function name and leading parameter) and 61
 * (declaration of `b`) are missing from this view. */
56 static uword
58  u32 buffer_index, struct iovec **iovs_return)
59 {
60  struct iovec *i;
62  u32 bi = buffer_index;
63  u32 l = 0;
64 
65  while (1)
66  {
67  b = vlib_get_buffer (vm, bi);
  /* Grow the vector by one entry; `i` points at the new element. */
68  vec_add2 (*iovs_return, i, 1);
69  i->iov_base = vlib_buffer_get_current (b);
70  i->iov_len = b->current_length;
71  l += i->iov_len;
  /* Stop at the last buffer of the chain. */
72  if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
73  break;
74  bi = b->next_buffer;
75  }
76 
77  return l;
78 }
79 
/* Send the buffer chain at buffer_index as a single message to tx_addr on
 * the given socket, using msm->iovecs as scratch space.
 * The buffer is not freed here; callers free it.
 * Always returns 0: a failed send is only logged with clib_unix_warning.
 * NOTE(review): listing line 81 (function name and first parameter) is
 * missing from this view; call sites pass an mc_socket_main_t pointer first. */
80 static clib_error_t *
82  int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
83 {
84  vlib_main_t *vm = msm->mc_main.vlib_main;
85  struct msghdr h;
86  word n_bytes, n_bytes_tx, n_retries;
87 
88  clib_memset (&h, 0, sizeof (h));
89  h.msg_name = tx_addr;
90  h.msg_namelen = sizeof (tx_addr[0]);
91 
  /* Reuse the scratch iovec vector: reset its length, keep its storage. */
92  if (msm->iovecs)
93  _vec_len (msm->iovecs) = 0;
94 
95  n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
96  ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
97  if (n_bytes > msm->mc_main.transport.max_packet_size)
98  clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);
99 
100  h.msg_iov = msm->iovecs;
101  h.msg_iovlen = vec_len (msm->iovecs);
102 
103  n_retries = 0;
  /* Spin while the send is incomplete and errno is EAGAIN.
   * NOTE(review): errno is not cleared before the call, so a short
   * (non-negative) return would be judged against a stale errno. */
104  while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
105  && errno == EAGAIN)
106  n_retries++;
107  if (n_bytes_tx != n_bytes)
108  {
109  clib_unix_warning ("sendmsg");
110  return 0;
111  }
  /* Record an event-log entry when at least one retry was needed. */
112  if (n_retries)
113  {
114  ELOG_TYPE_DECLARE (e) =
115  {
116  .format = "sendmsg-helper: %d retries",.format_args = "i4",};
117  struct
118  {
119  u32 retries;
120  } *ed = 0;
121 
122  ed = ELOG_DATA (&vm->elog_main, e);
123  ed->retries = n_retries;
124  }
125  return 0;
126 }
127 
/* Transport tx_buffer callback: send buffer_index on the multicast socket
 * selected for `type` and return sendmsg_helper's result.
 * The buffer is freed here unless type is MC_TRANSPORT_USER_REQUEST_TO_RELAY.
 * NOTE(review): listing lines 133-134 (declarations of `ms` and `error`) are
 * missing from this view. */
128 static clib_error_t *
129 tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
130 {
131  mc_socket_main_t *msm = (mc_socket_main_t *) transport;
132  vlib_main_t *vm = msm->mc_main.vlib_main;
135  error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
136  if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
137  vlib_buffer_free_one (vm, buffer_index);
138  return error;
139 }
140 
/* Transport tx_ack callback: send buffer_index over the ack socket to the
 * address and port encoded in dest_peer_id, then free the buffer.
 * Returns sendmsg_helper's result.
 * NOTE(review): listing line 147 (declaration of `error`) is missing from
 * this view. */
141 static clib_error_t *
142 tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
143 {
144  struct sockaddr_in tx_addr;
145  mc_socket_main_t *msm = (mc_socket_main_t *) transport;
146  vlib_main_t *vm = msm->mc_main.vlib_main;
148 
149  clib_memset (&tx_addr, 0, sizeof (tx_addr));
150  tx_addr.sin_family = AF_INET;
  /* Address and port from the peer id are stored without byte swapping. */
151  tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
152  tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
153 
154  error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
155  vlib_buffer_free_one (vm, buffer_index);
156  return error;
157 }
158 
/* Receive one message from `socket` into vlib buffers taken from the tail of
 * msm->rx_buffers.
 *   rx_addr      - if non-NULL, filled in by recvmsg with the sender address.
 *   buffer_index - out: head of the received buffer chain, or ~0 when
 *                  drop_message is set.
 *   drop_message - non-zero: read the message but hand nothing to the caller;
 *                  the rx buffers stay in the pool for reuse.
 * Returns a unix error if recvmsg fails, 0 otherwise.
 * NOTE(review): listing line 160 (function name and first parameter) is
 * missing from this view. */
159 static clib_error_t *
161  int socket,
162  struct sockaddr_in *rx_addr,
163  u32 * buffer_index, u32 drop_message)
164 {
165  vlib_main_t *vm = msm->mc_main.vlib_main;
166  vlib_buffer_t *b;
167  uword n_left, n_alloc, n_mtu, i, i_rx;
168  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
169  word n_bytes_left;
170 
171  /* Make sure we have at least a MTU worth of buffers. */
172  n_mtu = msm->rx_mtu_n_buffers;
173  n_left = vec_len (msm->rx_buffers);
174  if (n_left < n_mtu)
175  {
  /* Refill towards 8 * n_mtu entries; the vector length is then trimmed to
   * the number of buffers actually obtained. */
176  uword max_alloc = 8 * n_mtu;
177  vec_validate (msm->rx_buffers, max_alloc - 1);
178  n_alloc =
179  vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
180  _vec_len (msm->rx_buffers) = n_left + n_alloc;
181  }
182 
  /* NOTE(review): a short allocation is only caught by this ASSERT. */
183  ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
184  vec_validate (msm->iovecs, n_mtu - 1);
185 
186  /* Allocate RX buffers from end of rx_buffers.
187  Turn them into iovecs to pass to readv. */
188  i_rx = vec_len (msm->rx_buffers) - 1;
189  for (i = 0; i < n_mtu; i++)
190  {
191  b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
192  msm->iovecs[i].iov_base = b->data;
193  msm->iovecs[i].iov_len = buffer_size;
194  }
195  _vec_len (msm->iovecs) = n_mtu;
196 
197  {
198  struct msghdr h;
199 
200  clib_memset (&h, 0, sizeof (h));
201  if (rx_addr)
202  {
203  h.msg_name = rx_addr;
204  h.msg_namelen = sizeof (rx_addr[0]);
205  }
206  h.msg_iov = msm->iovecs;
207  h.msg_iovlen = vec_len (msm->iovecs);
208 
209  n_bytes_left = recvmsg (socket, &h, 0);
210  if (n_bytes_left < 0)
211  return clib_error_return_unix (0, "recvmsg");
212  }
213 
214  if (drop_message)
215  {
216  *buffer_index = ~0;
217  return 0;
218  }
219 
  /* Walk backwards from the tail, chaining as many buffers as the received
   * byte count requires; data was written starting at b->data, hence
   * current_data = 0. */
220  *buffer_index = msm->rx_buffers[i_rx];
221  while (1)
222  {
223  b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
224 
225  b->flags = 0;
226  b->current_data = 0;
227  b->current_length =
228  n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
229 
230  n_bytes_left -= buffer_size;
231 
232  if (n_bytes_left <= 0)
233  break;
234 
235  i_rx--;
236  b->flags |= VLIB_BUFFER_NEXT_PRESENT;
237  b->next_buffer = msm->rx_buffers[i_rx];
238  }
239 
  /* Entries at index i_rx and above now belong to the returned chain. */
240  _vec_len (msm->rx_buffers) = i_rx;
241 
242  return 0 /* no error */ ;
243 }
244 
/* Read-ready callback for the MC_TRANSPORT_MASTERSHIP multicast socket:
 * receives one message and passes it to mc_msg_master_assert_handler;
 * msg_handler frees the buffer afterwards (handler_frees_buffer = 0).
 * Returns the receive error, if any.
 * NOTE(review): listing lines 246, 248, 250 and 252 (function name and the
 * declarations of `msm`, `ms` and `error`) are missing from this view. */
245 static clib_error_t *
247 {
249  mc_main_t *mcm = &msm->mc_main;
251  &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
253  u32 bi = 0;
254 
255  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
256  0);
257  if (!error)
258  msg_handler (mcm, bi,
259  /* handler_frees_buffer */ 0,
260  mc_msg_master_assert_handler);
261 
262  return error;
263 }
264 
/* Read-ready callback for the "to relay" multicast socket.
 * When this node's relay_state is MC_RELAY_STATE_MASTER, the received user
 * request is stamped with the next global sequence number (converted with
 * clib_host_to_net_u32), re-sent on the "from relay" socket and freed.
 * Otherwise the message is read and dropped.
 * NOTE(review): listing lines 266, 268, 275 and 286 (function name and the
 * declarations of `msm`, `error` and `b`) are missing from this view. */
265 static clib_error_t *
267 {
269  mc_main_t *mcm = &msm->mc_main;
270  vlib_main_t *vm = msm->mc_main.vlib_main;
271  mc_multicast_socket_t *ms_to_relay =
272  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
273  mc_multicast_socket_t *ms_from_relay =
274  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
276  u32 bi = 0;
277  u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
278 
279  /* Not the ordering master? Turf the msg */
280  error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
281  /* drop_message */ !is_master);
282 
283  /* If we are the master, number and rebroadcast the msg. */
284  if (!error && is_master)
285  {
287  mc_msg_user_request_t *mp = vlib_buffer_get_current (b);
288  mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
289  mcm->relay_global_sequence++;
290  error =
291  sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr,
292  bi);
293  vlib_buffer_free_one (vm, bi);
294  }
295 
296  return error;
297 }
298 
/* Read-ready callback for the "from relay" multicast socket: receives one
 * message and passes it to mc_msg_user_request_handler, which owns the
 * buffer afterwards (handler_frees_buffer = 1).
 * Returns the receive error, if any.
 * NOTE(review): listing lines 300, 302, 304 and 306 (function name and the
 * declarations of `msm`, `ms` and `error`) are missing from this view. */
299 static clib_error_t *
301 {
303  mc_main_t *mcm = &msm->mc_main;
305  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
307  u32 bi = 0;
308 
309  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
310  0);
311  if (!error)
312  {
313  msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
314  mc_msg_user_request_handler);
315  }
316  return error;
317 }
318 
/* Read-ready callback for the MC_TRANSPORT_JOIN multicast socket: receives
 * one message and dispatches on its type field to the join-or-leave request
 * handler or the join reply handler. msg_handler frees the buffer in both
 * cases.
 * NOTE(review): an unknown type hits ASSERT (0) and the received buffer is
 * not freed on that path.
 * NOTE(review): listing lines 320, 322, 326 and 333 (function name and the
 * declarations of `msm`, `error` and `b`) are missing from this view; `vm`
 * is presumably used by the missing line 333. */
319 static clib_error_t *
321 {
323  mc_main_t *mcm = &msm->mc_main;
324  vlib_main_t *vm = mcm->vlib_main;
325  mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
327  u32 bi = 0;
328 
329  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
330  0);
331  if (!error)
332  {
334  mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);
335 
  /* The type field is byte-swapped before being compared. */
336  switch (clib_host_to_net_u32 (mp->type))
337  {
338  case MC_MSG_TYPE_join_or_leave_request:
339  msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
340  mc_msg_join_or_leave_request_handler);
341  break;
342 
343  case MC_MSG_TYPE_join_reply:
344  msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
345  mc_msg_join_reply_handler);
346  break;
347 
348  default:
349  ASSERT (0);
350  break;
351  }
352  }
353  return error;
354 }
355 
/* Read-ready callback for the ack socket: receives one message and passes it
 * to mc_msg_user_ack_handler; msg_handler frees the buffer afterwards.
 * Returns the receive error, if any.
 * NOTE(review): listing lines 357, 359 and 361 (function name and the
 * declarations of `msm` and `error`) are missing from this view. */
356 static clib_error_t *
358 {
360  mc_main_t *mcm = &msm->mc_main;
362  u32 bi = 0;
363 
364  error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
365  /* drop_message */ 0);
366  if (!error)
367  msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
368  mc_msg_user_ack_handler);
369  return error;
370 }
371 
/* Tear down one catchup connection: unregister its file, free its input and
 * output vectors and return the catchup record to msm->catchups.
 * NOTE(review): listing lines 373-374 (function name and leading parameters)
 * and 377 are missing from this view, so any further cleanup done on line
 * 377 cannot be confirmed here. */
372 static void
375  clib_file_t * uf)
376 {
378  clib_file_del (um, uf);
379  vec_free (c->input_vector);
380  vec_free (c->output_vector);
381  pool_put (msm->catchups, c);
382 }
383 
/* Look up the catchup record registered for file_descriptor in
 * msm->catchup_index_by_file_descriptor.
 * Returns the pool element, or 0 when the descriptor is not in the hash.
 * NOTE(review): listing line 385 (function name and first parameter) is
 * missing from this view. */
384 static mc_socket_catchup_t *
386  int file_descriptor)
387 {
388  uword *p =
389  hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
390  return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
391 }
392 
/* Shared read-ready handler for catchup sockets.
 * Appends up to 4096 bytes per call to c->input_vector. On EOF with data
 * accumulated, the whole vector goes to the catchup request handler (server)
 * or the catchup reply handler (client). A read error other than EAGAIN
 * tears the connection down and is returned.
 * NOTE(review): listing lines 394, 396-397 and 399-400 (function name and
 * the declarations of `msm`, `um` and `c`) are missing from this view. */
393 static clib_error_t *
395 {
398  mc_main_t *mcm = &msm->mc_main;
401  word l, n, is_eof;
402 
  /* Grow by 4096 bytes, read into the new tail, then trim to what arrived. */
403  l = vec_len (c->input_vector);
404  vec_resize (c->input_vector, 4096);
405  n =
406  read (uf->file_descriptor, c->input_vector + l,
407  vec_len (c->input_vector) - l);
408  is_eof = n == 0;
409 
410  if (n < 0)
411  {
412  if (errno == EAGAIN)
413  n = 0;
414  else
415  {
416  catchup_cleanup (msm, c, um, uf);
417  return clib_error_return_unix (0, "read");
418  }
419  }
420 
421  _vec_len (c->input_vector) = l + n;
422 
423  if (is_eof && vec_len (c->input_vector) > 0)
424  {
425  if (is_server)
426  {
  /* Server side: handle the request, then reset the input vector. */
427  mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
428  c - msm->catchups);
429  _vec_len (c->input_vector) = 0;
430  }
431  else
432  {
433  mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
434  c - msm->catchups);
435  c->input_vector = 0; /* reply handler is responsible for freeing vector */
436  catchup_cleanup (msm, c, um, uf);
437  }
438  }
439 
440  return 0 /* no error */ ;
441 }
442 
/* Read-ready callback for server-side catchup sockets; forwards to
 * catchup_socket_read_ready with is_server = 1.
 * NOTE(review): listing line 444 (function name and parameter) is missing
 * from this view. */
443 static clib_error_t *
445 {
446  return catchup_socket_read_ready (uf, /* is_server */ 1);
447 }
448 
/* Read-ready callback for client-side catchup sockets; logs an event when
 * MC_EVENT_LOGGING is enabled, then forwards to catchup_socket_read_ready
 * with is_server = 0.
 * NOTE(review): listing lines 450 and 454 (function name and the declaration
 * of `msm`) are missing from this view. */
449 static clib_error_t *
451 {
452  if (MC_EVENT_LOGGING)
453  {
455  vlib_main_t *vm = msm->mc_main.vlib_main;
456 
457  ELOG_TYPE (e, "catchup_client_read_ready");
458  ELOG (&vm->elog_main, e, 0);
459  }
460  return catchup_socket_read_ready (uf, /* is_server */ 0);
461 }
462 
/* Shared write-ready handler for catchup sockets.
 * First completes a pending non-blocking connect by checking SO_ERROR, then
 * writes c->output_vector in chunks of at most rx_mtu_n_bytes - 64 bytes.
 * Once everything is written, a client shuts down its write side to signal
 * EOF, while a server tears the connection down. Any error also tears the
 * connection down and is returned.
 * NOTE(review): listing lines 464, 466-469, 487 and 525-526 (function name,
 * the declarations of `msm`, `um` and `c`, part of the error construction,
 * and two lines in the client branch) are missing from this view. */
463 static clib_error_t *
465 {
470  clib_error_t *error = 0;
471  int n;
472 
473  if (c->connect_in_progress)
474  {
475  u32 len, value;
476 
477  c->connect_in_progress = 0;
478  len = sizeof (value);
479  if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &value, &len) < 0)
480  {
481  error = clib_error_return_unix (0, "getsockopt SO_ERROR");
482  goto error_quit;
483  }
484  if (value != 0)
485  {
486  error =
488  "connect fails");
489  goto error_quit;
490  }
491  }
492 
493  while (1)
494  {
495  u32 n_this_write;
496 
497  n_this_write =
498  clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
499  msm->rx_mtu_n_bytes -
500  64 /* ip + tcp + option allowance */ );
501 
  /* n_this_write is unsigned, so this only matches zero. */
502  if (n_this_write <= 0)
503  break;
504 
  /* NOTE(review): busy-waits on EAGAIN instead of returning to the
   * event loop. */
505  do
506  {
507  n = write (uf->file_descriptor,
508  c->output_vector + c->output_vector_n_written,
509  n_this_write);
510  }
511  while (n < 0 && errno == EAGAIN);
512 
513  if (n < 0)
514  {
515  error = clib_error_return_unix (0, "write");
516  goto error_quit;
517  }
518  c->output_vector_n_written += n;
519  }
520 
521  if (c->output_vector_n_written >= vec_len (c->output_vector))
522  {
523  if (!is_server)
524  {
527  /* Send EOF to other side. */
528  shutdown (uf->file_descriptor, SHUT_WR);
529  return error;
530  }
531  else
532  {
  /* The error paths above jump straight into this cleanup branch. */
533  error_quit:
534  catchup_cleanup (msm, c, um, uf);
535  }
536  }
537  return error;
538 }
539 
/* Write-ready callback for server-side catchup sockets; forwards to
 * catchup_socket_write_ready with is_server = 1.
 * NOTE(review): listing line 541 (function name and parameter) is missing
 * from this view. */
540 static clib_error_t *
542 {
543  return catchup_socket_write_ready (uf, /* is_server */ 1);
544 }
545 
/* Write-ready callback for client-side catchup sockets; forwards to
 * catchup_socket_write_ready with is_server = 0.
 * NOTE(review): listing line 547 (function name and parameter) is missing
 * from this view. */
546 static clib_error_t *
548 {
549  return catchup_socket_write_ready (uf, /* is_server */ 0);
550 }
551 
/* Error callback for catchup sockets: tears the connection down and returns
 * a generic "error".
 * NOTE(review): listing lines 553 and 555-558 (function name and the
 * declarations of `msm`, `um` and `c`) are missing from this view. */
552 static clib_error_t *
554 {
559  catchup_cleanup (msm, c, um, uf);
560  return clib_error_return (0, "error");
561 }
562 
/* Read-ready callback for the TCP catchup listen socket.
 * Accepts one connection, allocates a zeroed catchup record for it, sets
 * TCP_NODELAY, registers the socket with the server read/write/error
 * callbacks and maps its descriptor to the catchup pool index.
 * Returns a unix error (after releasing the record) if accept fails.
 * NOTE(review): listing lines 564, 566 and 569 (function name and the
 * declarations of `msm` and `c`) are missing from this view. */
563 static clib_error_t *
565 {
567  struct sockaddr_in client_addr;
  /* NOTE(review): declared int but passed to accept() through a socklen_t
   * pointer cast. */
568  int client_len;
570  clib_file_t template = { 0 };
571 
572  pool_get (msm->catchups, c);
573  clib_memset (c, 0, sizeof (c[0]));
574 
575  client_len = sizeof (client_addr);
576 
577  /* Acquires the non-blocking attrib from the server socket. */
578  c->socket = accept (uf->file_descriptor,
579  (struct sockaddr *) &client_addr,
580  (socklen_t *) & client_len);
581 
582  if (c->socket < 0)
583  {
584  pool_put (msm->catchups, c);
585  return clib_error_return_unix (0, "accept");
586  }
587 
588  if (MC_EVENT_LOGGING)
589  {
590  mc_main_t *mcm = &msm->mc_main;
591  vlib_main_t *vm = mcm->vlib_main;
592 
593  ELOG_TYPE_DECLARE (e) =
594  {
595  .format = "catchup accepted from 0x%lx",.format_args = "i4",};
596  struct
597  {
598  u32 addr;
599  } *ed = 0;
600 
601  ed = ELOG_DATA (&vm->elog_main, e);
602  ed->addr = ntohl (client_addr.sin_addr.s_addr);
603  }
604 
605  /* Disable the Nagle algorithm, ship catchup pkts immediately */
606  {
607  int one = 1;
  /* A failure here is only logged; the connection is still used. */
608  if ((setsockopt (c->socket, IPPROTO_TCP,
609  TCP_NODELAY, (void *) &one, sizeof (one))) < 0)
610  {
611  clib_unix_warning ("catchup socket: set TCP_NODELAY");
612  }
613  }
614 
615  template.read_function = catchup_server_read_ready;
616  template.write_function = catchup_server_write_ready;
617  template.error_function = catchup_socket_error_ready;
618  template.file_descriptor = c->socket;
619  template.description = format (0, "multicast catchup socket");
620  template.private_data = pointer_to_uword (msm);
621  c->clib_file_index = clib_file_add (&file_main, &template);
622  hash_set (msm->catchup_index_by_file_descriptor, c->socket,
623  c - msm->catchups);
624 
625  return 0;
626 }
627 
628 /* Return and bind to an unused port. */
629 static word
631 {
632  for (; port < 1 << 16; port++)
633  {
634  struct sockaddr_in a;
635 
636  clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
637 
638  a.sin_family = PF_INET;
639  a.sin_addr.s_addr = INADDR_ANY;
640  a.sin_port = htons (port);
641 
642  if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
643  break;
644  }
645 
646  return port < 1 << 16 ? port : -1;
647 }
648 
/* Create and configure one UDP multicast socket.
 * Steps: create the socket, set the multicast TTL (defaulting to 1), enable
 * SO_REUSEADDR, bind to ms->tx_addr on udp_port, join the multicast group on
 * the configured interface address, switch to non-blocking mode and request
 * a 1 MiB send buffer.
 * `type` is only used to label error messages.
 * Returns a unix error naming the failing step, or 0 on success.
 * NOTE(review): the error returns after socket() succeeds do not close
 * ms->socket.
 * NOTE(review): listing lines 650-651 (function name and leading
 * parameters), 679 and 688 (right-hand sides of the two address
 * assignments) are missing from this view. */
649 static clib_error_t *
652  char *type, uword udp_port)
653 {
654  int one = 1;
655  struct ip_mreq mcast_req;
656 
657  if (!msm->multicast_ttl)
658  msm->multicast_ttl = 1;
659 
660  /* mastership (multicast) TX socket */
661  if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
662  return clib_error_return_unix (0, "%s socket", type);
663 
664  {
665  u8 ttl = msm->multicast_ttl;
666 
667  if ((setsockopt (ms->socket, IPPROTO_IP,
668  IP_MULTICAST_TTL, (void *) &ttl, sizeof (ttl))) < 0)
669  return clib_error_return_unix (0, "%s set multicast ttl", type);
670  }
671 
672  if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)) <
673  0)
674  return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
675 
676  clib_memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
677  ms->tx_addr.sin_family = AF_INET;
678  ms->tx_addr.sin_addr.s_addr =
680  ms->tx_addr.sin_port = htons (udp_port);
681 
682  if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
683  sizeof (ms->tx_addr)) < 0)
684  return clib_error_return_unix (0, "%s bind", type);
685 
686  clib_memset (&mcast_req, 0, sizeof (mcast_req));
687  mcast_req.imr_multiaddr.s_addr =
689  mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
690 
691  if ((setsockopt (ms->socket, IPPROTO_IP,
692  IP_ADD_MEMBERSHIP, (void *) &mcast_req,
693  sizeof (mcast_req))) < 0)
694  return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
695  type);
696 
697  if (ioctl (ms->socket, FIONBIO, &one) < 0)
698  return clib_error_return_unix (0, "%s set FIONBIO", type);
699 
700  /* FIXME remove this when we support tx_ready. */
701  {
702  u32 len = 1 << 20;
703  socklen_t sl = sizeof (len);
  /* NOTE(review): failure is reported through clib_unix_error rather than
   * returned like the steps above -- confirm this is intended. */
704  if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
705  clib_unix_error ("setsockopt");
706  }
707 
708  return 0;
709 }
710 
/* Create every socket used by the transport and register it with the file
 * poller.
 * Four multicast sockets (mastership, join, to-relay, from-relay) are set up
 * on consecutive UDP ports starting at `port`; then a non-blocking UDP ack
 * socket and a non-blocking listening TCP catchup socket are created. Each
 * socket gets a read callback registered via clib_file_add.
 * Returns the first error encountered, or 0 on success.
 * NOTE(review): many listing lines are missing from this view (712, 715,
 * 718-719, 723, 725, 732, 738, 745, 757, 767, 779): the function name, the
 * declaration of `error`, the start of the base-port computation, the
 * setup_mutlicast_socket call heads, the ack/catchup port binding and the
 * first read_function assignment. Comments below describe only what is
 * visible. */
711 static clib_error_t *
713 {
714  int one = 1;
716  u32 port;
717 
720  0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
721  - 1);
722 
724 
726  &msm->multicast_sockets
727  [MC_TRANSPORT_MASTERSHIP], "mastership",
728  port++);
729  if (error)
730  return error;
731 
733  &msm->multicast_sockets[MC_TRANSPORT_JOIN],
734  "join", port++);
735  if (error)
736  return error;
737 
739  &msm->multicast_sockets
740  [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
741  "to relay", port++);
742  if (error)
743  return error;
744 
746  &msm->multicast_sockets
747  [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
748  "from relay", port++);
749  if (error)
750  return error;
751 
752  /* ACK rx socket */
753  msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
754  if (msm->ack_socket < 0)
755  return clib_error_return_unix (0, "ack socket");
756 
758 
759  if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
760  return clib_error_return_unix (0, "ack socket FIONBIO");
761 
762  msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
763  if (msm->catchup_server_socket < 0)
764  return clib_error_return_unix (0, "catchup server socket");
765 
766  msm->catchup_tcp_port =
768 
769  if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
770  return clib_error_return_unix (0, "catchup server socket FIONBIO");
771 
772  if (listen (msm->catchup_server_socket, 5) < 0)
773  return clib_error_return_unix (0, "catchup server socket listen");
774 
775  /* epoll setup for multicast mastership socket */
776  {
  /* One template is reused for every registration; only the callback,
   * descriptor and description change between calls.
   * NOTE(review): no description is set in the visible lines before the
   * first clib_file_add. */
777  clib_file_t template = { 0 };
778 
780  template.file_descriptor =
781  msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
782  template.private_data = (uword) msm;
783  clib_file_add (&file_main, &template);
784 
785  /* epoll setup for multicast to_relay socket */
786  template.read_function = to_relay_socket_read_ready;
787  template.file_descriptor =
788  msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
789  template.private_data = (uword) msm;
790  template.description = format (0, "multicast to_relay socket");
791  clib_file_add (&file_main, &template);
792 
793  /* epoll setup for multicast from_relay socket */
794  template.read_function = from_relay_socket_read_ready;
795  template.file_descriptor =
796  msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
797  template.private_data = (uword) msm;
798  template.description = format (0, "multicast from_relay socket");
799  clib_file_add (&file_main, &template);
800 
  /* Registration of the multicast join socket. */
801  template.read_function = join_socket_read_ready;
802  template.file_descriptor =
803  msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
804  template.private_data = (uword) msm;
805  template.description = format (0, "multicast join socket");
806  clib_file_add (&file_main, &template);
807 
808  /* epoll setup for ack rx socket */
809  template.read_function = ack_socket_read_ready;
810  template.file_descriptor = msm->ack_socket;
811  template.private_data = (uword) msm;
812  template.description = format (0, "multicast ack rx socket");
813  clib_file_add (&file_main, &template);
814 
815  /* epoll setup for TCP catchup server */
816  template.read_function = catchup_listen_read_ready;
817  template.file_descriptor = msm->catchup_server_socket;
818  template.private_data = (uword) msm;
819  template.description = format (0, "multicast tcp catchup socket");
820  clib_file_add (&file_main, &template);
821  }
822 
823  return 0;
824 }
825 
/* Queue output on a catchup connection.
 * If set_output_vector is non-NULL it replaces c->output_vector and 0 is
 * returned; otherwise n_bytes are appended to c->output_vector and a pointer
 * to the new bytes is returned for the caller to fill in.
 * When output is pending and the file is not already flagged
 * UNIX_FILE_DATA_AVAILABLE_TO_WRITE, the missing lines presumably set that
 * flag and update the poller -- confirm against the original file.
 * NOTE(review): listing lines 827, 830, 841 and 843 (function name, the
 * declaration of `uf` and the flag update) are missing from this view. */
826 static void *
828  u8 * set_output_vector)
829 {
831  c->clib_file_index);
832  u8 *result = 0;
833 
834  if (set_output_vector)
835  c->output_vector = set_output_vector;
836  else
837  vec_add2 (c->output_vector, result, n_bytes);
838  if (vec_len (c->output_vector) > 0)
839  {
840  int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
842  if (!skip_update)
844  }
845  return result;
846 }
847 
/* Transport catchup_request_fun callback: open a non-blocking TCP connection
 * to catchup_peer_id and queue a catchup request for stream_index on it.
 * Returns the index of the new catchup record in msm->catchups; returns 0 on
 * failure.
 * NOTE(review): 0 is also a valid pool index, so callers cannot tell failure
 * from success by the return value alone.
 * NOTE(review): the early "return 0" paths leave the record obtained with
 * pool_get allocated, and the later ones also leave c->socket open.
 * NOTE(review): listing lines 855, 857, 907 and 915 (declarations of `um`
 * and `c`, the read_function assignment and the start of the hash_set call)
 * are missing from this view. */
848 static uword
849 catchup_request_fun (void *transport_main,
850  u32 stream_index, mc_peer_id_t catchup_peer_id)
851 {
852  mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
853  mc_main_t *mcm = &msm->mc_main;
854  vlib_main_t *vm = mcm->vlib_main;
856  struct sockaddr_in addr;
858  int one = 1;
859 
860  pool_get (msm->catchups, c);
861  clib_memset (c, 0, sizeof (*c));
862 
863  c->socket = socket (AF_INET, SOCK_STREAM, 0);
864  if (c->socket < 0)
865  {
866  clib_unix_warning ("socket");
867  return 0;
868  }
869 
870  if (ioctl (c->socket, FIONBIO, &one) < 0)
871  {
872  clib_unix_warning ("FIONBIO");
873  return 0;
874  }
875 
876  clib_memset (&addr, 0, sizeof (addr));
877  addr.sin_family = AF_INET;
878  addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
879  addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
880 
  /* Completion of the connect is checked in catchup_socket_write_ready. */
881  c->connect_in_progress = 1;
882 
883  if (MC_EVENT_LOGGING)
884  {
885  ELOG_TYPE_DECLARE (e) =
886  {
887  .format = "connecting to peer 0x%Lx",.format_args = "i8",};
888  struct
889  {
890  u64 peer;
891  } *ed;
892  ed = ELOG_DATA (&vm->elog_main, e);
893  ed->peer = catchup_peer_id.as_u64;
894  }
895 
  /* EINPROGRESS is expected on a non-blocking socket and is not an error. */
896  if (connect (c->socket, (const void *) &addr, sizeof (addr))
897  < 0 && errno != EINPROGRESS)
898  {
899  clib_unix_warning ("connect to %U fails",
900  format_socket_peer_id, catchup_peer_id);
901  return 0;
902  }
903 
904  {
905  clib_file_t template = { 0 };
906 
908  template.write_function = catchup_client_write_ready;
909  template.error_function = catchup_socket_error_ready;
910  template.file_descriptor = c->socket;
911  template.private_data = (uword) msm;
912  template.description = format (0, "multicast socket");
913  c->clib_file_index = clib_file_add (um, &template);
914 
916  c - msm->catchups);
917  }
918 
919  {
  /* Reserve room for the request in the output vector and fill it in place;
   * the message is byte-swapped after the fields are set. */
920  mc_msg_catchup_request_t *mp;
921  mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */
922  0);
923  mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
924  mp->stream_index = stream_index;
925  mc_byte_swap_msg_catchup_request (mp);
926  }
927 
928  return c - msm->catchups;
929 }
930 
/* Transport catchup_send_fun callback.
 * NOTE(review): listing lines 935-936, which hold the rest of the body, are
 * missing from this view. Presumably `opaque` is a catchup index and `data`
 * is handed to catchup_add_pending_output as the output vector -- confirm
 * against the original file. */
931 static void
932 catchup_send_fun (void *transport_main, uword opaque, u8 * data)
933 {
934  mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
937 }
938 
939 static int
941 {
942  int fd;
943  struct ifreq ifr;
944  struct sockaddr_in *sa;
945 
946  /* Dig up our IP address */
947  fd = socket (PF_INET, AF_INET, 0);
948  if (fd < 0)
949  {
950  clib_unix_error ("socket");
951  return -1;
952  }
953 
954  ifr.ifr_addr.sa_family = AF_INET;
955  strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
956  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
957  {
958  clib_unix_error ("ioctl(SIOCFIGADDR)");
959  close (fd);
960  return -1;
961  }
962 
963  sa = (void *) &ifr.ifr_addr;
964  clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
965 
966  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
967  {
968  close (fd);
969  return -1;
970  }
971  if (mtu)
972  *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);
973 
974  close (fd);
975 
976  return 0;
977 }
978 
/* Initialise the socket-based multicast transport.
 * Picks the multicast interface: msm->multicast_interface_name when set,
 * otherwise the first entry of intfc_probe_list (n_intfcs_to_probe entries)
 * for which find_interface_ip4_address succeeds. Then it derives the rx
 * buffer sizing from the interface MTU, creates the sockets via
 * socket_setup, fills in the mc_main transport callbacks and peer ids, and
 * calls mc_main_init.
 * Returns an error if no interface address is found or socket setup fails.
 * NOTE(review): listing lines 983, 990-991, 1000, 1017, 1022, 1024, 1031 and
 * 1035 are missing from this view (the declaration of `error`, the default
 * multicast address assignment, the named-interface lookup call, the store
 * of the discovered address, the rx_mtu_n_buffers expressions and the
 * peer-id constructor calls). */
979 clib_error_t *
980 mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
981  int n_intfcs_to_probe)
982 {
984  mc_main_t *mcm;
985  u32 mtu;
986 
987  mcm = &msm->mc_main;
988 
989  /* 239.255.0.7 */
992 
993  {
994  u32 i, a, win;
995 
996  win = 0;
997  if (msm->multicast_interface_name)
998  {
999  win =
1001  &mtu);
1002  }
1003  else
1004  {
  /* Take the first probe-list entry whose address lookup succeeds. */
1005  for (i = 0; i < n_intfcs_to_probe; i++)
1006  if (!find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
1007  {
1008  win = 1;
1009  msm->multicast_interface_name = intfc_probe_list[i];
1010  break;
1011  }
1012  }
1013 
1014  if (!win)
1015  return clib_error_return (0, "can't find interface ip4 address");
1016 
1018  }
1019 
1020  msm->rx_mtu_n_bytes = mtu;
1021  msm->rx_mtu_n_buffers =
1023  msm->rx_mtu_n_buffers +=
1025 
1026  error = socket_setup (msm);
1027  if (error)
1028  return error;
1029 
1030  mcm->transport.our_ack_peer_id =
1032  msm->ack_udp_port);
1033 
1034  mcm->transport.our_catchup_peer_id =
1036  msm->catchup_tcp_port);
1037 
  /* Hook this transport's implementation into the generic mc layer. */
1038  mcm->transport.tx_buffer = tx_buffer;
1039  mcm->transport.tx_ack = tx_ack;
1040  mcm->transport.catchup_request_fun = catchup_request_fun;
1041  mcm->transport.catchup_send_fun = catchup_send_fun;
1042  mcm->transport.format_peer_id = format_socket_peer_id;
1043  mcm->transport.opaque = msm;
1044  mcm->transport.max_packet_size = mtu;
1045 
1046  mc_main_init (mcm, "socket");
1047 
1048  return error;
1049 }
1050 
1051 /*
1052  * fd.io coding-style-patch-verification: ON
1053  *
1054  * Local Variables:
1055  * eval: (c-set-style "gnu")
1056  * End:
1057  */
find_and_bind_to_free_port
static word find_and_bind_to_free_port(word sock, word port)
Definition: mc_socket.c:630
vlib.h
clib_file::file_descriptor
u32 file_descriptor
Definition: file.h:54
vlib_buffer_t::next_buffer
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:149
to_relay_socket_read_ready
static clib_error_t * to_relay_socket_read_ready(clib_file_t *uf)
Definition: mc_socket.c:266
file_main
clib_file_main_t file_main
Definition: main.c:63
mc_socket_main_t::multicast_interface_name
char * multicast_interface_name
Definition: mc_socket.h:79
ntohs
#define ntohs(x)
Definition: af_xdp.bpf.c:29
ack_socket_read_ready
static clib_error_t * ack_socket_read_ready(clib_file_t *uf)
Definition: mc_socket.c:357
catchup_socket_write_ready
static clib_error_t * catchup_socket_write_ready(clib_file_t *uf, int is_server)
Definition: mc_socket.c:464
clib_memcpy
#define clib_memcpy(d, s, n)
Definition: string.h:197
vlib_get_buffer
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:111
tx_ack
static clib_error_t * tx_ack(void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
Definition: mc_socket.c:142
pointer_to_uword
static uword pointer_to_uword(const void *p)
Definition: types.h:131
catchup_send_fun
static void catchup_send_fun(void *transport_main, uword opaque, u8 *data)
Definition: mc_socket.c:932
pool_elt_at_index
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:553
mc_msg_handler_t
void() mc_msg_handler_t(mc_main_t *mcm, void *msg, u32 buffer_index)
Definition: mc_socket.c:39
mc_socket_main_t
Definition: mc_socket.h:43
ttl
u8 ttl
Definition: fib_types.api:26
catchup_socket_error_ready
static clib_error_t * catchup_socket_error_ready(clib_file_t *uf)
Definition: mc_socket.c:553
mc_socket_main_t::catchup_tcp_port
u32 catchup_tcp_port
Definition: mc_socket.h:76
tx_buffer
static clib_error_t * tx_buffer(void *transport, mc_transport_type_t type, u32 buffer_index)
Definition: mc_socket.c:129
clib_error_return
#define clib_error_return(e, args...)
Definition: error.h:99
clib_file::read_function
clib_file_function_t * read_function
Definition: file.h:67
clib_file::private_data
u64 private_data
Definition: file.h:64
pool_put
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:305
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
mc_socket_main_t::multicast_ttl
u32 multicast_ttl
Definition: mc_socket.h:86
ELOG_TYPE
#define ELOG_TYPE(f, fmt)
Definition: elog.h:462
port
u16 port
Definition: lb_types.api:73
format_network_address
u8 * format_network_address(u8 *s, va_list *args)
Definition: unix-formats.c:212
find_interface_ip4_address
static int find_interface_ip4_address(char *if_name, u32 *ip4_address, u32 *mtu)
Definition: mc_socket.c:940
clib_file_main_t
Definition: file.h:85
mc_socket_main_t::rx_mtu_n_bytes
u32 rx_mtu_n_bytes
Definition: mc_socket.h:61
catchup_client_read_ready
static clib_error_t * catchup_client_read_ready(clib_file_t *uf)
Definition: mc_socket.c:450
addr
vhost_vring_addr_t addr
Definition: vhost_user.h:130
catchup_server_read_ready
static clib_error_t * catchup_server_read_ready(clib_file_t *uf)
Definition: mc_socket.c:444
catchup_server_write_ready
static clib_error_t * catchup_server_write_ready(clib_file_t *uf)
Definition: mc_socket.c:541
h
h
Definition: flowhash_template.h:372
clib_unix_warning
#define clib_unix_warning(format, args...)
Definition: error.h:68
error
Definition: cJSON.c:88
join_socket_read_ready
static clib_error_t * join_socket_read_ready(clib_file_t *uf)
Definition: mc_socket.c:320
catchup_client_write_ready
static clib_error_t * catchup_client_write_ready(clib_file_t *uf)
Definition: mc_socket.c:547
mc_multicast_socket_t::tx_addr
struct sockaddr_in tx_addr
Definition: mc_socket.h:27
catchup_request_fun
static uword catchup_request_fun(void *transport_main, u32 stream_index, mc_peer_id_t catchup_peer_id)
Definition: mc_socket.c:849
hash_set
#define hash_set(h, key, value)
Definition: hash.h:255
mc_socket_main_t::catchup_server_socket
int catchup_server_socket
Definition: mc_socket.h:54
vlib_buffer_t::current_data
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:119
mc_multicast_socket_t::socket
int socket
Definition: mc_socket.h:26
mc_socket_main_t::mc_main
mc_main_t mc_main
Definition: mc_socket.h:45
clib_error_return_code
#define clib_error_return_code(e, code, flags, args...)
Definition: error.h:93
mc_socket_main_t::iovecs
struct iovec * iovecs
Definition: mc_socket.h:70
vec_len
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
Definition: vec_bootstrap.h:142
setup_mutlicast_socket
static clib_error_t * setup_mutlicast_socket(mc_socket_main_t *msm, mc_multicast_socket_t *ms, char *type, uword udp_port)
Definition: mc_socket.c:650
ELOG_TYPE_DECLARE
#define ELOG_TYPE_DECLARE(f)
Definition: elog.h:442
UNIX_FILE_DATA_AVAILABLE_TO_WRITE
#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE
Definition: file.h:57
len
u8 len
Definition: ip_types.api:103
vec_add2
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:644
mc_socket_main_t::catchup_index_by_file_descriptor
uword * catchup_index_by_file_descriptor
Definition: mc_socket.h:59
vlib_buffer_alloc
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:708
clib_file
Definition: file.h:51
find_catchup_from_file_descriptor
static mc_socket_catchup_t * find_catchup_from_file_descriptor(mc_socket_main_t *msm, int file_descriptor)
Definition: mc_socket.c:385
clib_unix_error
#define clib_unix_error(format, args...)
Definition: error.h:65
c
svmdb_client_t * c
Definition: vpp_get_metrics.c:48
mc_socket_set_peer_id
static_always_inline mc_peer_id_t mc_socket_set_peer_id(u32 address_net_byte_order, u32 port_host_byte_order)
Definition: mc_socket.h:109
peer
vl_api_address_t peer
Definition: teib.api:28
mc_socket_catchup_t
Definition: mc_socket.h:31
uword
u64 uword
Definition: types.h:112
hash_get
#define hash_get(h, key)
Definition: hash.h:249
catchup_listen_read_ready
static clib_error_t * catchup_listen_read_ready(clib_file_t *uf)
Definition: mc_socket.c:564
CLIB_ERROR_ERRNO_VALID
@ CLIB_ERROR_ERRNO_VALID
Definition: error_bootstrap.h:51
i
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:261
append_buffer_index_to_iovec
static uword append_buffer_index_to_iovec(vlib_main_t *vm, u32 buffer_index, struct iovec **iovs_return)
Definition: mc_socket.c:57
ELOG
#define ELOG(em, f, data)
Definition: elog.h:474
mc_multicast_socket_t
Definition: mc_socket.h:24
from_relay_socket_read_ready
static clib_error_t * from_relay_socket_read_ready(clib_file_t *uf)
Definition: mc_socket.c:300
pool_get
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:255
vec_validate
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment)
Definition: vec.h:523
mc_socket_main_t::rx_mtu_n_buffers
u32 rx_mtu_n_buffers
Definition: mc_socket.h:64
mc_socket_main_t::base_multicast_udp_port_host_byte_order
u32 base_multicast_udp_port_host_byte_order
Definition: mc_socket.h:91
clib_file_main_t::file_pool
clib_file_t * file_pool
Definition: file.h:88
clib_file::flags
u32 flags
Definition: file.h:56
mc_socket_main_t::multicast_sockets
mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE]
Definition: mc_socket.h:48
clib_min
#define clib_min(x, y)
Definition: clib.h:342
mc_socket_main_init
clib_error_t * mc_socket_main_init(mc_socket_main_t *msm, char **intfc_probe_list, int n_intfcs_to_probe)
Definition: mc_socket.c:980
mc_socket_main_t::multicast_tx_ip4_address_host_byte_order
u32 multicast_tx_ip4_address_host_byte_order
Definition: mc_socket.h:83
vlib_buffer_t::current_length
u16 current_length
Number of bytes between current data and the end of this buffer.
Definition: buffer.h:122
msg_handler
static void msg_handler(mc_main_t *mcm, u32 buffer_index, u32 handler_frees_buffer, void *_h)
Definition: mc_socket.c:43
data
u8 data[128]
Definition: ipsec_types.api:92
vec_free
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:395
mc_socket_peer_id_get_address
static u32 mc_socket_peer_id_get_address(mc_peer_id_t i)
Definition: mc_socket.h:95
always_inline
#define always_inline
Definition: rdma_mlx5dv.h:23
mc_socket_main_t::catchups
mc_socket_catchup_t * catchups
Definition: mc_socket.h:57
mc_socket_peer_id_get_port
static u32 mc_socket_peer_id_get_port(mc_peer_id_t i)
Definition: mc_socket.h:103
u64
unsigned long u64
Definition: types.h:89
format
description fragment has unexpected format
Definition: map.api:433
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
clib_file_del
static void clib_file_del(clib_file_main_t *um, clib_file_t *f)
Definition: file.h:109
vlib_buffer_get_default_data_size
static_always_inline u32 vlib_buffer_get_default_data_size(vlib_main_t *vm)
Definition: buffer_funcs.h:122
u32
unsigned int u32
Definition: types.h:88
catchup_add_pending_output
static void * catchup_add_pending_output(mc_socket_catchup_t *c, uword n_bytes, u8 *set_output_vector)
Definition: mc_socket.c:827
socket_setup
static clib_error_t * socket_setup(mc_socket_main_t *msm)
Definition: mc_socket.c:712
n_bytes
u32 n_bytes
Definition: interface_output.c:401
ip4_address
manual_print typedef u8 ip4_address[4]
Definition: ip_types.api:18
n_left
u32 n_left
Definition: interface_output.c:1078
recvmsg_helper
static clib_error_t * recvmsg_helper(mc_socket_main_t *msm, int socket, struct sockaddr_in *rx_addr, u32 *buffer_index, u32 drop_message)
Definition: mc_socket.c:160
UNIX_FILE_UPDATE_MODIFY
@ UNIX_FILE_UPDATE_MODIFY
Definition: file.h:81
clib_error_return_unix
#define clib_error_return_unix(e, args...)
Definition: error.h:102
clib_file_add
static uword clib_file_add(clib_file_main_t *um, clib_file_t *template)
Definition: file.h:96
mc_socket_main_t::ack_socket
int ack_socket
Definition: mc_socket.h:51
vec_resize
#define vec_resize(V, N)
Resize a vector (no header, unspecified alignment). Add N elements to end of given vector V, ...
Definition: vec.h:296
value
u8 value
Definition: qos.api:54
catchup_cleanup
static void catchup_cleanup(mc_socket_main_t *msm, mc_socket_catchup_t *c, clib_file_main_t *um, clib_file_t *uf)
Definition: mc_socket.c:373
hash_unset
#define hash_unset(h, key)
Definition: hash.h:261
mastership_socket_read_ready
static clib_error_t * mastership_socket_read_ready(clib_file_t *uf)
Definition: mc_socket.c:246
clib_memset
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
vlib_main_t
Definition: main.h:102
b
vlib_buffer_t ** b
Definition: nat44_ei_out2in.c:717
u8
unsigned char u8
Definition: types.h:56
clib_error_t
Definition: clib_error.h:21
a
a
Definition: bitmap.h:544
vlib_buffer_get_current
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:257
vlib_buffer_t::data
u8 data[]
Packet data.
Definition: buffer.h:204
clib_error
#define clib_error(format, args...)
Definition: error.h:62
word
i64 word
Definition: types.h:111
vlib_buffer_free_one
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:1015
mc_socket_main_t::rx_buffers
u32 * rx_buffers
Definition: mc_socket.h:67
ELOG_DATA
#define ELOG_DATA(em, f)
Definition: elog.h:484
catchup_socket_read_ready
static clib_error_t * catchup_socket_read_ready(clib_file_t *uf, int is_server)
Definition: mc_socket.c:394
mc_socket.h
mc_socket_main_t::ack_udp_port
u32 ack_udp_port
Definition: mc_socket.h:75
format_socket_peer_id
static u8 * format_socket_peer_id(u8 *s, va_list *args)
Definition: mc_socket.c:26
sendmsg_helper
static clib_error_t * sendmsg_helper(mc_socket_main_t *msm, int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
Definition: mc_socket.c:81
mc_socket_main_t::if_ip4_address_net_byte_order
u32 if_ip4_address_net_byte_order
Definition: mc_socket.h:73
type
vl_api_fib_path_type_t type
Definition: fib_types.api:123
vlib_buffer_t::flags
u32 flags
Buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, ...
Definition: buffer.h:133
vlib_buffer_t
VLIB buffer representation.
Definition: buffer.h:111
clib_file_main_t::file_update
void(* file_update)(clib_file_t *file, clib_file_update_type_t update_type)
Definition: file.h:90