2 * Intel MIC Platform Software Stack (MPSS)
4 * Copyright(c) 2013 Intel Corporation.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
18 * Intel MIC User Space Tools.
32 #include <sys/types.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
/* Forward declaration; init_mic() receives and returns a void pointer. */
46 static void *init_mic(void *arg);
/* List anchor: change_virtblk_backend() iterates starting at mic_list.next. */
49 static struct mic_info mic_list;
/*
 * Number of elements in array x (total size divided by the size of element 0).
 * Only meaningful when x is a real array object, not a pointer.
 */
51 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/*
 * min_t(type, x, y): a GNU statement expression whose value is the smaller
 * of __min1 and __min2. The lines that declare __min1/__min2 are not visible
 * in this listing; presumably they hold x and y converted to `type` -
 * TODO confirm.
 */
53 #define min_t(type, x, y) ({ \
56 __min1 < __min2 ? __min1 : __min2; })
/*
 * Alignment helpers.
 *
 * `size` must be a power of two: rounding is done by masking with
 * ~(size - 1). Every macro parameter is parenthesized at its point of use so
 * that expression arguments (for example "a + b", "x << 1" or "c ? a : b")
 * keep the grouping the caller wrote.
 */

/* align addr on a size boundary - adjust address down if needed */
#define _ALIGN_DOWN(addr, size)	((addr) & ~((size) - 1))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	_ALIGN((addr), PAGE_SIZE)
/*
 * Access x exactly as written by going through a volatile-qualified lvalue
 * of x's own type; usable on either side of an assignment.
 * __typeof__ is used rather than typeof because the bare keyword is not
 * recognised when GCC/Clang compile in a strict ISO mode (e.g. -std=c11),
 * while the double-underscore spelling is always available.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
/* 64 KiB; feeds MAX_NET_PKT_SIZE, which sizes the packet buffers in virtio_net(). */
71 #define MAX_GSO_SIZE (64 * 1024)
/* MAX_GSO_SIZE plus ETH_H_LEN (defined outside this view), rounded up to a multiple of 64. */
73 #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Byte offset inside the device mapping at which init_vr() locates the first vring. */
74 #define MIC_DEVICE_PAGE_END 0x1000
/* Supply the flag value when the included headers do not define it. */
76 #ifndef VIRTIO_NET_HDR_F_DATA_VALID
77 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
/*
 * virtcons_dev_page: descriptor block for the virtio console device.
 * Members, in order: the generic mic_device_desc, two vring configs, the
 * host-feature and guest-acknowledgement words, and the console config.
 * num_vq, feature_len and config_len are computed from the member sizes, and
 * both vrings get MIC_VRING_ENTRIES entries (stored little-endian).
 * The opening line of this definition and some initializer lines are not
 * visible in this listing.
 */
81 struct mic_device_desc dd;
82 struct mic_vqconfig vqconfig[2];
83 __u32 host_features, guest_acknowledgements;
84 struct virtio_console_config cons_config;
85 } virtcons_dev_page = {
87 .type = VIRTIO_ID_CONSOLE,
88 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89 .feature_len = sizeof(virtcons_dev_page.host_features),
90 .config_len = sizeof(virtcons_dev_page.cons_config),
93 .num = htole16(MIC_VRING_ENTRIES),
96 .num = htole16(MIC_VRING_ENTRIES),
/*
 * virtnet_dev_page: descriptor block for the virtio net device.
 * Same layout as the console descriptor but ending in a virtio_net_config.
 * Two vrings of MIC_VRING_ENTRIES entries each; the advertised host features
 * (stored little-endian) are CSUM, GSO, GUEST_TSO4, GUEST_TSO6 and GUEST_ECN.
 * The opening line of this definition and some initializer lines are not
 * visible in this listing.
 */
101 struct mic_device_desc dd;
102 struct mic_vqconfig vqconfig[2];
103 __u32 host_features, guest_acknowledgements;
104 struct virtio_net_config net_config;
105 } virtnet_dev_page = {
107 .type = VIRTIO_ID_NET,
108 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109 .feature_len = sizeof(virtnet_dev_page.host_features),
110 .config_len = sizeof(virtnet_dev_page.net_config),
113 .num = htole16(MIC_VRING_ENTRIES),
116 .num = htole16(MIC_VRING_ENTRIES),
119 .host_features = htole32(
120 1 << VIRTIO_NET_F_CSUM |
121 1 << VIRTIO_NET_F_GSO |
122 1 << VIRTIO_NET_F_GUEST_TSO4 |
123 1 << VIRTIO_NET_F_GUEST_TSO6 |
124 1 << VIRTIO_NET_F_GUEST_ECN),
/* Directory in which set_backend_file() looks for the per-card mpssdNNN.conf file. */
130 static const char *mic_config_dir = "/etc/mpss";
/* Configuration key whose value names the virtio block backend file. */
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
/*
 * virtblk_dev_page: descriptor block for the virtio block device.
 * A single vring of MIC_VRING_ENTRIES entries; the SEG_MAX feature bit is
 * set and seg_max is MIC_VRING_ENTRIES - 2. capacity starts at 0 and is
 * filled in by set_backend_size() once the backend file is known.
 * The opening line of this definition and some initializer lines are not
 * visible in this listing.
 */
133 struct mic_device_desc dd;
134 struct mic_vqconfig vqconfig[1];
135 __u32 host_features, guest_acknowledgements;
136 struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
139 .type = VIRTIO_ID_BLOCK,
140 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141 .feature_len = sizeof(virtblk_dev_page.host_features),
142 .config_len = sizeof(virtblk_dev_page.blk_config),
145 .num = htole16(MIC_VRING_ENTRIES),
148 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
150 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151 .capacity = htole64(0),
/*
 * tap_configure - configure the TAP interface `dev` that belongs to `mic`.
 * The visible lines invoke the "ip" command twice through execvp(); each
 * invocation is accompanied by fork-failure and waitpid() error logging,
 * so each presumably runs in a forked child that is then waited for. The
 * second invocation is logged together with the address
 * 172.31.<mic->id + 1>.254/24.
 * The argument vectors (ifargv), the fork calls and the return statements
 * are not visible in this listing; virtio_net() treats a non-zero result as
 * failure.
 * NOTE(review): ipaddr is sized with IFNAMSIZ, an interface-name limit that
 * is unrelated to the address text. If IFNAMSIZ is 16, the snprintf() below
 * truncates the "/24" suffix as soon as mic->id + 1 needs two digits -
 * confirm and size the buffer for the longest address string.
 */
158 tap_configure(struct mic_info *mic, char *dev)
162 char ipaddr[IFNAMSIZ];
173 mpsslog("Configuring %s\n", dev);
174 ret = execvp("ip", ifargv);
176 mpsslog("%s execvp failed errno %s\n",
177 mic->name, strerror(errno));
182 mpsslog("%s fork failed errno %s\n",
183 mic->name, strerror(errno));
187 ret = waitpid(pid, NULL, 0);
189 mpsslog("%s waitpid failed errno %s\n",
190 mic->name, strerror(errno));
194 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
205 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206 ret = execvp("ip", ifargv);
208 mpsslog("%s execvp failed errno %s\n",
209 mic->name, strerror(errno));
214 mpsslog("%s fork failed errno %s\n",
215 mic->name, strerror(errno));
219 ret = waitpid(pid, NULL, 0);
221 mpsslog("%s waitpid failed errno %s\n",
222 mic->name, strerror(errno));
225 mpsslog("MIC name %s %s %d DONE!\n",
226 mic->name, __func__, __LINE__);
/*
 * tun_alloc - create a TAP interface named `dev`.
 * Opens /dev/net/tun, requests a TAP device without packet-info prefix and
 * with virtio-net headers (IFF_TAP | IFF_NO_PI | IFF_VNET_HDR) via TUNSETIFF,
 * enables checksum/TSO4/TSO6/TSO-ECN offloads via TUNSETOFFLOAD, then copies
 * the name chosen by the kernel back into `dev`.
 * The caller stores the result as the TAP file descriptor and treats a
 * negative value as failure; the return statements themselves are not
 * visible in this listing.
 * `dev` is written back with strcpy(), so it must have room for an
 * interface name (virtio_net() passes an IFNAMSIZ-byte buffer).
 * NOTE(review): strncpy() with length IFNAMSIZ does not NUL-terminate when
 * the source fills the whole field - confirm callers always pass a shorter,
 * terminated name.
 */
230 static int tun_alloc(struct mic_info *mic, char *dev)
237 fd = open("/dev/net/tun", O_RDWR);
239 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
243 memset(&ifr, 0, sizeof(ifr));
245 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
251 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252 mic->name, __func__, __LINE__, strerror(errno));
257 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
259 err = ioctl(fd, TUNSETOFFLOAD, offload);
261 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
262 mic->name, __func__, __LINE__, strerror(errno));
267 strcpy(dev, ifr.ifr_name);
268 mpsslog("Created TAP %s\n", dev);
/* Index of the virtio net descriptor in the pollfd array used by virtio_net(). */
273 #define NET_FD_VIRTIO_NET 0
/*
 * set_dp - remember the mapped device page `dp` for the given virtio device
 * type in the matching per-device member of `mic` (console, net or block).
 * An unrecognised type is only logged. Some case labels are not visible in
 * this listing.
 */
277 static void set_dp(struct mic_info *mic, int type, void *dp)
280 case VIRTIO_ID_CONSOLE:
281 mic->mic_console.console_dp = dp;
284 mic->mic_net.net_dp = dp;
286 case VIRTIO_ID_BLOCK:
287 mic->mic_virtblk.block_dp = dp;
290 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
/*
 * get_dp - return the device page pointer previously stored by set_dp() for
 * the given virtio device type. An unrecognised type is logged; the value
 * returned in that case is not visible in this listing.
 */
294 static void *get_dp(struct mic_info *mic, int type)
297 case VIRTIO_ID_CONSOLE:
298 return mic->mic_console.console_dp;
300 return mic->mic_net.net_dp;
301 case VIRTIO_ID_BLOCK:
302 return mic->mic_virtblk.block_dp;
304 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
/*
 * get_device_desc - find the device descriptor of the given type inside the
 * device page returned by get_dp().
 * The scan starts just past the struct mic_bootparam at the head of the page
 * and advances by mic_total_desc_size() of the current descriptor until
 * PAGE_SIZE is reached. The type comparison is done on the low 8 bits.
 * How `d` is derived from the offset and what is returned on a match or on
 * failure are not visible in this listing.
 */
309 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
311 struct mic_device_desc *d;
313 void *dp = get_dp(mic, type);
315 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
316 i += mic_total_desc_size(d)) {
326 mpsslog("%s %s d-> type %d d %p\n",
327 mic->name, __func__, d->type, d);
329 if (d->type == (__u8)type)
332 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
/*
 * next_desc - follow a descriptor chain by one step.
 * Tests VRING_DESC_F_NEXT on the little-endian flags field and, when chained,
 * reads the little-endian `next` index. The value returned for an unchained
 * descriptor is not visible in this listing.
 */
336 /* See comments in vhost.c for explanation of next_desc() */
337 static unsigned next_desc(struct vring_desc *desc)
341 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
343 next = le16toh(desc->next);
/*
 * sum_iovec_len - add up iov_len over the first copy->iovcnt entries of
 * copy->iov. The accumulator declaration and the return statement are not
 * visible in this listing.
 */
347 /* Sum up all the IOVEC length */
349 sum_iovec_len(struct mic_copy_desc *copy)
354 for (i = 0; i < copy->iovcnt; i++)
355 sum += copy->iov[i].iov_len;
/*
 * verify_out_len - consistency check after a copy: copy->out_len must equal
 * the summed iovec lengths. On mismatch the two values are logged and the
 * assert() then aborts the program (unless assertions are compiled out).
 */
359 static inline void verify_out_len(struct mic_info *mic,
360 struct mic_copy_desc *copy)
362 if (copy->out_len != sum_iovec_len(copy)) {
363 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
364 mic->name, __func__, __LINE__,
365 copy->out_len, sum_iovec_len(copy));
366 assert(copy->out_len == sum_iovec_len(copy));
/*
 * disp_iovec - log the base address and length of every iovec in `copy`.
 * `s` and `line` are printed as the caller's function name and line number.
 */
370 /* Display an iovec */
372 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
373 const char *s, int line)
377 for (i = 0; i < copy->iovcnt; i++)
378 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
379 mic->name, s, line, i,
380 copy->iov[i].iov_base, copy->iov[i].iov_len);
/*
 * read_avail_idx - read vr->info->avail_idx through ACCESS_ONCE so the load
 * is performed exactly once at this point.
 */
383 static inline __u16 read_avail_idx(struct mic_vring *vr)
385 return ACCESS_ONCE(vr->info->avail_idx);
/*
 * txrx_prepare - fill in a copy descriptor before mic_virtio_copy().
 * Selects vring index 0 for transmit and 1 for receive and asks for the used
 * index to be updated. For VIRTIO_ID_NET the payload iovec (iov[1]) is set
 * to `len` minus the virtio net header size.
 * NOTE(review): a line between the two iov_len assignments is not visible
 * in this listing, so it cannot be told from here whether the iov[0]
 * assignment applies to every type or only to non-net devices - confirm.
 */
388 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
389 struct mic_copy_desc *copy, ssize_t len)
391 copy->vr_idx = tx ? 0 : 1;
392 copy->update_used = true;
393 if (type == VIRTIO_ID_NET)
394 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
396 copy->iov[0].iov_len = len;
/*
 * mic_virtio_copy - hand the copy descriptor to the driver with the
 * MIC_VIRTIO_COPY_DESC ioctl on `fd`. errno and the ioctl result are logged;
 * the condition guarding the log and the return statement are not visible in
 * this listing. `vr` is not referenced on the visible lines.
 */
399 /* Central API which triggers the copies */
401 mic_virtio_copy(struct mic_info *mic, int fd,
402 struct mic_vring *vr, struct mic_copy_desc *copy)
406 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
408 mpsslog("%s %s %d errno %s ret %d\n",
409 mic->name, __func__, __LINE__,
410 strerror(errno), ret);
/*
 * _vring_size - bytes occupied by a vring with `num` entries.
 * First part: `num` descriptors plus (3 + num) 16-bit words, rounded up to a
 * multiple of `align` (the mask form requires `align` to be a power of two).
 * Second part: three 16-bit words plus `num` used-ring elements.
 */
415 static inline unsigned _vring_size(unsigned int num, unsigned long align)
417 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
418 + align - 1) & ~(align - 1))
419 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
/*
 * init_vr - map the device page and vrings of a virtio device.
 * Maps MIC_DEVICE_PAGE_END + vr_size * num_vq bytes of `fd` read-only and
 * shared, where vr_size is the page-aligned size of one vring plus its
 * _mic_vring_info. The mapping base is recorded with set_dp(). vr0 starts
 * at offset MIC_DEVICE_PAGE_END and vr1 one vr_size later; each info block
 * sits directly after its vring. The info magic is asserted to be
 * MIC_MAGIC + type for vr0 and MIC_MAGIC + type + 1 for vr1.
 * Callers compare the result against MAP_FAILED; the return statements, the
 * vring initialisation calls' names and the condition guarding the vr1 part
 * are not visible in this listing.
 */
423 * This initialization routine requires at least one
424 * vring i.e. vr0. vr1 is optional.
427 init_vr(struct mic_info *mic, int fd, int type,
428 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
433 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
434 MIC_VIRTIO_RING_ALIGN) +
435 sizeof(struct _mic_vring_info));
436 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
437 PROT_READ, MAP_SHARED, fd, 0);
438 if (MAP_FAILED == va) {
439 mpsslog("%s %s %d mmap failed errno %s\n",
440 mic->name, __func__, __LINE__,
444 set_dp(mic, type, va);
445 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
446 vr0->info = vr0->va +
447 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
449 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
450 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
451 __func__, mic->name, vr0->va, vr0->info, vr_size,
452 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
453 mpsslog("magic 0x%x expected 0x%x\n",
454 le32toh(vr0->info->magic), MIC_MAGIC + type);
455 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
457 vr1->va = (struct mic_vring *)
458 &va[MIC_DEVICE_PAGE_END + vr_size];
459 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
460 MIC_VIRTIO_RING_ALIGN);
462 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
463 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
464 __func__, mic->name, vr1->va, vr1->info, vr_size,
465 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
466 mpsslog("magic 0x%x expected 0x%x\n",
467 le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
468 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
/*
 * wait_for_card_driver - block until the card-side driver of the given
 * device type reports VIRTIO_CONFIG_S_DRIVER_OK in its descriptor status.
 * Polls for POLLIN with no timeout; every status change seen on a wakeup is
 * logged. The loop framing, the fd assignment and the return values are not
 * visible in this listing; callers store the result in an error variable.
 */
475 wait_for_card_driver(struct mic_info *mic, int fd, int type)
477 struct pollfd pollfd;
479 struct mic_device_desc *desc = get_device_desc(mic, type);
484 prev_status = desc->status;
486 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
487 mic->name, __func__, type, desc->status);
490 pollfd.events = POLLIN;
492 err = poll(&pollfd, 1, -1);
494 mpsslog("%s %s poll failed %s\n",
495 mic->name, __func__, strerror(errno));
499 if (pollfd.revents) {
500 if (desc->status != prev_status) {
501 mpsslog("%s %s Waiting... desc-> type %d "
503 mic->name, __func__, type,
505 prev_status = desc->status;
507 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
508 mpsslog("%s %s poll.revents %d\n",
509 mic->name, __func__, pollfd.revents);
510 mpsslog("%s %s desc-> type %d status 0x%x\n",
511 mic->name, __func__, type,
/*
 * spin_for_descriptors - loop for as long as the avail index sampled from
 * vr->info equals the ring's own avail->idx, i.e. until the two differ.
 * The index from vr->info is sampled once before the loop; the ring index is
 * re-read on every iteration through ACCESS_ONCE. What the loop body does
 * besides logging is not visible in this listing.
 */
520 /* Spin till we have some descriptors */
522 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
524 __u16 avail_idx = read_avail_idx(vr);
526 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
528 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
530 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
/*
 * virtio_net - network backend for one MIC; `arg` is its struct mic_info.
 * Visible flow:
 *  - create the TAP interface "mic<id>" with tun_alloc() and configure it
 *    with tap_configure();
 *  - map the tx/rx vrings with init_vr();
 *  - poll the virtio net descriptor and the TAP descriptor without timeout;
 *  - if the device status lacks VIRTIO_CONFIG_S_DRIVER_OK after a wakeup,
 *    call wait_for_card_driver() first;
 *  - TAP readable: readv() into a header iovec plus a payload iovec, mark
 *    the checksum as valid, wait for a tx descriptor and push the data with
 *    mic_virtio_copy();
 *  - virtio net readable: while the rx ring has pending entries, copy one
 *    out with mic_virtio_copy() and writev() it to the TAP.
 * Two static buffer pairs are used, index 0 for TAP-to-card and index 1 for
 * card-to-TAP. The loop framing, several call arguments and all exit paths
 * are not visible in this listing.
 * NOTE(review): several calls pass `copy` itself (a struct mic_copy_desc
 * object) where disp_iovec()/sum_iovec_len() take a pointer; they may sit in
 * conditionally compiled debug code that is not visible here - confirm.
 * NOTE(review): the '©' tokens look like a mis-decoded "&copy" introduced
 * by the listing - confirm against the original file.
 */
537 virtio_net(void *arg)
539 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
540 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
541 struct iovec vnet_iov[2][2] = {
542 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
543 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
544 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
545 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
547 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
548 struct mic_info *mic = (struct mic_info *)arg;
549 char if_name[IFNAMSIZ];
550 struct pollfd net_poll[MAX_NET_FD];
551 struct mic_vring tx_vr, rx_vr;
552 struct mic_copy_desc copy;
553 struct mic_device_desc *desc;
556 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
557 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
558 if (mic->mic_net.tap_fd < 0)
561 if (tap_configure(mic, if_name))
563 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
565 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
566 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
567 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
568 net_poll[NET_FD_TUN].events = POLLIN;
570 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
571 VIRTIO_ID_NET, &tx_vr, &rx_vr,
572 virtnet_dev_page.dd.num_vq)) {
573 mpsslog("%s init_vr failed %s\n",
574 mic->name, strerror(errno));
579 desc = get_device_desc(mic, VIRTIO_ID_NET);
584 net_poll[NET_FD_VIRTIO_NET].revents = 0;
585 net_poll[NET_FD_TUN].revents = 0;
587 /* Start polling for data from tap and virtio net */
588 err = poll(net_poll, 2, -1);
590 mpsslog("%s poll failed %s\n",
591 __func__, strerror(errno));
594 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
595 err = wait_for_card_driver(mic,
596 mic->mic_net.virtio_net_fd,
599 mpsslog("%s %s %d Exiting...\n",
600 mic->name, __func__, __LINE__);
605 * Check if there is data to be read from TUN and write to
606 * virtio net fd if there is.
608 if (net_poll[NET_FD_TUN].revents & POLLIN) {
610 len = readv(net_poll[NET_FD_TUN].fd,
611 copy.iov, copy.iovcnt);
613 struct virtio_net_hdr *hdr
614 = (struct virtio_net_hdr *)vnet_hdr[0];
616 /* Disable checksums on the card since we are on
617 a reliable PCIe link */
618 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
620 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
621 __func__, __LINE__, hdr->flags);
622 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
623 copy.out_len, hdr->gso_type);
626 disp_iovec(mic, copy, __func__, __LINE__);
627 mpsslog("%s %s %d read from tap 0x%lx\n",
628 mic->name, __func__, __LINE__,
631 spin_for_descriptors(mic, &tx_vr);
632 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©,
635 err = mic_virtio_copy(mic,
636 mic->mic_net.virtio_net_fd, &tx_vr,
639 mpsslog("%s %s %d mic_virtio_copy %s\n",
640 mic->name, __func__, __LINE__,
644 verify_out_len(mic, ©);
646 disp_iovec(mic, copy, __func__, __LINE__);
647 mpsslog("%s %s %d wrote to net 0x%lx\n",
648 mic->name, __func__, __LINE__,
649 sum_iovec_len(©));
651 /* Reinitialize IOV for next run */
652 iov0[1].iov_len = MAX_NET_PKT_SIZE;
653 } else if (len < 0) {
654 disp_iovec(mic, ©, __func__, __LINE__);
655 mpsslog("%s %s %d read failed %s ", mic->name,
656 __func__, __LINE__, strerror(errno));
657 mpsslog("cnt %d sum %zd\n",
658 copy.iovcnt, sum_iovec_len(©));
663 * Check if there is data to be read from virtio net and
664 * write to TUN if there is.
666 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
667 while (rx_vr.info->avail_idx !=
668 le16toh(rx_vr.vr.avail->idx)) {
670 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©,
672 + sizeof(struct virtio_net_hdr));
674 err = mic_virtio_copy(mic,
675 mic->mic_net.virtio_net_fd, &rx_vr,
679 struct virtio_net_hdr *hdr
680 = (struct virtio_net_hdr *)
683 mpsslog("%s %s %d hdr->flags 0x%x, ",
684 mic->name, __func__, __LINE__,
686 mpsslog("out_len %d gso_type 0x%x\n",
690 /* Set the correct output iov_len */
691 iov1[1].iov_len = copy.out_len -
692 sizeof(struct virtio_net_hdr);
693 verify_out_len(mic, ©);
695 disp_iovec(mic, copy, __func__,
698 mic->name, __func__, __LINE__);
699 mpsslog("read from net 0x%lx\n",
700 sum_iovec_len(copy));
702 len = writev(net_poll[NET_FD_TUN].fd,
703 copy.iov, copy.iovcnt);
704 if (len != sum_iovec_len(©)) {
705 mpsslog("Tun write failed %s ",
707 mpsslog("len 0x%zx ", len);
708 mpsslog("read_len 0x%zx\n",
709 sum_iovec_len(©));
712 disp_iovec(mic, ©, __func__,
717 mpsslog("wrote to tap 0x%lx\n",
722 mpsslog("%s %s %d mic_virtio_copy %s\n",
723 mic->name, __func__, __LINE__,
729 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
730 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
/* Indices into the pollfd array used by virtio_console(). */
737 #define VIRTIO_CONSOLE_FD 0
738 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
739 #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
/* One page. */
740 #define MAX_BUFFER_SIZE PAGE_SIZE
/*
 * virtio_console - console backend for one MIC; `arg` is its struct mic_info.
 * Visible flow:
 *  - open a pseudoterminal master (posix_openpt, grantpt, unlockpt) and
 *    announce the slave name on stdout and in the log;
 *  - map the tx/rx vrings with init_vr();
 *  - poll the pty master and the virtio console descriptor without timeout;
 *  - if the device status lacks VIRTIO_CONFIG_S_DRIVER_OK after a wakeup,
 *    call wait_for_card_driver() first;
 *  - pty readable: readv() up to one page, wait for a tx descriptor and push
 *    the data with mic_virtio_copy();
 *  - virtio console readable: while the rx ring has pending entries, copy
 *    one out with mic_virtio_copy() and write it back out (the destination
 *    descriptor of that write is on a line not visible here).
 * The loop framing, several call arguments and all exit paths are not
 * visible in this listing.
 * NOTE(review): the log texts ("read from tap", "wrote to net", "Tun write
 * failed") match the ones in virtio_net() although this path uses a pty.
 * NOTE(review): the POLLERR check near the end indexes console_poll[] with
 * NET_FD_VIRTIO_NET; it has the same value (0) as VIRTIO_CONSOLE_FD, which
 * is the constant used everywhere else in this function.
 * NOTE(review): several calls pass `copy` itself where disp_iovec() and
 * sum_iovec_len() take a pointer; possibly debug-only code - confirm.
 * NOTE(review): the '©' tokens look like a mis-decoded "&copy" introduced
 * by the listing - confirm against the original file.
 */
743 virtio_console(void *arg)
745 static __u8 vcons_buf[2][PAGE_SIZE];
746 struct iovec vcons_iov[2] = {
747 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
748 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
750 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
751 struct mic_info *mic = (struct mic_info *)arg;
753 struct pollfd console_poll[MAX_CONSOLE_FD];
757 struct mic_vring tx_vr, rx_vr;
758 struct mic_copy_desc copy;
759 struct mic_device_desc *desc;
761 pty_fd = posix_openpt(O_RDWR);
763 mpsslog("can't open a pseudoterminal master device: %s\n",
767 pts_name = ptsname(pty_fd);
768 if (pts_name == NULL) {
769 mpsslog("can't get pts name\n");
772 printf("%s console message goes to %s\n", mic->name, pts_name);
773 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
774 err = grantpt(pty_fd);
776 mpsslog("can't grant access: %s %s\n",
777 pts_name, strerror(errno));
780 err = unlockpt(pty_fd);
782 mpsslog("can't unlock a pseudoterminal: %s %s\n",
783 pts_name, strerror(errno));
786 console_poll[MONITOR_FD].fd = pty_fd;
787 console_poll[MONITOR_FD].events = POLLIN;
789 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
790 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
792 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
793 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
794 virtcons_dev_page.dd.num_vq)) {
795 mpsslog("%s init_vr failed %s\n",
796 mic->name, strerror(errno));
801 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
804 console_poll[MONITOR_FD].revents = 0;
805 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
806 err = poll(console_poll, MAX_CONSOLE_FD, -1);
808 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
812 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
813 err = wait_for_card_driver(mic,
814 mic->mic_console.virtio_console_fd,
817 mpsslog("%s %s %d Exiting...\n",
818 mic->name, __func__, __LINE__);
823 if (console_poll[MONITOR_FD].revents & POLLIN) {
825 len = readv(pty_fd, copy.iov, copy.iovcnt);
828 disp_iovec(mic, copy, __func__, __LINE__);
829 mpsslog("%s %s %d read from tap 0x%lx\n",
830 mic->name, __func__, __LINE__,
833 spin_for_descriptors(mic, &tx_vr);
834 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
837 err = mic_virtio_copy(mic,
838 mic->mic_console.virtio_console_fd,
841 mpsslog("%s %s %d mic_virtio_copy %s\n",
842 mic->name, __func__, __LINE__,
846 verify_out_len(mic, ©);
848 disp_iovec(mic, copy, __func__, __LINE__);
849 mpsslog("%s %s %d wrote to net 0x%lx\n",
850 mic->name, __func__, __LINE__,
851 sum_iovec_len(copy));
853 /* Reinitialize IOV for next run */
854 iov0->iov_len = PAGE_SIZE;
855 } else if (len < 0) {
856 disp_iovec(mic, ©, __func__, __LINE__);
857 mpsslog("%s %s %d read failed %s ",
858 mic->name, __func__, __LINE__,
860 mpsslog("cnt %d sum %zd\n",
861 copy.iovcnt, sum_iovec_len(©));
865 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
866 while (rx_vr.info->avail_idx !=
867 le16toh(rx_vr.vr.avail->idx)) {
869 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
872 err = mic_virtio_copy(mic,
873 mic->mic_console.virtio_console_fd,
876 /* Set the correct output iov_len */
877 iov1->iov_len = copy.out_len;
878 verify_out_len(mic, ©);
880 disp_iovec(mic, copy, __func__,
883 mic->name, __func__, __LINE__);
884 mpsslog("read from net 0x%lx\n",
885 sum_iovec_len(copy));
888 copy.iov, copy.iovcnt);
889 if (len != sum_iovec_len(©)) {
890 mpsslog("Tun write failed %s ",
892 mpsslog("len 0x%zx ", len);
893 mpsslog("read_len 0x%zx\n",
894 sum_iovec_len(©));
897 disp_iovec(mic, copy, __func__,
902 mpsslog("wrote to tap 0x%lx\n",
907 mpsslog("%s %s %d mic_virtio_copy %s\n",
908 mic->name, __func__, __LINE__,
914 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
915 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
/*
 * add_virtio_device - register the device described by `dd` with the card.
 * Opens /dev/mic<id> read-write, issues MIC_VIRTIO_ADD_DEVICE with the
 * descriptor, and stores the open descriptor in the per-device slot of `mic`
 * that matches the device type (net, console or block).
 * The failure paths (including what happens to `fd` when the ioctl fails)
 * are not visible in this listing.
 */
924 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
929 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
930 fd = open(path, O_RDWR);
932 mpsslog("Could not open %s %s\n", path, strerror(errno));
936 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
938 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
944 mic->mic_net.virtio_net_fd = fd;
945 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
947 case VIRTIO_ID_CONSOLE:
948 mic->mic_console.virtio_console_fd = fd;
949 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
951 case VIRTIO_ID_BLOCK:
952 mic->mic_virtblk.virtio_block_fd = fd;
953 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
/*
 * set_backend_file - look up the virtio block backend file for this MIC.
 * Reads <mic_config_dir>/mpssd<id, 3 digits>.conf line by line until a line
 * starts with the VIRTBLK_BACKEND key, takes the text after the first '='
 * and stores a heap copy in mic->mic_virtblk.backend_file.
 * `buff` first holds the path and is then reused as the line buffer.
 * open_backend() treats a zero result as "no backend"; the return statements
 * and the handling of the trailing newline are not visible in this listing.
 * The allocation is strlen(evv) + 1 bytes while the copy starts at evv + 1,
 * so the buffer is one byte larger than strictly needed.
 */
959 set_backend_file(struct mic_info *mic)
962 char buff[PATH_MAX], *line, *evv, *p;
964 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
965 config = fopen(buff, "r");
968 do { /* look for "virtblk_backend=XXXX" */
969 line = fgets(buff, PATH_MAX, config);
974 p = strchr(line, '\n');
977 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
981 evv = strchr(line, '=');
984 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
985 if (mic->mic_virtblk.backend_file == NULL) {
986 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
989 strcpy(mic->mic_virtblk.backend_file, evv + 1);
/*
 * set_backend_size - record the size of the opened backend file and publish
 * it as the block device capacity.
 * The size comes from lseek() on the backend descriptor (the `whence`
 * argument is on a line not visible here). The capacity is the size in
 * SECTOR_SIZE units, rounded up when the size is not a whole number of
 * sectors, and is converted to little-endian only after the arithmetic.
 * open_backend() treats a zero result as failure; the return statements are
 * not visible in this listing.
 */
993 #define SECTOR_SIZE 512
995 set_backend_size(struct mic_info *mic)
997 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
999 if (mic->mic_virtblk.backend_size < 0) {
1000 mpsslog("%s: can't seek: %s\n",
1001 mic->name, mic->mic_virtblk.backend_file);
1004 virtblk_dev_page.blk_config.capacity =
1005 mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007 virtblk_dev_page.blk_config.capacity++;
1009 virtblk_dev_page.blk_config.capacity =
1010 htole64(virtblk_dev_page.blk_config.capacity);
/*
 * open_backend - prepare the block backend: resolve its file name, open it
 * read-write, determine its size and map the whole file read-write/shared.
 * The trailing close() and free() are the unwind steps for the failure
 * paths (labels and return statements are not visible in this listing).
 * virtio_block() treats a zero result as "no virtblk backend".
 */
1016 open_backend(struct mic_info *mic)
1018 if (!set_backend_file(mic))
1020 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021 if (mic->mic_virtblk.backend < 0) {
1022 mpsslog("%s: can't open: %s\n", mic->name,
1023 mic->mic_virtblk.backend_file);
1026 if (!set_backend_size(mic))
1028 mic->mic_virtblk.backend_addr = mmap(NULL,
1029 mic->mic_virtblk.backend_size,
1030 PROT_READ|PROT_WRITE, MAP_SHARED,
1031 mic->mic_virtblk.backend, 0L);
1032 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033 mpsslog("%s: can't map: %s %s\n",
1034 mic->name, mic->mic_virtblk.backend_file,
1041 close(mic->mic_virtblk.backend);
1043 free(mic->mic_virtblk.backend_file);
/*
 * close_backend - undo open_backend(): unmap the backend file, close its
 * descriptor and free the stored file name.
 */
1049 close_backend(struct mic_info *mic)
1051 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052 close(mic->mic_virtblk.backend);
1053 free(mic->mic_virtblk.backend_file);
/*
 * start_virtblk - bring up the virtio block device.
 * Verifies that blk_config inside virtblk_dev_page is 8-byte aligned,
 * registers the device with add_virtio_device() and maps its single vring
 * with init_vr() (vr1 is NULL).
 * virtio_block() treats a zero result as failure; the return statements are
 * not visible in this listing.
 * NOTE(review): on the visible line the result of add_virtio_device() is
 * not examined before the descriptor is handed to init_vr() - confirm.
 */
1057 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1059 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1064 add_virtio_device(mic, &virtblk_dev_page.dd);
1065 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066 VIRTIO_ID_BLOCK, vring, NULL,
1067 virtblk_dev_page.dd.num_vq)) {
1068 mpsslog("%s init_vr failed %s\n",
1069 mic->name, strerror(errno));
/*
 * stop_virtblk - tear down the virtio block device: unmap the device page
 * and vring area using the same size computation as init_vr(), log a
 * munmap failure, and close the virtio block descriptor.
 */
1076 stop_virtblk(struct mic_info *mic)
1080 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081 MIC_VIRTIO_RING_ALIGN) +
1082 sizeof(struct _mic_vring_info));
1083 ret = munmap(mic->mic_virtblk.block_dp,
1084 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1086 mpsslog("%s munmap errno %d\n", mic->name, errno);
1087 close(mic->mic_virtblk.virtio_block_fd);
/*
 * header_error_check - validate the first descriptor of a block request.
 * It must be exactly sizeof(struct virtio_blk_outhdr) long, must be chained
 * to a following descriptor (VRING_DESC_F_NEXT) and must not be flagged
 * VRING_DESC_F_WRITE. Each violation is logged; the values returned are not
 * visible in this listing (virtio_block() stores the result as a status).
 */
1091 header_error_check(struct vring_desc *desc)
1093 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1094 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1095 __func__, __LINE__);
1098 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1099 mpsslog("%s() %d: alone\n",
1100 __func__, __LINE__);
1103 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1104 mpsslog("%s() %d: not read\n",
1105 __func__, __LINE__);
/*
 * read_header - copy the request header from the card into *hdr using a
 * single iovec and the MIC_VIRTIO_COPY_DESC ioctl on vring 0. The used
 * index is left untouched. Returns the ioctl result.
 * How desc_idx and the iovec are attached to the copy descriptor is on
 * lines not visible in this listing.
 */
1112 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1115 struct mic_copy_desc copy;
1117 iovec.iov_len = sizeof(*hdr);
1118 iovec.iov_base = hdr;
1121 copy.vr_idx = 0; /* only one vring on virtio_block */
1122 copy.update_used = false; /* do not update used index */
1123 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/*
 * transfer_blocks - move the data part of a block request with the
 * MIC_VIRTIO_COPY_DESC ioctl on vring 0, using `iovcnt` iovec entries and
 * without updating the used index. Returns the ioctl result.
 * The line that attaches `iovec` to the copy descriptor is not visible in
 * this listing.
 */
1127 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1129 struct mic_copy_desc copy;
1132 copy.iovcnt = iovcnt;
1133 copy.vr_idx = 0; /* only one vring on virtio_block */
1134 copy.update_used = false; /* do not update used index */
1135 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/*
 * status_error_check - validate the status descriptor of a block request:
 * its length must be exactly one byte. A violation is logged; the values
 * returned are not visible in this listing.
 */
1139 status_error_check(struct vring_desc *desc)
1141 if (le32toh(desc->len) != sizeof(__u8)) {
1142 mpsslog("%s() %d: length is not sizeof(status)\n",
1143 __func__, __LINE__);
/*
 * write_status - deliver the one-byte request status with the
 * MIC_VIRTIO_COPY_DESC ioctl on vring 0 and, unlike read_header() and
 * transfer_blocks(), request the used index to be updated. Returns the
 * ioctl result.
 */
1150 write_status(int fd, __u8 *status)
1153 struct mic_copy_desc copy;
1155 iovec.iov_base = status;
1156 iovec.iov_len = sizeof(*status);
1159 copy.vr_idx = 0; /* only one vring on virtio_block */
1160 copy.update_used = true; /* Update used index */
1161 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/* Supply the request type value when the included headers do not define it. */
1164 #ifndef VIRTIO_BLK_T_GET_ID
1165 #define VIRTIO_BLK_T_GET_ID 8
/*
 * virtio_block - block backend for one MIC; `arg` is its struct mic_info.
 * Visible flow, repeated forever:
 *  - open_backend(); when no backend is configured, idle until the
 *    `signaled` flag is set (it is set by change_virtblk_backend());
 *  - start_virtblk() and allocate seg_max iovec entries;
 *  - until `signaled` is set, poll the virtio block descriptor with a one
 *    second timeout and, while the ring has pending entries, process one
 *    request: validate and read the header, build one iovec per chained
 *    data descriptor pointing into the mapped backend file at
 *    hdr.sector * SECTOR_SIZE, transfer the data, then validate and write
 *    the status descriptor.
 * Cleanup labels, several call arguments and the end of the function are
 * not visible in this listing.
 * NOTE(review): the data-descriptor loop reads desc->flags and desc->len
 * without le16toh()/le32toh(), unlike header_error_check() and next_desc().
 * NOTE(review): the request type check is a bit mask
 * (hdr.type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_GET_ID)), so any type whose
 * bits fall inside that mask passes - confirm this is intended.
 * NOTE(review): hdr.sector and the descriptor lengths come from the card and
 * are used as offsets/lengths into the mapping; no bound check against
 * backend_size appears on the visible lines - confirm.
 */
1169 virtio_block(void *arg)
1171 struct mic_info *mic = (struct mic_info *)arg;
1173 struct pollfd block_poll;
1174 struct mic_vring vring;
1177 struct vring_desc *desc;
1178 struct iovec *iovec, *piov;
1180 __u32 buffer_desc_idx;
1181 struct virtio_blk_outhdr hdr;
1184 for (;;) { /* forever */
1185 if (!open_backend(mic)) { /* No virtblk */
1186 for (mic->mic_virtblk.signaled = 0;
1187 !mic->mic_virtblk.signaled;)
1192 /* backend file is specified. */
1193 if (!start_virtblk(mic, &vring))
1194 goto _close_backend;
1195 iovec = malloc(sizeof(*iovec) *
1196 le32toh(virtblk_dev_page.blk_config.seg_max));
1198 mpsslog("%s: can't alloc iovec: %s\n",
1199 mic->name, strerror(ENOMEM));
1203 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1204 block_poll.events = POLLIN;
1205 for (mic->mic_virtblk.signaled = 0;
1206 !mic->mic_virtblk.signaled;) {
1207 block_poll.revents = 0;
1208 /* timeout in 1 sec to see signaled */
1209 ret = poll(&block_poll, 1, 1000);
1211 mpsslog("%s %d: poll failed: %s\n",
1217 if (!(block_poll.revents & POLLIN)) {
1219 mpsslog("%s %d: block_poll.revents=0x%x\n",
1220 __func__, __LINE__, block_poll.revents);
1226 while (vring.info->avail_idx !=
1227 le16toh(vring.vr.avail->idx)) {
1228 /* read header element */
1230 vring.info->avail_idx &
1233 vring.vr.avail->ring[avail_idx]);
1234 desc = &vring.vr.desc[desc_idx];
1236 mpsslog("%s() %d: avail_idx=%d ",
1238 vring.info->avail_idx);
1239 mpsslog("vring.vr.num=%d desc=%p\n",
1240 vring.vr.num, desc);
1242 status = header_error_check(desc);
1244 mic->mic_virtblk.virtio_block_fd,
1247 mpsslog("%s() %d %s: ret=%d %s\n",
1253 /* buffer element */
1256 fos = mic->mic_virtblk.backend_addr +
1257 (hdr.sector * SECTOR_SIZE);
1258 buffer_desc_idx = next_desc(desc);
1259 desc_idx = buffer_desc_idx;
1260 for (desc = &vring.vr.desc[buffer_desc_idx];
1261 desc->flags & VRING_DESC_F_NEXT;
1262 desc_idx = next_desc(desc),
1263 desc = &vring.vr.desc[desc_idx]) {
1264 piov->iov_len = desc->len;
1265 piov->iov_base = fos;
1269 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1270 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1271 VIRTIO_BLK_T_GET_ID)) {
1273 VIRTIO_BLK_T_IN - does not do
1274 anything. Probably for documenting.
1275 VIRTIO_BLK_T_SCSI_CMD - for
1277 VIRTIO_BLK_T_FLUSH - turned off in
1279 VIRTIO_BLK_T_BARRIER - defined but not
1282 mpsslog("%s() %d: type %x ",
1285 mpsslog("is not supported\n");
1289 ret = transfer_blocks(
1290 mic->mic_virtblk.virtio_block_fd,
1297 /* write status and update used pointer */
1299 status = status_error_check(desc);
1301 mic->mic_virtblk.virtio_block_fd,
1304 mpsslog("%s() %d: write status=%d on desc=%p\n",
/*
 * reset - request a card reset by writing "reset" to the sysfs "state"
 * attribute, then read the attribute back, logging it and checking for
 * "ready". A counter starts at RESET_TIMEOUT (120); how it is decremented,
 * how long each iteration waits and what happens on timeout are on lines
 * not visible in this listing.
 */
1321 reset(struct mic_info *mic)
1323 #define RESET_TIMEOUT 120
1324 int i = RESET_TIMEOUT;
1325 setsysfs(mic->name, "state", "reset");
1328 state = readsysfs(mic->name, "state");
1331 mpsslog("%s: %s %d state %s\n",
1332 mic->name, __func__, __LINE__, state);
1334 if (!strcmp(state, "ready")) {
/*
 * get_mic_shutdown_status - translate the textual shutdown_status read from
 * sysfs ("nop", "crashed", "halted", "poweroff", "restart") into a status
 * value; "poweroff" maps to MIC_POWER_OFF, the other return lines are not
 * visible in this listing. An unknown string is logged as a BUG.
 */
1346 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1348 if (!strcmp(shutdown_status, "nop"))
1350 if (!strcmp(shutdown_status, "crashed"))
1352 if (!strcmp(shutdown_status, "halted"))
1354 if (!strcmp(shutdown_status, "poweroff"))
1355 return MIC_POWER_OFF;
1356 if (!strcmp(shutdown_status, "restart"))
1358 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
/*
 * get_mic_state - read the sysfs "state" attribute of the card and map the
 * text to the corresponding enum mic_states value ("ready", "booting",
 * "online", "shutting_down", "reset_failed", "resetting"). An unknown
 * string is logged as a BUG. The handling of a failed read, the release of
 * the string and the return statement are not visible in this listing.
 */
1363 static int get_mic_state(struct mic_info *mic)
1366 enum mic_states mic_state;
1369 state = readsysfs(mic->name, "state");
1372 mpsslog("%s: %s %d state %s\n",
1373 mic->name, __func__, __LINE__, state);
1375 if (!strcmp(state, "ready")) {
1376 mic_state = MIC_READY;
1377 } else if (!strcmp(state, "booting")) {
1378 mic_state = MIC_BOOTING;
1379 } else if (!strcmp(state, "online")) {
1380 mic_state = MIC_ONLINE;
1381 } else if (!strcmp(state, "shutting_down")) {
1382 mic_state = MIC_SHUTTING_DOWN;
1383 } else if (!strcmp(state, "reset_failed")) {
1384 mic_state = MIC_RESET_FAILED;
1385 } else if (!strcmp(state, "resetting")) {
1386 mic_state = MIC_RESETTING;
1388 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
/*
 * mic_handle_shutdown - follow the card's shutdown_status sysfs attribute.
 * Each value read is logged and classified with get_mic_shutdown_status();
 * the string returned by readsysfs() is freed on the visible branches. A
 * counter starts at SHUTDOWN_TIMEOUT (60) and a timeout is logged at the
 * end. The loop framing, the case labels and the per-case actions are not
 * visible in this listing.
 * NOTE(review): the timeout message prints shutdown_status; whether the
 * string is still valid (not yet freed) at that point cannot be told from
 * the visible lines - confirm.
 */
1396 static void mic_handle_shutdown(struct mic_info *mic)
1398 #define SHUTDOWN_TIMEOUT 60
1399 int i = SHUTDOWN_TIMEOUT;
1400 char *shutdown_status;
1402 shutdown_status = readsysfs(mic->name, "shutdown_status");
1403 if (!shutdown_status) {
1407 mpsslog("%s: %s %d shutdown_status %s\n",
1408 mic->name, __func__, __LINE__, shutdown_status);
1409 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1415 free(shutdown_status);
1420 free(shutdown_status);
1426 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427 mic->name, __func__, __LINE__, shutdown_status);
/*
 * open_state_fd - open <MICSYSFSDIR>/<mic name>/state read-only and log a
 * failure. Callers use the result as a file descriptor; the return statement
 * is not visible in this listing.
 * The path is formatted with a limit of PATH_MAX - 1 although the buffer is
 * PATH_MAX bytes (snprintf already reserves room for the terminator).
 */
1431 static int open_state_fd(struct mic_info *mic)
1433 char pathname[PATH_MAX];
1436 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437 MICSYSFSDIR, mic->name, "state");
1439 fd = open(pathname, O_RDONLY);
1441 mpsslog("%s: opening file %s failed %s\n",
1442 mic->name, pathname, strerror(errno));
/*
 * block_till_state_change - wait for the sysfs state file behind `fd` to
 * signal a change.
 * Polls for POLLERR | POLLPRI with no timeout, then seeks back to offset 0
 * and reads the attribute into a scratch buffer; the value read is not used
 * on the visible lines. Each failing step is logged. The fd assignment into
 * ufds[0] and the return statements are not visible in this listing.
 */
1446 static int block_till_state_change(int fd, struct mic_info *mic)
1448 struct pollfd ufds[1];
1449 char value[PAGE_SIZE];
1453 ufds[0].events = POLLERR | POLLPRI;
1454 ret = poll(ufds, 1, -1);
1456 mpsslog("%s: %s %d poll failed %s\n",
1457 mic->name, __func__, __LINE__, strerror(errno));
1461 ret = lseek(fd, 0, SEEK_SET);
1463 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464 mic->name, __func__, __LINE__, strerror(errno));
1468 ret = read(fd, value, sizeof(value));
1470 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471 mic->name, __func__, __LINE__, strerror(errno));
/*
 * mic_config - state monitor for one MIC; `arg` is its struct mic_info.
 * Opens the sysfs state file, waits for changes with
 * block_till_state_change() and dispatches on get_mic_state():
 * MIC_SHUTTING_DOWN is handled by mic_handle_shutdown(); on the
 * MIC_RESET_FAILED path the process mic->pid is sent SIGTERM and reaped with
 * waitpid(), and if boot_on_resume is set "boot" is written to the state
 * attribute and the flag is cleared.
 * The loop framing, further case labels, fall-through behaviour between
 * cases and the exit paths are not visible in this listing.
 */
1479 mic_config(void *arg)
1481 struct mic_info *mic = (struct mic_info *)arg;
1482 int fd, ret, stat = 0;
1484 fd = open_state_fd(mic);
1486 mpsslog("%s: %s %d open state fd failed %s\n",
1487 mic->name, __func__, __LINE__, strerror(errno));
1492 ret = block_till_state_change(fd, mic);
1494 mpsslog("%s: %s %d block_till_state_change error %s\n",
1495 mic->name, __func__, __LINE__, strerror(errno));
1499 switch (get_mic_state(mic)) {
1500 case MIC_SHUTTING_DOWN:
1501 mic_handle_shutdown(mic);
1504 case MIC_RESET_FAILED:
1505 ret = kill(mic->pid, SIGTERM);
1506 mpsslog("%s: %s %d kill pid %d ret %d\n",
1507 mic->name, __func__, __LINE__,
1510 ret = waitpid(mic->pid, &stat,
1512 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513 mic->name, __func__, __LINE__,
1516 if (mic->boot_on_resume) {
1517 setsysfs(mic->name, "state", "boot");
1518 mic->boot_on_resume = 0;
/*
 * set_cmdline - compose the card's kernel command line in three snprintf()
 * steps (clock options, power-management options, static network
 * configuration) and write it to the sysfs "cmdline" attribute. The buffer
 * is then reused to log the address 172.31.<mic->id + 1>.1.
 * The argument supplying the %d of the ifcfg string is on a line not
 * visible in this listing.
 * NOTE(review): the second and third snprintf() write at buffer + len but
 * still pass PATH_MAX as the size; the space actually left is
 * PATH_MAX - len. The fixed strings are short, so this does not overflow
 * today, but the bound is not a real bound.
 */
1534 set_cmdline(struct mic_info *mic)
1536 char buffer[PATH_MAX];
1539 len = snprintf(buffer, PATH_MAX,
1540 "clocksource=tsc highres=off nohz=off ");
1541 len += snprintf(buffer + len, PATH_MAX,
1542 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543 len += snprintf(buffer + len, PATH_MAX,
1544 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1547 setsysfs(mic->name, "cmdline", buffer);
1548 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
/*
 * set_log_buf_info - publish the card kernel's log buffer location.
 * Maps /lib/firmware/mic/System.map read-only, searches it for the symbols
 * "__log_buf" and "log_buf_len", copies the 16 characters that start 19
 * bytes before each match (presumably the hexadecimal address column of the
 * System.map line - TODO confirm) and writes them to the sysfs attributes
 * "log_buf_addr" and "log_buf_len".
 * log_buf is 17 bytes and zero-initialised, so the 16-byte strncpy() result
 * stays NUL-terminated. Unmapping/closing and the error returns are not
 * visible in this listing.
 * NOTE(review): strstr() needs a NUL-terminated string, which a file mapping
 * only provides by accident (zero fill after the end of the file in the last
 * page); a file whose size is an exact multiple of the page size has none.
 * NOTE(review): `temp - 19` points before the mapping if a match occurs
 * within the first 19 bytes of the file.
 * NOTE(review): "log_buf_len" is searched as a plain substring and may
 * match a longer symbol name first - confirm.
 */
1554 set_log_buf_info(struct mic_info *mic)
1558 char system_map[] = "/lib/firmware/mic/System.map";
1559 char *map, *temp, log_buf[17] = {'\0'};
1561 fd = open(system_map, O_RDONLY);
1563 mpsslog("%s: Opening System.map failed: %d\n",
1567 len = lseek(fd, 0, SEEK_END);
1569 mpsslog("%s: Reading System.map size failed: %d\n",
1574 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575 if (map == MAP_FAILED) {
1576 mpsslog("%s: mmap of System.map failed: %d\n",
1581 temp = strstr(map, "__log_buf");
1583 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1588 strncpy(log_buf, temp - 19, 16);
1589 setsysfs(mic->name, "log_buf_addr", log_buf);
1590 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591 temp = strstr(map, "log_buf_len");
1593 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1598 strncpy(log_buf, temp - 19, 16);
1599 setsysfs(mic->name, "log_buf_len", log_buf);
1600 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
/*
 * change_virtblk_backend - SA_SIGINFO-style signal handler.
 * @x, @siginfo, @p: handler arguments; none of them is used in the
 *                   lines visible here.
 *
 * Sets mic_virtblk.signaled to 1 on every card linked from mic_list.
 */
1606 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1608 struct mic_info *mic;
/* mic_list is the list head; the cards start at mic_list.next. */
1610 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611 mic->mic_virtblk.signaled = 1/* true */;
/*
 * set_mic_boot_params - set per-card boot parameters.
 * @mic: card descriptor passed on to the helpers.
 *
 * The only call visible in this listing is set_log_buf_info().
 * NOTE(review): other short lines of the body may be missing here.
 */
1615 set_mic_boot_params(struct mic_info *mic)
1617 set_log_buf_info(mic);
/*
 * Per-card initialisation routine taking a void *arg that is cast to
 * struct mic_info.
 *
 * NOTE(review): the signature line is missing from this listing. The
 * forward declaration "static void *init_mic(void *arg);" near the top
 * of the file and the use of arg below suggest this is init_mic -
 * confirm.
 * NOTE(review): short lines (braces, tests, fork/switch/case lines,
 * some call arguments) are missing too, so the ordering and nesting of
 * the steps below cannot be confirmed from here.
 *
 * Visible steps: ignore SIGUSR1, open the state descriptor, write
 * "boot" to the "state" attribute, wait on block_till_state_change()
 * and test for MIC_BOOTING, add the console and net virtio devices,
 * create the console, net and block threads, install
 * change_virtblk_backend for SIGUSR1, log a fork failure, and create
 * the config thread.
 */
1624 struct mic_info *mic = (struct mic_info *)arg;
/* Disposition used first: SIGUSR1 is ignored. */
1625 struct sigaction ignore = {
1627 .sa_handler = SIG_IGN
/* Disposition installed later: SA_SIGINFO handler change_virtblk_backend. */
1629 struct sigaction act = {
1630 .sa_flags = SA_SIGINFO,
1631 .sa_sigaction = change_virtblk_backend,
1633 char buffer[PATH_MAX];
1637 * Currently, one virtio block device is supported for each MIC card
1638 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639 * The signal informs the virtio block backend about a change in the
1640 * configuration file which specifies the virtio backend file name on
1641 * the host. Virtio block backend then re-reads the configuration file
1642 * and switches to the new block device. This signalling mechanism may
1643 * not be required once multiple virtio block devices are supported by
1646 sigaction(SIGUSR1, &ignore, NULL);
1648 fd = open_state_fd(mic);
1650 mpsslog("%s: %s %d open state fd failed %s\n",
1651 mic->name, __func__, __LINE__, strerror(errno));
/* Write "boot" to the card's "state" attribute and log mic->restart. */
1657 snprintf(buffer, PATH_MAX, "boot");
1658 setsysfs(mic->name, "state", buffer);
1659 mpsslog("%s restarting mic %d\n",
1660 mic->name, mic->restart);
/* A non-zero return from block_till_state_change() is logged as an error. */
1665 while (block_till_state_change(fd, mic)) {
1666 mpsslog("%s: %s %d block_till_state_change error %s\n",
1667 mic->name, __func__, __LINE__, strerror(errno));
1672 if (get_mic_state(mic) == MIC_BOOTING)
/* Register the console and net device pages, then start the backend threads. */
1679 add_virtio_device(mic, &virtcons_dev_page.dd);
1680 add_virtio_device(mic, &virtnet_dev_page.dd);
1681 err = pthread_create(&mic->mic_console.console_thread, NULL,
1682 virtio_console, mic);
/* pthread_create() returns the error number, hence strerror(err). */
1684 mpsslog("%s virtcons pthread_create failed %s\n",
1685 mic->name, strerror(err));
1686 err = pthread_create(&mic->mic_net.net_thread, NULL,
1689 mpsslog("%s virtnet pthread_create failed %s\n",
1690 mic->name, strerror(err));
1691 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1694 mpsslog("%s virtblk pthread_create failed %s\n",
1695 mic->name, strerror(err));
/* Replace the SIGUSR1 disposition with the handler in act. */
1696 sigemptyset(&act.sa_mask);
1697 err = sigaction(SIGUSR1, &act, NULL);
1699 mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700 mic->name, strerror(errno));
1704 mpsslog("fork failed MIC name %s id %d errno %d\n",
1705 mic->name, mic->id, errno);
/* Thread handle stored in mic->config_thread; the log text names mic_config. */
1708 err = pthread_create(&mic->config_thread, NULL,
1711 mpsslog("%s mic_config pthread_create failed %s\n",
1712 mic->name, strerror(err));
/*
 * Start-up loop over all cards.
 * NOTE(review): the signature line of this function is missing from
 * this listing.
 *
 * For every card linked from mic_list it calls set_mic_boot_params()
 * and creates a thread running init_mic() with the card as argument;
 * the thread handle is stored in mic->init_thread.
 */
1721 struct mic_info *mic;
1724 for (mic = mic_list.next; mic; mic = mic->next) {
1725 set_mic_boot_params(mic);
1726 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
/* pthread_create() returns the error number, hence strerror(err). */
1728 mpsslog("%s init_mic pthread_create failed %s\n",
1729 mic->name, strerror(err));
/*
 * Build the card list from the entries of MICSYSFSDIR.
 * NOTE(review): the signature line is missing from this listing; main()
 * calls init_mic_list() and logs its result as a device count, which
 * suggests this is that function - confirm.
 *
 * For each directory entry whose name starts with "mic", a zeroed
 * struct mic_info is allocated and linked through ->next, the id is
 * parsed with atoi() from the characters after the 3-byte prefix, and
 * the entry name is copied into a newly allocated mic->name.
 * NOTE(review): the checks on calloc()/malloc() results and the step
 * that advances mic to the new node are not visible here.
 */
1739 struct mic_info *mic = &mic_list;
1740 struct dirent *file;
1744 dp = opendir(MICSYSFSDIR);
1748 while ((file = readdir(dp)) != NULL) {
1749 if (!strncmp(file->d_name, "mic", 3)) {
1750 mic->next = calloc(1, sizeof(struct mic_info));
1753 mic->id = atoi(&file->d_name[3]);
/* Allocation is 16 bytes larger than the name itself. */
1754 mic->name = malloc(strlen(file->d_name) + 16);
1756 strcpy(mic->name, file->d_name);
1757 mpsslog("MIC name %s id %d\n", mic->name,
/*
 * mpsslog - printf-style logger with a timestamp prefix.
 * @format: printf format string, followed by its arguments.
 *
 * Formats the message into buffer, converts t with ctime_r(), drops
 * the last character of that text (the newline ctime_r() appends) and
 * writes "<timestamp>: <message>" to logfp.
 *
 * NOTE(review): vsprintf() does not bound the write into buffer. The
 * declaration of buffer is not visible in this listing; vsnprintf()
 * with the buffer's size would be the bounded form - consider switching.
 */
1769 mpsslog(char *format, ...)
1779 va_start(args, format);
1780 vsprintf(buffer, format, args);
1784 ts1 = ctime_r(&t, ts);
1785 ts1[strlen(ts1) - 1] = '\0';
1786 fprintf(logfp, "%s: %s", ts1, buffer);
1792 main(int argc, char *argv[])
1799 logfp = fopen(LOGFILE_NAME, "a+");
1801 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1814 mpsslog("MIC Daemon start\n");
1816 cnt = init_mic_list();
1818 mpsslog("MIC module not loaded\n");
1821 mpsslog("MIC found %d devices\n", cnt);