of: Fix NULL dereference in unflatten_and_copy()
[firefly-linux-kernel-4.4.55.git] / Documentation / mic / mpssd / mpssd.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20
21 #define _GNU_SOURCE
22
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44
45 static void init_mic(struct mic_info *mic);
46
47 static FILE *logfp;
48 static struct mic_info mic_list;
49
/* Number of elements in a true array (not a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Type-safe minimum: both operands are converted to @type and evaluated
 * exactly once before being compared.
 */
#define min_t(type, x, y) ({                            \
                type __lhs = (x);                       \
                type __rhs = (y);                       \
                __lhs < __rhs ? __lhs : __rhs; })
56
/*
 * Alignment helpers.  @size must be a power of two.
 *
 * _ALIGN_DOWN rounds @addr down to a multiple of @size, _ALIGN_UP rounds it
 * up.  Every macro argument is parenthesized before it is used in an
 * expression so that callers may pass compound expressions (for example
 * `1 << n` or `a ? b : c`) without being bitten by operator precedence.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & ~((size) - 1))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
66
67 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
68
69 #define GSO_ENABLED             1
70 #define MAX_GSO_SIZE            (64 * 1024)
71 #define ETH_H_LEN               14
72 #define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
73 #define MIC_DEVICE_PAGE_END     0x1000
74
75 #ifndef VIRTIO_NET_HDR_F_DATA_VALID
76 #define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
77 #endif
78
79 static struct {
80         struct mic_device_desc dd;
81         struct mic_vqconfig vqconfig[2];
82         __u32 host_features, guest_acknowledgements;
83         struct virtio_console_config cons_config;
84 } virtcons_dev_page = {
85         .dd = {
86                 .type = VIRTIO_ID_CONSOLE,
87                 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
88                 .feature_len = sizeof(virtcons_dev_page.host_features),
89                 .config_len = sizeof(virtcons_dev_page.cons_config),
90         },
91         .vqconfig[0] = {
92                 .num = htole16(MIC_VRING_ENTRIES),
93         },
94         .vqconfig[1] = {
95                 .num = htole16(MIC_VRING_ENTRIES),
96         },
97 };
98
99 static struct {
100         struct mic_device_desc dd;
101         struct mic_vqconfig vqconfig[2];
102         __u32 host_features, guest_acknowledgements;
103         struct virtio_net_config net_config;
104 } virtnet_dev_page = {
105         .dd = {
106                 .type = VIRTIO_ID_NET,
107                 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
108                 .feature_len = sizeof(virtnet_dev_page.host_features),
109                 .config_len = sizeof(virtnet_dev_page.net_config),
110         },
111         .vqconfig[0] = {
112                 .num = htole16(MIC_VRING_ENTRIES),
113         },
114         .vqconfig[1] = {
115                 .num = htole16(MIC_VRING_ENTRIES),
116         },
117 #if GSO_ENABLED
118                 .host_features = htole32(
119                 1 << VIRTIO_NET_F_CSUM |
120                 1 << VIRTIO_NET_F_GSO |
121                 1 << VIRTIO_NET_F_GUEST_TSO4 |
122                 1 << VIRTIO_NET_F_GUEST_TSO6 |
123                 1 << VIRTIO_NET_F_GUEST_ECN |
124                 1 << VIRTIO_NET_F_GUEST_UFO),
125 #else
126                 .host_features = 0,
127 #endif
128 };
129
130 static const char *mic_config_dir = "/etc/sysconfig/mic";
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
132 static struct {
133         struct mic_device_desc dd;
134         struct mic_vqconfig vqconfig[1];
135         __u32 host_features, guest_acknowledgements;
136         struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
138         .dd = {
139                 .type = VIRTIO_ID_BLOCK,
140                 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141                 .feature_len = sizeof(virtblk_dev_page.host_features),
142                 .config_len = sizeof(virtblk_dev_page.blk_config),
143         },
144         .vqconfig[0] = {
145                 .num = htole16(MIC_VRING_ENTRIES),
146         },
147         .host_features =
148                 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149         .blk_config = {
150                 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151                 .capacity = htole64(0),
152          }
153 };
154
155 static char *myname;
156
157 static int
158 tap_configure(struct mic_info *mic, char *dev)
159 {
160         pid_t pid;
161         char *ifargv[7];
162         char ipaddr[IFNAMSIZ];
163         int ret = 0;
164
165         pid = fork();
166         if (pid == 0) {
167                 ifargv[0] = "ip";
168                 ifargv[1] = "link";
169                 ifargv[2] = "set";
170                 ifargv[3] = dev;
171                 ifargv[4] = "up";
172                 ifargv[5] = NULL;
173                 mpsslog("Configuring %s\n", dev);
174                 ret = execvp("ip", ifargv);
175                 if (ret < 0) {
176                         mpsslog("%s execvp failed errno %s\n",
177                                 mic->name, strerror(errno));
178                         return ret;
179                 }
180         }
181         if (pid < 0) {
182                 mpsslog("%s fork failed errno %s\n",
183                         mic->name, strerror(errno));
184                 return ret;
185         }
186
187         ret = waitpid(pid, NULL, 0);
188         if (ret < 0) {
189                 mpsslog("%s waitpid failed errno %s\n",
190                         mic->name, strerror(errno));
191                 return ret;
192         }
193
194         snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
195
196         pid = fork();
197         if (pid == 0) {
198                 ifargv[0] = "ip";
199                 ifargv[1] = "addr";
200                 ifargv[2] = "add";
201                 ifargv[3] = ipaddr;
202                 ifargv[4] = "dev";
203                 ifargv[5] = dev;
204                 ifargv[6] = NULL;
205                 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206                 ret = execvp("ip", ifargv);
207                 if (ret < 0) {
208                         mpsslog("%s execvp failed errno %s\n",
209                                 mic->name, strerror(errno));
210                         return ret;
211                 }
212         }
213         if (pid < 0) {
214                 mpsslog("%s fork failed errno %s\n",
215                         mic->name, strerror(errno));
216                 return ret;
217         }
218
219         ret = waitpid(pid, NULL, 0);
220         if (ret < 0) {
221                 mpsslog("%s waitpid failed errno %s\n",
222                         mic->name, strerror(errno));
223                 return ret;
224         }
225         mpsslog("MIC name %s %s %d DONE!\n",
226                 mic->name, __func__, __LINE__);
227         return 0;
228 }
229
230 static int tun_alloc(struct mic_info *mic, char *dev)
231 {
232         struct ifreq ifr;
233         int fd, err;
234 #if GSO_ENABLED
235         unsigned offload;
236 #endif
237         fd = open("/dev/net/tun", O_RDWR);
238         if (fd < 0) {
239                 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240                 goto done;
241         }
242
243         memset(&ifr, 0, sizeof(ifr));
244
245         ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246         if (*dev)
247                 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248
249         err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250         if (err < 0) {
251                 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252                         mic->name, __func__, __LINE__, strerror(errno));
253                 close(fd);
254                 return err;
255         }
256 #if GSO_ENABLED
257         offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
258                 TUN_F_TSO_ECN | TUN_F_UFO;
259
260         err = ioctl(fd, TUNSETOFFLOAD, offload);
261         if (err < 0) {
262                 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
263                         mic->name, __func__, __LINE__, strerror(errno));
264                 close(fd);
265                 return err;
266         }
267 #endif
268         strcpy(dev, ifr.ifr_name);
269         mpsslog("Created TAP %s\n", dev);
270 done:
271         return fd;
272 }
273
274 #define NET_FD_VIRTIO_NET 0
275 #define NET_FD_TUN 1
276 #define MAX_NET_FD 2
277
278 static void set_dp(struct mic_info *mic, int type, void *dp)
279 {
280         switch (type) {
281         case VIRTIO_ID_CONSOLE:
282                 mic->mic_console.console_dp = dp;
283                 return;
284         case VIRTIO_ID_NET:
285                 mic->mic_net.net_dp = dp;
286                 return;
287         case VIRTIO_ID_BLOCK:
288                 mic->mic_virtblk.block_dp = dp;
289                 return;
290         }
291         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
292         assert(0);
293 }
294
295 static void *get_dp(struct mic_info *mic, int type)
296 {
297         switch (type) {
298         case VIRTIO_ID_CONSOLE:
299                 return mic->mic_console.console_dp;
300         case VIRTIO_ID_NET:
301                 return mic->mic_net.net_dp;
302         case VIRTIO_ID_BLOCK:
303                 return mic->mic_virtblk.block_dp;
304         }
305         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
306         assert(0);
307         return NULL;
308 }
309
310 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
311 {
312         struct mic_device_desc *d;
313         int i;
314         void *dp = get_dp(mic, type);
315
316         for (i = mic_aligned_size(struct mic_bootparam); i < PAGE_SIZE;
317                 i += mic_total_desc_size(d)) {
318                 d = dp + i;
319
320                 /* End of list */
321                 if (d->type == 0)
322                         break;
323
324                 if (d->type == -1)
325                         continue;
326
327                 mpsslog("%s %s d-> type %d d %p\n",
328                         mic->name, __func__, d->type, d);
329
330                 if (d->type == (__u8)type)
331                         return d;
332         }
333         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
334         assert(0);
335         return NULL;
336 }
337
338 /* See comments in vhost.c for explanation of next_desc() */
339 static unsigned next_desc(struct vring_desc *desc)
340 {
341         unsigned int next;
342
343         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
344                 return -1U;
345         next = le16toh(desc->next);
346         return next;
347 }
348
349 /* Sum up all the IOVEC length */
350 static ssize_t
351 sum_iovec_len(struct mic_copy_desc *copy)
352 {
353         ssize_t sum = 0;
354         int i;
355
356         for (i = 0; i < copy->iovcnt; i++)
357                 sum += copy->iov[i].iov_len;
358         return sum;
359 }
360
361 static inline void verify_out_len(struct mic_info *mic,
362         struct mic_copy_desc *copy)
363 {
364         if (copy->out_len != sum_iovec_len(copy)) {
365                 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
366                         mic->name, __func__, __LINE__,
367                         copy->out_len, sum_iovec_len(copy));
368                 assert(copy->out_len == sum_iovec_len(copy));
369         }
370 }
371
372 /* Display an iovec */
373 static void
374 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
375            const char *s, int line)
376 {
377         int i;
378
379         for (i = 0; i < copy->iovcnt; i++)
380                 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
381                         mic->name, s, line, i,
382                         copy->iov[i].iov_base, copy->iov[i].iov_len);
383 }
384
385 static inline __u16 read_avail_idx(struct mic_vring *vr)
386 {
387         return ACCESS_ONCE(vr->info->avail_idx);
388 }
389
390 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
391                                 struct mic_copy_desc *copy, ssize_t len)
392 {
393         copy->vr_idx = tx ? 0 : 1;
394         copy->update_used = true;
395         if (type == VIRTIO_ID_NET)
396                 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
397         else
398                 copy->iov[0].iov_len = len;
399 }
400
401 /* Central API which triggers the copies */
402 static int
403 mic_virtio_copy(struct mic_info *mic, int fd,
404                 struct mic_vring *vr, struct mic_copy_desc *copy)
405 {
406         int ret;
407
408         ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
409         if (ret) {
410                 mpsslog("%s %s %d errno %s ret %d\n",
411                         mic->name, __func__, __LINE__,
412                         strerror(errno), ret);
413         }
414         return ret;
415 }
416
417 /*
418  * This initialization routine requires at least one
419  * vring i.e. vr0. vr1 is optional.
420  */
421 static void *
422 init_vr(struct mic_info *mic, int fd, int type,
423         struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
424 {
425         int vr_size;
426         char *va;
427
428         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
429                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
430         va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
431                 PROT_READ, MAP_SHARED, fd, 0);
432         if (MAP_FAILED == va) {
433                 mpsslog("%s %s %d mmap failed errno %s\n",
434                         mic->name, __func__, __LINE__,
435                         strerror(errno));
436                 goto done;
437         }
438         set_dp(mic, type, va);
439         vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
440         vr0->info = vr0->va +
441                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
442         vring_init(&vr0->vr,
443                    MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
444         mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
445                 __func__, mic->name, vr0->va, vr0->info, vr_size,
446                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
447         mpsslog("magic 0x%x expected 0x%x\n",
448                 vr0->info->magic, MIC_MAGIC + type);
449         assert(vr0->info->magic == MIC_MAGIC + type);
450         if (vr1) {
451                 vr1->va = (struct mic_vring *)
452                         &va[MIC_DEVICE_PAGE_END + vr_size];
453                 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
454                         MIC_VIRTIO_RING_ALIGN);
455                 vring_init(&vr1->vr,
456                            MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
457                 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
458                         __func__, mic->name, vr1->va, vr1->info, vr_size,
459                         vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
460                 mpsslog("magic 0x%x expected 0x%x\n",
461                         vr1->info->magic, MIC_MAGIC + type + 1);
462                 assert(vr1->info->magic == MIC_MAGIC + type + 1);
463         }
464 done:
465         return va;
466 }
467
468 static void
469 wait_for_card_driver(struct mic_info *mic, int fd, int type)
470 {
471         struct pollfd pollfd;
472         int err;
473         struct mic_device_desc *desc = get_device_desc(mic, type);
474
475         pollfd.fd = fd;
476         mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
477                 mic->name, __func__, type, desc->status);
478         while (1) {
479                 pollfd.events = POLLIN;
480                 pollfd.revents = 0;
481                 err = poll(&pollfd, 1, -1);
482                 if (err < 0) {
483                         mpsslog("%s %s poll failed %s\n",
484                                 mic->name, __func__, strerror(errno));
485                         continue;
486                 }
487
488                 if (pollfd.revents) {
489                         mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
490                                 mic->name, __func__, type, desc->status);
491                         if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
492                                 mpsslog("%s %s poll.revents %d\n",
493                                         mic->name, __func__, pollfd.revents);
494                                 mpsslog("%s %s desc-> type %d status 0x%x\n",
495                                         mic->name, __func__, type,
496                                         desc->status);
497                                 break;
498                         }
499                 }
500         }
501 }
502
503 /* Spin till we have some descriptors */
504 static void
505 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
506 {
507         __u16 avail_idx = read_avail_idx(vr);
508
509         while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
510 #ifdef DEBUG
511                 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
512                         mic->name, __func__,
513                         le16toh(vr->vr.avail->idx), vr->info->avail_idx);
514 #endif
515                 sched_yield();
516         }
517 }
518
519 static void *
520 virtio_net(void *arg)
521 {
522         static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
523         static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __aligned(64);
524         struct iovec vnet_iov[2][2] = {
525                 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
526                   { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
527                 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
528                   { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
529         };
530         struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
531         struct mic_info *mic = (struct mic_info *)arg;
532         char if_name[IFNAMSIZ];
533         struct pollfd net_poll[MAX_NET_FD];
534         struct mic_vring tx_vr, rx_vr;
535         struct mic_copy_desc copy;
536         struct mic_device_desc *desc;
537         int err;
538
539         snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
540         mic->mic_net.tap_fd = tun_alloc(mic, if_name);
541         if (mic->mic_net.tap_fd < 0)
542                 goto done;
543
544         if (tap_configure(mic, if_name))
545                 goto done;
546         mpsslog("MIC name %s id %d\n", mic->name, mic->id);
547
548         net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
549         net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
550         net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
551         net_poll[NET_FD_TUN].events = POLLIN;
552
553         if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
554                                   VIRTIO_ID_NET, &tx_vr, &rx_vr,
555                 virtnet_dev_page.dd.num_vq)) {
556                 mpsslog("%s init_vr failed %s\n",
557                         mic->name, strerror(errno));
558                 goto done;
559         }
560
561         copy.iovcnt = 2;
562         desc = get_device_desc(mic, VIRTIO_ID_NET);
563
564         while (1) {
565                 ssize_t len;
566
567                 net_poll[NET_FD_VIRTIO_NET].revents = 0;
568                 net_poll[NET_FD_TUN].revents = 0;
569
570                 /* Start polling for data from tap and virtio net */
571                 err = poll(net_poll, 2, -1);
572                 if (err < 0) {
573                         mpsslog("%s poll failed %s\n",
574                                 __func__, strerror(errno));
575                         continue;
576                 }
577                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
578                         wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
579                                              VIRTIO_ID_NET);
580                 /*
581                  * Check if there is data to be read from TUN and write to
582                  * virtio net fd if there is.
583                  */
584                 if (net_poll[NET_FD_TUN].revents & POLLIN) {
585                         copy.iov = iov0;
586                         len = readv(net_poll[NET_FD_TUN].fd,
587                                 copy.iov, copy.iovcnt);
588                         if (len > 0) {
589                                 struct virtio_net_hdr *hdr
590                                         = (struct virtio_net_hdr *)vnet_hdr[0];
591
592                                 /* Disable checksums on the card since we are on
593                                    a reliable PCIe link */
594                                 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
595 #ifdef DEBUG
596                                 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
597                                         __func__, __LINE__, hdr->flags);
598                                 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
599                                         copy.out_len, hdr->gso_type);
600 #endif
601 #ifdef DEBUG
602                                 disp_iovec(mic, copy, __func__, __LINE__);
603                                 mpsslog("%s %s %d read from tap 0x%lx\n",
604                                         mic->name, __func__, __LINE__,
605                                         len);
606 #endif
607                                 spin_for_descriptors(mic, &tx_vr);
608                                 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
609                                              len);
610
611                                 err = mic_virtio_copy(mic,
612                                         mic->mic_net.virtio_net_fd, &tx_vr,
613                                         &copy);
614                                 if (err < 0) {
615                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
616                                                 mic->name, __func__, __LINE__,
617                                                 strerror(errno));
618                                 }
619                                 if (!err)
620                                         verify_out_len(mic, &copy);
621 #ifdef DEBUG
622                                 disp_iovec(mic, copy, __func__, __LINE__);
623                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
624                                         mic->name, __func__, __LINE__,
625                                         sum_iovec_len(&copy));
626 #endif
627                                 /* Reinitialize IOV for next run */
628                                 iov0[1].iov_len = MAX_NET_PKT_SIZE;
629                         } else if (len < 0) {
630                                 disp_iovec(mic, &copy, __func__, __LINE__);
631                                 mpsslog("%s %s %d read failed %s ", mic->name,
632                                         __func__, __LINE__, strerror(errno));
633                                 mpsslog("cnt %d sum %zd\n",
634                                         copy.iovcnt, sum_iovec_len(&copy));
635                         }
636                 }
637
638                 /*
639                  * Check if there is data to be read from virtio net and
640                  * write to TUN if there is.
641                  */
642                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
643                         while (rx_vr.info->avail_idx !=
644                                 le16toh(rx_vr.vr.avail->idx)) {
645                                 copy.iov = iov1;
646                                 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
647                                              MAX_NET_PKT_SIZE
648                                         + sizeof(struct virtio_net_hdr));
649
650                                 err = mic_virtio_copy(mic,
651                                         mic->mic_net.virtio_net_fd, &rx_vr,
652                                         &copy);
653                                 if (!err) {
654 #ifdef DEBUG
655                                         struct virtio_net_hdr *hdr
656                                                 = (struct virtio_net_hdr *)
657                                                         vnet_hdr[1];
658
659                                         mpsslog("%s %s %d hdr->flags 0x%x, ",
660                                                 mic->name, __func__, __LINE__,
661                                                 hdr->flags);
662                                         mpsslog("out_len %d gso_type 0x%x\n",
663                                                 copy.out_len,
664                                                 hdr->gso_type);
665 #endif
666                                         /* Set the correct output iov_len */
667                                         iov1[1].iov_len = copy.out_len -
668                                                 sizeof(struct virtio_net_hdr);
669                                         verify_out_len(mic, &copy);
670 #ifdef DEBUG
671                                         disp_iovec(mic, copy, __func__,
672                                                    __LINE__);
673                                         mpsslog("%s %s %d ",
674                                                 mic->name, __func__, __LINE__);
675                                         mpsslog("read from net 0x%lx\n",
676                                                 sum_iovec_len(copy));
677 #endif
678                                         len = writev(net_poll[NET_FD_TUN].fd,
679                                                 copy.iov, copy.iovcnt);
680                                         if (len != sum_iovec_len(&copy)) {
681                                                 mpsslog("Tun write failed %s ",
682                                                         strerror(errno));
683                                                 mpsslog("len 0x%zx ", len);
684                                                 mpsslog("read_len 0x%zx\n",
685                                                         sum_iovec_len(&copy));
686                                         } else {
687 #ifdef DEBUG
688                                                 disp_iovec(mic, &copy, __func__,
689                                                            __LINE__);
690                                                 mpsslog("%s %s %d ",
691                                                         mic->name, __func__,
692                                                         __LINE__);
693                                                 mpsslog("wrote to tap 0x%lx\n",
694                                                         len);
695 #endif
696                                         }
697                                 } else {
698                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
699                                                 mic->name, __func__, __LINE__,
700                                                 strerror(errno));
701                                         break;
702                                 }
703                         }
704                 }
705                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
706                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
707         }
708 done:
709         pthread_exit(NULL);
710 }
711
712 /* virtio_console */
713 #define VIRTIO_CONSOLE_FD 0
714 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
715 #define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
716 #define MAX_BUFFER_SIZE PAGE_SIZE
717
718 static void *
719 virtio_console(void *arg)
720 {
721         static __u8 vcons_buf[2][PAGE_SIZE];
722         struct iovec vcons_iov[2] = {
723                 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
724                 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
725         };
726         struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
727         struct mic_info *mic = (struct mic_info *)arg;
728         int err;
729         struct pollfd console_poll[MAX_CONSOLE_FD];
730         int pty_fd;
731         char *pts_name;
732         ssize_t len;
733         struct mic_vring tx_vr, rx_vr;
734         struct mic_copy_desc copy;
735         struct mic_device_desc *desc;
736
737         pty_fd = posix_openpt(O_RDWR);
738         if (pty_fd < 0) {
739                 mpsslog("can't open a pseudoterminal master device: %s\n",
740                         strerror(errno));
741                 goto _return;
742         }
743         pts_name = ptsname(pty_fd);
744         if (pts_name == NULL) {
745                 mpsslog("can't get pts name\n");
746                 goto _close_pty;
747         }
748         printf("%s console message goes to %s\n", mic->name, pts_name);
749         mpsslog("%s console message goes to %s\n", mic->name, pts_name);
750         err = grantpt(pty_fd);
751         if (err < 0) {
752                 mpsslog("can't grant access: %s %s\n",
753                         pts_name, strerror(errno));
754                 goto _close_pty;
755         }
756         err = unlockpt(pty_fd);
757         if (err < 0) {
758                 mpsslog("can't unlock a pseudoterminal: %s %s\n",
759                         pts_name, strerror(errno));
760                 goto _close_pty;
761         }
762         console_poll[MONITOR_FD].fd = pty_fd;
763         console_poll[MONITOR_FD].events = POLLIN;
764
765         console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
766         console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
767
768         if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
769                                   VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
770                 virtcons_dev_page.dd.num_vq)) {
771                 mpsslog("%s init_vr failed %s\n",
772                         mic->name, strerror(errno));
773                 goto _close_pty;
774         }
775
776         copy.iovcnt = 1;
777         desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
778
779         for (;;) {
780                 console_poll[MONITOR_FD].revents = 0;
781                 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
782                 err = poll(console_poll, MAX_CONSOLE_FD, -1);
783                 if (err < 0) {
784                         mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
785                                 strerror(errno));
786                         continue;
787                 }
788                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
789                         wait_for_card_driver(mic,
790                                              mic->mic_console.virtio_console_fd,
791                                 VIRTIO_ID_CONSOLE);
792
793                 if (console_poll[MONITOR_FD].revents & POLLIN) {
794                         copy.iov = iov0;
795                         len = readv(pty_fd, copy.iov, copy.iovcnt);
796                         if (len > 0) {
797 #ifdef DEBUG
798                                 disp_iovec(mic, copy, __func__, __LINE__);
799                                 mpsslog("%s %s %d read from tap 0x%lx\n",
800                                         mic->name, __func__, __LINE__,
801                                         len);
802 #endif
803                                 spin_for_descriptors(mic, &tx_vr);
804                                 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
805                                              &copy, len);
806
807                                 err = mic_virtio_copy(mic,
808                                         mic->mic_console.virtio_console_fd,
809                                         &tx_vr, &copy);
810                                 if (err < 0) {
811                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
812                                                 mic->name, __func__, __LINE__,
813                                                 strerror(errno));
814                                 }
815                                 if (!err)
816                                         verify_out_len(mic, &copy);
817 #ifdef DEBUG
818                                 disp_iovec(mic, copy, __func__, __LINE__);
819                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
820                                         mic->name, __func__, __LINE__,
821                                         sum_iovec_len(copy));
822 #endif
823                                 /* Reinitialize IOV for next run */
824                                 iov0->iov_len = PAGE_SIZE;
825                         } else if (len < 0) {
826                                 disp_iovec(mic, &copy, __func__, __LINE__);
827                                 mpsslog("%s %s %d read failed %s ",
828                                         mic->name, __func__, __LINE__,
829                                         strerror(errno));
830                                 mpsslog("cnt %d sum %zd\n",
831                                         copy.iovcnt, sum_iovec_len(&copy));
832                         }
833                 }
834
835                 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
836                         while (rx_vr.info->avail_idx !=
837                                 le16toh(rx_vr.vr.avail->idx)) {
838                                 copy.iov = iov1;
839                                 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
840                                              &copy, PAGE_SIZE);
841
842                                 err = mic_virtio_copy(mic,
843                                         mic->mic_console.virtio_console_fd,
844                                         &rx_vr, &copy);
845                                 if (!err) {
846                                         /* Set the correct output iov_len */
847                                         iov1->iov_len = copy.out_len;
848                                         verify_out_len(mic, &copy);
849 #ifdef DEBUG
850                                         disp_iovec(mic, copy, __func__,
851                                                    __LINE__);
852                                         mpsslog("%s %s %d ",
853                                                 mic->name, __func__, __LINE__);
854                                         mpsslog("read from net 0x%lx\n",
855                                                 sum_iovec_len(copy));
856 #endif
857                                         len = writev(pty_fd,
858                                                 copy.iov, copy.iovcnt);
859                                         if (len != sum_iovec_len(&copy)) {
860                                                 mpsslog("Tun write failed %s ",
861                                                         strerror(errno));
862                                                 mpsslog("len 0x%zx ", len);
863                                                 mpsslog("read_len 0x%zx\n",
864                                                         sum_iovec_len(&copy));
865                                         } else {
866 #ifdef DEBUG
867                                                 disp_iovec(mic, copy, __func__,
868                                                            __LINE__);
869                                                 mpsslog("%s %s %d ",
870                                                         mic->name, __func__,
871                                                         __LINE__);
872                                                 mpsslog("wrote to tap 0x%lx\n",
873                                                         len);
874 #endif
875                                         }
876                                 } else {
877                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
878                                                 mic->name, __func__, __LINE__,
879                                                 strerror(errno));
880                                         break;
881                                 }
882                         }
883                 }
884                 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
885                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
886         }
887 _close_pty:
888         close(pty_fd);
889 _return:
890         pthread_exit(NULL);
891 }
892
893 static void
894 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
895 {
896         char path[PATH_MAX];
897         int fd, err;
898
899         snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
900         fd = open(path, O_RDWR);
901         if (fd < 0) {
902                 mpsslog("Could not open %s %s\n", path, strerror(errno));
903                 return;
904         }
905
906         err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
907         if (err < 0) {
908                 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
909                 close(fd);
910                 return;
911         }
912         switch (dd->type) {
913         case VIRTIO_ID_NET:
914                 mic->mic_net.virtio_net_fd = fd;
915                 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
916                 break;
917         case VIRTIO_ID_CONSOLE:
918                 mic->mic_console.virtio_console_fd = fd;
919                 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
920                 break;
921         case VIRTIO_ID_BLOCK:
922                 mic->mic_virtblk.virtio_block_fd = fd;
923                 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
924                 break;
925         }
926 }
927
928 static bool
929 set_backend_file(struct mic_info *mic)
930 {
931         FILE *config;
932         char buff[PATH_MAX], *line, *evv, *p;
933
934         snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
935         config = fopen(buff, "r");
936         if (config == NULL)
937                 return false;
938         do {  /* look for "virtblk_backend=XXXX" */
939                 line = fgets(buff, PATH_MAX, config);
940                 if (line == NULL)
941                         break;
942                 if (*line == '#')
943                         continue;
944                 p = strchr(line, '\n');
945                 if (p)
946                         *p = '\0';
947         } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
948         fclose(config);
949         if (line == NULL)
950                 return false;
951         evv = strchr(line, '=');
952         if (evv == NULL)
953                 return false;
954         mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
955         if (mic->mic_virtblk.backend_file == NULL) {
956                 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
957                 return false;
958         }
959         strcpy(mic->mic_virtblk.backend_file, evv + 1);
960         return true;
961 }
962
963 #define SECTOR_SIZE 512
964 static bool
965 set_backend_size(struct mic_info *mic)
966 {
967         mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
968                 SEEK_END);
969         if (mic->mic_virtblk.backend_size < 0) {
970                 mpsslog("%s: can't seek: %s\n",
971                         mic->name, mic->mic_virtblk.backend_file);
972                 return false;
973         }
974         virtblk_dev_page.blk_config.capacity =
975                 mic->mic_virtblk.backend_size / SECTOR_SIZE;
976         if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
977                 virtblk_dev_page.blk_config.capacity++;
978
979         virtblk_dev_page.blk_config.capacity =
980                 htole64(virtblk_dev_page.blk_config.capacity);
981
982         return true;
983 }
984
985 static bool
986 open_backend(struct mic_info *mic)
987 {
988         if (!set_backend_file(mic))
989                 goto _error_exit;
990         mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
991         if (mic->mic_virtblk.backend < 0) {
992                 mpsslog("%s: can't open: %s\n", mic->name,
993                         mic->mic_virtblk.backend_file);
994                 goto _error_free;
995         }
996         if (!set_backend_size(mic))
997                 goto _error_close;
998         mic->mic_virtblk.backend_addr = mmap(NULL,
999                 mic->mic_virtblk.backend_size,
1000                 PROT_READ|PROT_WRITE, MAP_SHARED,
1001                 mic->mic_virtblk.backend, 0L);
1002         if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1003                 mpsslog("%s: can't map: %s %s\n",
1004                         mic->name, mic->mic_virtblk.backend_file,
1005                         strerror(errno));
1006                 goto _error_close;
1007         }
1008         return true;
1009
1010  _error_close:
1011         close(mic->mic_virtblk.backend);
1012  _error_free:
1013         free(mic->mic_virtblk.backend_file);
1014  _error_exit:
1015         return false;
1016 }
1017
1018 static void
1019 close_backend(struct mic_info *mic)
1020 {
1021         munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1022         close(mic->mic_virtblk.backend);
1023         free(mic->mic_virtblk.backend_file);
1024 }
1025
1026 static bool
1027 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1028 {
1029         if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1030                 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1031                         mic->name);
1032                 return false;
1033         }
1034         add_virtio_device(mic, &virtblk_dev_page.dd);
1035         if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1036                                   VIRTIO_ID_BLOCK, vring, NULL,
1037                                   virtblk_dev_page.dd.num_vq)) {
1038                 mpsslog("%s init_vr failed %s\n",
1039                         mic->name, strerror(errno));
1040                 return false;
1041         }
1042         return true;
1043 }
1044
1045 static void
1046 stop_virtblk(struct mic_info *mic)
1047 {
1048         int vr_size, ret;
1049
1050         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1051                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1052         ret = munmap(mic->mic_virtblk.block_dp,
1053                 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1054         if (ret < 0)
1055                 mpsslog("%s munmap errno %d\n", mic->name, errno);
1056         close(mic->mic_virtblk.virtio_block_fd);
1057 }
1058
1059 static __u8
1060 header_error_check(struct vring_desc *desc)
1061 {
1062         if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1063                 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1064                         __func__, __LINE__);
1065                 return -EIO;
1066         }
1067         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1068                 mpsslog("%s() %d: alone\n",
1069                         __func__, __LINE__);
1070                 return -EIO;
1071         }
1072         if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1073                 mpsslog("%s() %d: not read\n",
1074                         __func__, __LINE__);
1075                 return -EIO;
1076         }
1077         return 0;
1078 }
1079
1080 static int
1081 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1082 {
1083         struct iovec iovec;
1084         struct mic_copy_desc copy;
1085
1086         iovec.iov_len = sizeof(*hdr);
1087         iovec.iov_base = hdr;
1088         copy.iov = &iovec;
1089         copy.iovcnt = 1;
1090         copy.vr_idx = 0;  /* only one vring on virtio_block */
1091         copy.update_used = false;  /* do not update used index */
1092         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1093 }
1094
1095 static int
1096 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1097 {
1098         struct mic_copy_desc copy;
1099
1100         copy.iov = iovec;
1101         copy.iovcnt = iovcnt;
1102         copy.vr_idx = 0;  /* only one vring on virtio_block */
1103         copy.update_used = false;  /* do not update used index */
1104         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1105 }
1106
1107 static __u8
1108 status_error_check(struct vring_desc *desc)
1109 {
1110         if (le32toh(desc->len) != sizeof(__u8)) {
1111                 mpsslog("%s() %d: length is not sizeof(status)\n",
1112                         __func__, __LINE__);
1113                 return -EIO;
1114         }
1115         return 0;
1116 }
1117
1118 static int
1119 write_status(int fd, __u8 *status)
1120 {
1121         struct iovec iovec;
1122         struct mic_copy_desc copy;
1123
1124         iovec.iov_base = status;
1125         iovec.iov_len = sizeof(*status);
1126         copy.iov = &iovec;
1127         copy.iovcnt = 1;
1128         copy.vr_idx = 0;  /* only one vring on virtio_block */
1129         copy.update_used = true; /* Update used index */
1130         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1131 }
1132
/*
 * virtio_block - thread body serving the virtio block device of one card.
 * @arg: the struct mic_info of the card, passed as void *.
 *
 * Runs forever. Each outer iteration opens the backend file, adds the
 * virtio block device, serves requests until mic->mic_virtblk.signaled
 * becomes non-zero, then tears everything down and starts over. If no
 * backend can be opened, the thread sleeps in one-second steps until it is
 * signaled and retries.
 *
 * The outer loop has no exit, so the trailing pthread_exit() is not reached.
 */
1133 static void *
1134 virtio_block(void *arg)
1135 {
1136         struct mic_info *mic = (struct mic_info *)arg;
1137         int ret;
1138         struct pollfd block_poll;
1139         struct mic_vring vring;
1140         __u16 avail_idx;
1141         __u32 desc_idx;
1142         struct vring_desc *desc;
1143         struct iovec *iovec, *piov;
1144         __u8 status;
1145         __u32 buffer_desc_idx;
1146         struct virtio_blk_outhdr hdr;
1147         void *fos;
1148
1149         for (;;) {  /* forever */
1150                 if (!open_backend(mic)) { /* No virtblk */
                             /* wait, in 1 s steps, until signaled is set */
1151                         for (mic->mic_virtblk.signaled = 0;
1152                                 !mic->mic_virtblk.signaled;)
1153                                 sleep(1);
1154                         continue;
1155                 }
1156
1157                 /* backend file is specified. */
1158                 if (!start_virtblk(mic, &vring))
1159                         goto _close_backend;
                     /* one iovec slot per segment advertised in seg_max */
1160                 iovec = malloc(sizeof(*iovec) *
1161                         le32toh(virtblk_dev_page.blk_config.seg_max));
1162                 if (!iovec) {
1163                         mpsslog("%s: can't alloc iovec: %s\n",
1164                                 mic->name, strerror(ENOMEM));
1165                         goto _stop_virtblk;
1166                 }
1167
1168                 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1169                 block_poll.events = POLLIN;
                     /* serve requests until signaled becomes non-zero */
1170                 for (mic->mic_virtblk.signaled = 0;
1171                      !mic->mic_virtblk.signaled;) {
1172                         block_poll.revents = 0;
1173                                         /* timeout in 1 sec to see signaled */
1174                         ret = poll(&block_poll, 1, 1000);
1175                         if (ret < 0) {
1176                                 mpsslog("%s %d: poll failed: %s\n",
1177                                         __func__, __LINE__,
1178                                         strerror(errno));
1179                                 continue;
1180                         }
1181
1182                         if (!(block_poll.revents & POLLIN)) {
1183 #ifdef DEBUG
1184                                 mpsslog("%s %d: block_poll.revents=0x%x\n",
1185                                         __func__, __LINE__, block_poll.revents);
1186 #endif
1187                                 continue;
1188                         }
1189
1190                         /* POLLIN */
                             /* drain every request queued in the avail ring */
1191                         while (vring.info->avail_idx !=
1192                                 le16toh(vring.vr.avail->idx)) {
1193                                 /* read header element */
1194                                 avail_idx =
1195                                         vring.info->avail_idx &
1196                                         (vring.vr.num - 1);
1197                                 desc_idx = le16toh(
1198                                         vring.vr.avail->ring[avail_idx]);
1199                                 desc = &vring.vr.desc[desc_idx];
1200 #ifdef DEBUG
1201                                 mpsslog("%s() %d: avail_idx=%d ",
1202                                         __func__, __LINE__,
1203                                         vring.info->avail_idx);
1204                                 mpsslog("vring.vr.num=%d desc=%p\n",
1205                                         vring.vr.num, desc);
1206 #endif
                                     /*
                                      * NOTE(review): this result is overwritten
                                      * by "status = 0" below before it is read,
                                      * so a failed header check has no effect.
                                      */
1207                                 status = header_error_check(desc);
1208                                 ret = read_header(
1209                                         mic->mic_virtblk.virtio_block_fd,
1210                                         &hdr, desc_idx);
1211                                 if (ret < 0) {
1212                                         mpsslog("%s() %d %s: ret=%d %s\n",
1213                                                 __func__, __LINE__,
1214                                                 mic->name, ret,
1215                                                 strerror(errno));
1216                                         break;
1217                                 }
1218                                 /* buffer element */
1219                                 piov = iovec;
1220                                 status = 0;
                                     /* position in the mapped backend file */
1221                                 fos = mic->mic_virtblk.backend_addr +
1222                                         (hdr.sector * SECTOR_SIZE);
1223                                 buffer_desc_idx = next_desc(desc);
1224                                 desc_idx = buffer_desc_idx;
                                     /*
                                      * Add one iovec entry per chained data
                                      * descriptor. The walk stops at the first
                                      * descriptor without VRING_DESC_F_NEXT;
                                      * that one is not added and is the one
                                      * later given to status_error_check().
                                      * NOTE(review): piov is not bounded by the
                                      * allocated entry count, and desc->len and
                                      * desc->flags are read here without the
                                      * le32toh()/le16toh() used elsewhere.
                                      */
1225                                 for (desc = &vring.vr.desc[buffer_desc_idx];
1226                                      desc->flags & VRING_DESC_F_NEXT;
1227                                      desc_idx = next_desc(desc),
1228                                              desc = &vring.vr.desc[desc_idx]) {
1229                                         piov->iov_len = desc->len;
1230                                         piov->iov_base = fos;
1231                                         piov++;
1232                                         fos += desc->len;
1233                                 }
1234                                 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1235                                 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1236                                         VIRTIO_BLK_T_GET_ID)) {
1237                                         /*
1238                                           VIRTIO_BLK_T_IN - does not do
1239                                           anything. Probably for documenting.
1240                                           VIRTIO_BLK_T_SCSI_CMD - for
1241                                           virtio_scsi.
1242                                           VIRTIO_BLK_T_FLUSH - turned off in
1243                                           config space.
1244                                           VIRTIO_BLK_T_BARRIER - defined but not
1245                                           used in anywhere.
1246                                         */
1247                                         mpsslog("%s() %d: type %x ",
1248                                                 __func__, __LINE__,
1249                                                 hdr.type);
1250                                         mpsslog("is not supported\n");
1251                                         status = -ENOTSUP;
1252
1253                                 } else {
1254                                         ret = transfer_blocks(
1255                                         mic->mic_virtblk.virtio_block_fd,
1256                                                 iovec,
1257                                                 piov - iovec);
                                             /*
                                              * NOTE(review): status is always 0
                                              * on this path, so this condition
                                              * is never true and a failed
                                              * transfer is not recorded.
                                              */
1258                                         if (ret < 0 &&
1259                                             status != 0)
1260                                                 status = ret;
1261                                 }
1262                                 /* write status and update used pointer */
                                     /*
                                      * NOTE(review): a non-zero status is
                                      * replaced by the result of the length
                                      * check, which is 0 when the descriptor is
                                      * one byte long; confirm this is intended.
                                      */
1263                                 if (status != 0)
1264                                         status = status_error_check(desc);
                                     /* the write_status() result is not checked */
1265                                 ret = write_status(
1266                                         mic->mic_virtblk.virtio_block_fd,
1267                                         &status);
1268 #ifdef DEBUG
1269                                 mpsslog("%s() %d: write status=%d on desc=%p\n",
1270                                         __func__, __LINE__,
1271                                         status, desc);
1272 #endif
1273                         }
1274                 }
1275                 free(iovec);
1276 _stop_virtblk:
1277                 stop_virtblk(mic);
1278 _close_backend:
1279                 close_backend(mic);
1280         }  /* forever */
1281
1282         pthread_exit(NULL);
1283 }
1284
1285 static void
1286 reset(struct mic_info *mic)
1287 {
1288 #define RESET_TIMEOUT 120
1289         int i = RESET_TIMEOUT;
1290         setsysfs(mic->name, "state", "reset");
1291         while (i) {
1292                 char *state;
1293                 state = readsysfs(mic->name, "state");
1294                 if (!state)
1295                         goto retry;
1296                 mpsslog("%s: %s %d state %s\n",
1297                         mic->name, __func__, __LINE__, state);
1298
1299                 /*
1300                  * If the shutdown was initiated by OSPM, the state stays
1301                  * in "suspended" which is also a valid condition for reset.
1302                  */
1303                 if ((!strcmp(state, "offline")) ||
1304                     (!strcmp(state, "suspended"))) {
1305                         free(state);
1306                         break;
1307                 }
1308                 free(state);
1309 retry:
1310                 sleep(1);
1311                 i--;
1312         }
1313 }
1314
1315 static int
1316 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1317 {
1318         if (!strcmp(shutdown_status, "nop"))
1319                 return MIC_NOP;
1320         if (!strcmp(shutdown_status, "crashed"))
1321                 return MIC_CRASHED;
1322         if (!strcmp(shutdown_status, "halted"))
1323                 return MIC_HALTED;
1324         if (!strcmp(shutdown_status, "poweroff"))
1325                 return MIC_POWER_OFF;
1326         if (!strcmp(shutdown_status, "restart"))
1327                 return MIC_RESTART;
1328         mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1329         /* Invalid state */
1330         assert(0);
1331 };
1332
1333 static int get_mic_state(struct mic_info *mic, char *state)
1334 {
1335         if (!strcmp(state, "offline"))
1336                 return MIC_OFFLINE;
1337         if (!strcmp(state, "online"))
1338                 return MIC_ONLINE;
1339         if (!strcmp(state, "shutting_down"))
1340                 return MIC_SHUTTING_DOWN;
1341         if (!strcmp(state, "reset_failed"))
1342                 return MIC_RESET_FAILED;
1343         if (!strcmp(state, "suspending"))
1344                 return MIC_SUSPENDING;
1345         if (!strcmp(state, "suspended"))
1346                 return MIC_SUSPENDED;
1347         mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1348         /* Invalid state */
1349         assert(0);
1350 };
1351
1352 static void mic_handle_shutdown(struct mic_info *mic)
1353 {
1354 #define SHUTDOWN_TIMEOUT 60
1355         int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1356         char *shutdown_status;
1357         while (i) {
1358                 shutdown_status = readsysfs(mic->name, "shutdown_status");
1359                 if (!shutdown_status)
1360                         continue;
1361                 mpsslog("%s: %s %d shutdown_status %s\n",
1362                         mic->name, __func__, __LINE__, shutdown_status);
1363                 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1364                 case MIC_RESTART:
1365                         mic->restart = 1;
1366                 case MIC_HALTED:
1367                 case MIC_POWER_OFF:
1368                 case MIC_CRASHED:
1369                         free(shutdown_status);
1370                         goto reset;
1371                 default:
1372                         break;
1373                 }
1374                 free(shutdown_status);
1375                 sleep(1);
1376                 i--;
1377         }
1378 reset:
1379         ret = kill(mic->pid, SIGTERM);
1380         mpsslog("%s: %s %d kill pid %d ret %d\n",
1381                 mic->name, __func__, __LINE__,
1382                 mic->pid, ret);
1383         if (!ret) {
1384                 ret = waitpid(mic->pid, &stat,
1385                         WIFSIGNALED(stat));
1386                 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1387                         mic->name, __func__, __LINE__,
1388                         ret, mic->pid);
1389         }
1390         if (ret == mic->pid)
1391                 reset(mic);
1392 }
1393
1394 static void *
1395 mic_config(void *arg)
1396 {
1397         struct mic_info *mic = (struct mic_info *)arg;
1398         char *state = NULL;
1399         char pathname[PATH_MAX];
1400         int fd, ret;
1401         struct pollfd ufds[1];
1402         char value[4096];
1403
1404         snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1405                  MICSYSFSDIR, mic->name, "state");
1406
1407         fd = open(pathname, O_RDONLY);
1408         if (fd < 0) {
1409                 mpsslog("%s: opening file %s failed %s\n",
1410                         mic->name, pathname, strerror(errno));
1411                 goto error;
1412         }
1413
1414         do {
1415                 ret = read(fd, value, sizeof(value));
1416                 if (ret < 0) {
1417                         mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1418                                 mic->name, pathname, strerror(errno));
1419                         goto close_error1;
1420                 }
1421 retry:
1422                 state = readsysfs(mic->name, "state");
1423                 if (!state)
1424                         goto retry;
1425                 mpsslog("%s: %s %d state %s\n",
1426                         mic->name, __func__, __LINE__, state);
1427                 switch (get_mic_state(mic, state)) {
1428                 case MIC_SHUTTING_DOWN:
1429                         mic_handle_shutdown(mic);
1430                         goto close_error;
1431                 case MIC_SUSPENDING:
1432                         mic->boot_on_resume = 1;
1433                         setsysfs(mic->name, "state", "suspend");
1434                         mic_handle_shutdown(mic);
1435                         goto close_error;
1436                 case MIC_OFFLINE:
1437                         if (mic->boot_on_resume) {
1438                                 setsysfs(mic->name, "state", "boot");
1439                                 mic->boot_on_resume = 0;
1440                         }
1441                         break;
1442                 default:
1443                         break;
1444                 }
1445                 free(state);
1446
1447                 ufds[0].fd = fd;
1448                 ufds[0].events = POLLERR | POLLPRI;
1449                 ret = poll(ufds, 1, -1);
1450                 if (ret < 0) {
1451                         mpsslog("%s: poll failed %s\n",
1452                                 mic->name, strerror(errno));
1453                         goto close_error1;
1454                 }
1455         } while (1);
1456 close_error:
1457         free(state);
1458 close_error1:
1459         close(fd);
1460 error:
1461         init_mic(mic);
1462         pthread_exit(NULL);
1463 }
1464
1465 static void
1466 set_cmdline(struct mic_info *mic)
1467 {
1468         char buffer[PATH_MAX];
1469         int len;
1470
1471         len = snprintf(buffer, PATH_MAX,
1472                 "clocksource=tsc highres=off nohz=off ");
1473         len += snprintf(buffer + len, PATH_MAX,
1474                 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1475         len += snprintf(buffer + len, PATH_MAX,
1476                 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1477                 mic->id);
1478
1479         setsysfs(mic->name, "cmdline", buffer);
1480         mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1481         snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1482         mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1483 }
1484
1485 static void
1486 set_log_buf_info(struct mic_info *mic)
1487 {
1488         int fd;
1489         off_t len;
1490         char system_map[] = "/lib/firmware/mic/System.map";
1491         char *map, *temp, log_buf[17] = {'\0'};
1492
1493         fd = open(system_map, O_RDONLY);
1494         if (fd < 0) {
1495                 mpsslog("%s: Opening System.map failed: %d\n",
1496                         mic->name, errno);
1497                 return;
1498         }
1499         len = lseek(fd, 0, SEEK_END);
1500         if (len < 0) {
1501                 mpsslog("%s: Reading System.map size failed: %d\n",
1502                         mic->name, errno);
1503                 close(fd);
1504                 return;
1505         }
1506         map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1507         if (map == MAP_FAILED) {
1508                 mpsslog("%s: mmap of System.map failed: %d\n",
1509                         mic->name, errno);
1510                 close(fd);
1511                 return;
1512         }
1513         temp = strstr(map, "__log_buf");
1514         if (!temp) {
1515                 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1516                 munmap(map, len);
1517                 close(fd);
1518                 return;
1519         }
1520         strncpy(log_buf, temp - 19, 16);
1521         setsysfs(mic->name, "log_buf_addr", log_buf);
1522         mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1523         temp = strstr(map, "log_buf_len");
1524         if (!temp) {
1525                 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1526                 munmap(map, len);
1527                 close(fd);
1528                 return;
1529         }
1530         strncpy(log_buf, temp - 19, 16);
1531         setsysfs(mic->name, "log_buf_len", log_buf);
1532         mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1533         munmap(map, len);
1534         close(fd);
1535 }
1536
1537 static void init_mic(struct mic_info *mic);
1538
1539 static void
1540 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1541 {
1542         struct mic_info *mic;
1543
1544         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1545                 mic->mic_virtblk.signaled = 1/* true */;
1546 }
1547
1548 static void
1549 init_mic(struct mic_info *mic)
1550 {
1551         struct sigaction ignore = {
1552                 .sa_flags = 0,
1553                 .sa_handler = SIG_IGN
1554         };
1555         struct sigaction act = {
1556                 .sa_flags = SA_SIGINFO,
1557                 .sa_sigaction = change_virtblk_backend,
1558         };
1559         char buffer[PATH_MAX];
1560         int err;
1561
1562         /*
1563          * Currently, one virtio block device is supported for each MIC card
1564          * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1565          * The signal informs the virtio block backend about a change in the
1566          * configuration file which specifies the virtio backend file name on
1567          * the host. Virtio block backend then re-reads the configuration file
1568          * and switches to the new block device. This signalling mechanism may
1569          * not be required once multiple virtio block devices are supported by
1570          * the MIC daemon.
1571          */
1572         sigaction(SIGUSR1, &ignore, NULL);
1573
1574         mic->pid = fork();
1575         switch (mic->pid) {
1576         case 0:
1577                 set_log_buf_info(mic);
1578                 set_cmdline(mic);
1579                 add_virtio_device(mic, &virtcons_dev_page.dd);
1580                 add_virtio_device(mic, &virtnet_dev_page.dd);
1581                 err = pthread_create(&mic->mic_console.console_thread, NULL,
1582                         virtio_console, mic);
1583                 if (err)
1584                         mpsslog("%s virtcons pthread_create failed %s\n",
1585                                 mic->name, strerror(err));
1586                 err = pthread_create(&mic->mic_net.net_thread, NULL,
1587                         virtio_net, mic);
1588                 if (err)
1589                         mpsslog("%s virtnet pthread_create failed %s\n",
1590                                 mic->name, strerror(err));
1591                 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1592                         virtio_block, mic);
1593                 if (err)
1594                         mpsslog("%s virtblk pthread_create failed %s\n",
1595                                 mic->name, strerror(err));
1596                 sigemptyset(&act.sa_mask);
1597                 err = sigaction(SIGUSR1, &act, NULL);
1598                 if (err)
1599                         mpsslog("%s sigaction SIGUSR1 failed %s\n",
1600                                 mic->name, strerror(errno));
1601                 while (1)
1602                         sleep(60);
1603         case -1:
1604                 mpsslog("fork failed MIC name %s id %d errno %d\n",
1605                         mic->name, mic->id, errno);
1606                 break;
1607         default:
1608                 if (mic->restart) {
1609                         snprintf(buffer, PATH_MAX, "boot");
1610                         setsysfs(mic->name, "state", buffer);
1611                         mpsslog("%s restarting mic %d\n",
1612                                 mic->name, mic->restart);
1613                         mic->restart = 0;
1614                 }
1615                 pthread_create(&mic->config_thread, NULL, mic_config, mic);
1616         }
1617 }
1618
1619 static void
1620 start_daemon(void)
1621 {
1622         struct mic_info *mic;
1623
1624         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1625                 init_mic(mic);
1626
1627         while (1)
1628                 sleep(60);
1629 }
1630
1631 static int
1632 init_mic_list(void)
1633 {
1634         struct mic_info *mic = &mic_list;
1635         struct dirent *file;
1636         DIR *dp;
1637         int cnt = 0;
1638
1639         dp = opendir(MICSYSFSDIR);
1640         if (!dp)
1641                 return 0;
1642
1643         while ((file = readdir(dp)) != NULL) {
1644                 if (!strncmp(file->d_name, "mic", 3)) {
1645                         mic->next = calloc(1, sizeof(struct mic_info));
1646                         if (mic->next) {
1647                                 mic = mic->next;
1648                                 mic->id = atoi(&file->d_name[3]);
1649                                 mic->name = malloc(strlen(file->d_name) + 16);
1650                                 if (mic->name)
1651                                         strcpy(mic->name, file->d_name);
1652                                 mpsslog("MIC name %s id %d\n", mic->name,
1653                                         mic->id);
1654                                 cnt++;
1655                         }
1656                 }
1657         }
1658
1659         closedir(dp);
1660         return cnt;
1661 }
1662
1663 void
1664 mpsslog(char *format, ...)
1665 {
1666         va_list args;
1667         char buffer[4096];
1668         char ts[52], *ts1;
1669         time_t t;
1670
1671         if (logfp == NULL)
1672                 return;
1673
1674         va_start(args, format);
1675         vsprintf(buffer, format, args);
1676         va_end(args);
1677
1678         time(&t);
1679         ts1 = ctime_r(&t, ts);
1680         ts1[strlen(ts1) - 1] = '\0';
1681         fprintf(logfp, "%s: %s", ts1, buffer);
1682
1683         fflush(logfp);
1684 }
1685
1686 int
1687 main(int argc, char *argv[])
1688 {
1689         int cnt;
1690         pid_t pid;
1691
1692         myname = argv[0];
1693
1694         logfp = fopen(LOGFILE_NAME, "a+");
1695         if (!logfp) {
1696                 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1697                 exit(1);
1698         }
1699         pid = fork();
1700         switch (pid) {
1701         case 0:
1702                 break;
1703         case -1:
1704                 exit(2);
1705         default:
1706                 exit(0);
1707         }
1708
1709         mpsslog("MIC Daemon start\n");
1710
1711         cnt = init_mic_list();
1712         if (cnt == 0) {
1713                 mpsslog("MIC module not loaded\n");
1714                 exit(3);
1715         }
1716         mpsslog("MIC found %d devices\n", cnt);
1717
1718         start_daemon();
1719
1720         exit(0);
1721 }