xref: /qemu/nbd/client.c (revision 954a6c4f7862b45617ff3b65609f0f290dcd5077)
1  /*
2   *  Copyright (C) 2016-2019 Red Hat, Inc.
3   *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
4   *
5   *  Network Block Device Client Side
6   *
7   *  This program is free software; you can redistribute it and/or modify
8   *  it under the terms of the GNU General Public License as published by
9   *  the Free Software Foundation; under version 2 of the License.
10   *
11   *  This program is distributed in the hope that it will be useful,
12   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   *  GNU General Public License for more details.
15   *
16   *  You should have received a copy of the GNU General Public License
17   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "qapi/error.h"
22  #include "qemu/queue.h"
23  #include "trace.h"
24  #include "nbd-internal.h"
25  #include "qemu/cutils.h"
26  
27  /* Definitions for opaque data types */
28  
/*
 * File-local queue head for NBDExport entries, set up with its static
 * initializer.
 * NOTE(review): none of the client functions reviewed alongside this
 * declaration reference it -- confirm whether it is still needed here.
 */
29  static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
30  
31  /* That's all folks */
32  
33  /* Basic flow for negotiation
34  
35     Server         Client
36     Negotiate
37  
38     or
39  
40     Server         Client
41     Negotiate #1
42                    Option
43     Negotiate #2
44  
45     ----
46  
47     followed by
48  
49     Server         Client
50                    Request
51     Response
52                    Request
53     Response
54                    ...
55     ...
56                    Request (type == 2)
57  
58  */
59  
60  /* Send an option request.
61   *
62   * The request is for option @opt, with @data containing @len bytes of
63   * additional payload for the request (@len may be -1 to treat @data as
64   * a C string; and @data may be NULL if @len is 0).
65   * Return 0 if successful, -1 with errp set if it is impossible to
66   * continue. */
67  static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
68                                     uint32_t len, const char *data,
69                                     Error **errp)
70  {
71      ERRP_GUARD();
72      NBDOption req;
73      QEMU_BUILD_BUG_ON(sizeof(req) != 16);
74  
75      if (len == -1) {
76          req.length = len = strlen(data);
77      }
78      trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
79  
80      stq_be_p(&req.magic, NBD_OPTS_MAGIC);
81      stl_be_p(&req.option, opt);
82      stl_be_p(&req.length, len);
83  
84      if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
85          error_prepend(errp, "Failed to send option request header: ");
86          return -1;
87      }
88  
89      if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
90          error_prepend(errp, "Failed to send option request data: ");
91          return -1;
92      }
93  
94      return 0;
95  }
96  
97  /* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
98   * not going to attempt further negotiation. */
99  static void nbd_send_opt_abort(QIOChannel *ioc)
100  {
101      /* Technically, a compliant server is supposed to reply to us; but
102       * older servers disconnected instead. At any rate, we're allowed
103       * to disconnect without waiting for the server reply, so we don't
104       * even care if the request makes it to the server, let alone
105       * waiting around for whether the server replies. */
106      nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
107  }
108  
109  
110  /* Receive the header of an option reply, which should match the given
111   * opt.  Read through the length field, but NOT the length bytes of
112   * payload. Return 0 if successful, -1 with errp set if it is
113   * impossible to continue. */
114  static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
115                                      NBDOptionReply *reply, Error **errp)
116  {
117      QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
118      if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
119          nbd_send_opt_abort(ioc);
120          return -1;
121      }
122      reply->magic = be64_to_cpu(reply->magic);
123      reply->option = be32_to_cpu(reply->option);
124      reply->type = be32_to_cpu(reply->type);
125      reply->length = be32_to_cpu(reply->length);
126  
127      trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
128                                     reply->type, nbd_rep_lookup(reply->type),
129                                     reply->length);
130  
131      if (reply->magic != NBD_REP_MAGIC) {
132          error_setg(errp, "Unexpected option reply magic");
133          nbd_send_opt_abort(ioc);
134          return -1;
135      }
136      if (reply->option != opt) {
137          error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
138                     reply->option, nbd_opt_lookup(reply->option),
139                     opt, nbd_opt_lookup(opt));
140          nbd_send_opt_abort(ioc);
141          return -1;
142      }
143      return 0;
144  }
145  
146  /*
147   * If reply represents success, return 1 without further action.  If
148   * reply represents an error, consume the optional payload of the
149   * packet on ioc.  Then return 0 for unsupported (so the client can
150   * fall back to other approaches), where @strict determines if only
151   * ERR_UNSUP or all errors fit that category, or -1 with errp set for
152   * other errors.
153   */
154  static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
155                                  bool strict, Error **errp)
156  {
157      ERRP_GUARD();
158      g_autofree char *msg = NULL;
159  
160      if (!(reply->type & (1 << 31))) {
161          return 1;
162      }
163  
164      if (reply->length) {
165          if (reply->length > NBD_MAX_BUFFER_SIZE) {
166              error_setg(errp, "server error %" PRIu32
167                         " (%s) message is too long",
168                         reply->type, nbd_rep_lookup(reply->type));
169              goto err;
170          }
171          msg = g_malloc(reply->length + 1);
172          if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
173              error_prepend(errp, "Failed to read option error %" PRIu32
174                            " (%s) message: ",
175                            reply->type, nbd_rep_lookup(reply->type));
176              goto err;
177          }
178          msg[reply->length] = '\0';
179          trace_nbd_server_error_msg(reply->type,
180                                     nbd_reply_type_lookup(reply->type), msg);
181      }
182  
183      if (reply->type == NBD_REP_ERR_UNSUP || !strict) {
184          trace_nbd_reply_err_ignored(reply->option,
185                                      nbd_opt_lookup(reply->option),
186                                      reply->type, nbd_rep_lookup(reply->type));
187          return 0;
188      }
189  
190      switch (reply->type) {
191      case NBD_REP_ERR_POLICY:
192          error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
193                     reply->option, nbd_opt_lookup(reply->option));
194          break;
195  
196      case NBD_REP_ERR_INVALID:
197          error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
198                     reply->option, nbd_opt_lookup(reply->option));
199          break;
200  
201      case NBD_REP_ERR_PLATFORM:
202          error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
203                     reply->option, nbd_opt_lookup(reply->option));
204          break;
205  
206      case NBD_REP_ERR_TLS_REQD:
207          error_setg(errp, "TLS negotiation required before option %" PRIu32
208                     " (%s)", reply->option, nbd_opt_lookup(reply->option));
209          error_append_hint(errp, "Did you forget a valid tls-creds?\n");
210          break;
211  
212      case NBD_REP_ERR_UNKNOWN:
213          error_setg(errp, "Requested export not available");
214          break;
215  
216      case NBD_REP_ERR_SHUTDOWN:
217          error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
218                     reply->option, nbd_opt_lookup(reply->option));
219          break;
220  
221      case NBD_REP_ERR_BLOCK_SIZE_REQD:
222          error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
223                     " (%s)", reply->option, nbd_opt_lookup(reply->option));
224          break;
225  
226      default:
227          error_setg(errp, "Unknown error code when asking for option %" PRIu32
228                     " (%s)", reply->option, nbd_opt_lookup(reply->option));
229          break;
230      }
231  
232      if (msg) {
233          error_append_hint(errp, "server reported: %s\n", msg);
234      }
235  
236   err:
237      nbd_send_opt_abort(ioc);
238      return -1;
239  }
240  
241  /* nbd_receive_list:
242   * Process another portion of the NBD_OPT_LIST reply, populating any
243   * name received into *@name. If @description is non-NULL, and the
244   * server provided a description, that is also populated. The caller
245   * must eventually call g_free() on success.
246   * Returns 1 if name and description were set and iteration must continue,
247   *         0 if iteration is complete (including if OPT_LIST unsupported),
248   *         -1 with @errp set if an unrecoverable error occurred.
249   */
250  static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
251                              Error **errp)
252  {
253      NBDOptionReply reply;
254      uint32_t len;
255      uint32_t namelen;
256      g_autofree char *local_name = NULL;
257      g_autofree char *local_desc = NULL;
258      int error;
259  
260      if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
261          return -1;
262      }
263      error = nbd_handle_reply_err(ioc, &reply, true, errp);
264      if (error <= 0) {
265          return error;
266      }
267      len = reply.length;
268  
269      if (reply.type == NBD_REP_ACK) {
270          if (len != 0) {
271              error_setg(errp, "length too long for option end");
272              nbd_send_opt_abort(ioc);
273              return -1;
274          }
275          return 0;
276      } else if (reply.type != NBD_REP_SERVER) {
277          error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
278                     reply.type, nbd_rep_lookup(reply.type),
279                     NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
280          nbd_send_opt_abort(ioc);
281          return -1;
282      }
283  
284      if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
285          error_setg(errp, "incorrect option length %" PRIu32, len);
286          nbd_send_opt_abort(ioc);
287          return -1;
288      }
289      if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
290          nbd_send_opt_abort(ioc);
291          return -1;
292      }
293      len -= sizeof(namelen);
294      if (len < namelen || namelen > NBD_MAX_STRING_SIZE) {
295          error_setg(errp, "incorrect name length in server's list response");
296          nbd_send_opt_abort(ioc);
297          return -1;
298      }
299  
300      local_name = g_malloc(namelen + 1);
301      if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
302          nbd_send_opt_abort(ioc);
303          return -1;
304      }
305      local_name[namelen] = '\0';
306      len -= namelen;
307      if (len) {
308          if (len > NBD_MAX_STRING_SIZE) {
309              error_setg(errp, "incorrect description length in server's "
310                         "list response");
311              nbd_send_opt_abort(ioc);
312              return -1;
313          }
314          local_desc = g_malloc(len + 1);
315          if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
316              nbd_send_opt_abort(ioc);
317              return -1;
318          }
319          local_desc[len] = '\0';
320      }
321  
322      trace_nbd_receive_list(local_name, local_desc ?: "");
323      *name = g_steal_pointer(&local_name);
324      if (description) {
325          *description = g_steal_pointer(&local_desc);
326      }
327      return 1;
328  }
329  
330  
331  /*
332   * nbd_opt_info_or_go:
333   * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
334   * Returns -1 if the option proves the export @info->name cannot be
335   * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
336   * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
337   * go (with the rest of @info populated).
338   */
339  static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
340                                NBDExportInfo *info, Error **errp)
341  {
342      ERRP_GUARD();
343      NBDOptionReply reply;
344      uint32_t len = strlen(info->name);
345      uint16_t type;
346      int error;
347      char *buf;
348  
349      /* The protocol requires that the server send NBD_INFO_EXPORT with
350       * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
351       * flags still 0 is a witness of a broken server. */
352      info->flags = 0;
353  
354      assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
355      trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
356      buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
357      stl_be_p(buf, len);
358      memcpy(buf + 4, info->name, len);
359      /* At most one request, everything else up to server */
360      stw_be_p(buf + 4 + len, info->request_sizes);
361      if (info->request_sizes) {
362          stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
363      }
364      error = nbd_send_option_request(ioc, opt,
365                                      4 + len + 2 + 2 * info->request_sizes,
366                                      buf, errp);
367      g_free(buf);
368      if (error < 0) {
369          return -1;
370      }
371  
372      while (1) {
373          if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
374              return -1;
375          }
376          error = nbd_handle_reply_err(ioc, &reply, true, errp);
377          if (error <= 0) {
378              return error;
379          }
380          len = reply.length;
381  
382          if (reply.type == NBD_REP_ACK) {
383              /*
384               * Server is done sending info, and moved into transmission
385               * phase for NBD_OPT_GO, but make sure it sent flags
386               */
387              if (len) {
388                  error_setg(errp, "server sent invalid NBD_REP_ACK");
389                  return -1;
390              }
391              if (!info->flags) {
392                  error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
393                  return -1;
394              }
395              trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
396              return 1;
397          }
398          if (reply.type != NBD_REP_INFO) {
399              error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
400                         reply.type, nbd_rep_lookup(reply.type),
401                         NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
402              nbd_send_opt_abort(ioc);
403              return -1;
404          }
405          if (len < sizeof(type)) {
406              error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
407                         len);
408              nbd_send_opt_abort(ioc);
409              return -1;
410          }
411          if (nbd_read16(ioc, &type, "info type", errp) < 0) {
412              nbd_send_opt_abort(ioc);
413              return -1;
414          }
415          len -= sizeof(type);
416          switch (type) {
417          case NBD_INFO_EXPORT:
418              if (len != sizeof(info->size) + sizeof(info->flags)) {
419                  error_setg(errp, "remaining export info len %" PRIu32
420                             " is unexpected size", len);
421                  nbd_send_opt_abort(ioc);
422                  return -1;
423              }
424              if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
425                  nbd_send_opt_abort(ioc);
426                  return -1;
427              }
428              if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
429                  nbd_send_opt_abort(ioc);
430                  return -1;
431              }
432              if (info->min_block &&
433                  !QEMU_IS_ALIGNED(info->size, info->min_block)) {
434                  error_setg(errp, "export size %" PRIu64 " is not multiple of "
435                             "minimum block size %" PRIu32, info->size,
436                             info->min_block);
437                  nbd_send_opt_abort(ioc);
438                  return -1;
439              }
440              trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
441              break;
442  
443          case NBD_INFO_BLOCK_SIZE:
444              if (len != sizeof(info->min_block) * 3) {
445                  error_setg(errp, "remaining export info len %" PRIu32
446                             " is unexpected size", len);
447                  nbd_send_opt_abort(ioc);
448                  return -1;
449              }
450              if (nbd_read32(ioc, &info->min_block, "info minimum block size",
451                             errp) < 0) {
452                  nbd_send_opt_abort(ioc);
453                  return -1;
454              }
455              if (!is_power_of_2(info->min_block)) {
456                  error_setg(errp, "server minimum block size %" PRIu32
457                             " is not a power of two", info->min_block);
458                  nbd_send_opt_abort(ioc);
459                  return -1;
460              }
461              if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
462                             errp) < 0)
463              {
464                  nbd_send_opt_abort(ioc);
465                  return -1;
466              }
467              if (!is_power_of_2(info->opt_block) ||
468                  info->opt_block < info->min_block) {
469                  error_setg(errp, "server preferred block size %" PRIu32
470                             " is not valid", info->opt_block);
471                  nbd_send_opt_abort(ioc);
472                  return -1;
473              }
474              if (nbd_read32(ioc, &info->max_block, "info maximum block size",
475                             errp) < 0)
476              {
477                  nbd_send_opt_abort(ioc);
478                  return -1;
479              }
480              if (info->max_block < info->min_block) {
481                  error_setg(errp, "server maximum block size %" PRIu32
482                             " is not valid", info->max_block);
483                  nbd_send_opt_abort(ioc);
484                  return -1;
485              }
486              trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
487                                            info->max_block);
488              break;
489  
490          default:
491              /*
492               * Not worth the bother to check if NBD_INFO_NAME or
493               * NBD_INFO_DESCRIPTION exceed NBD_MAX_STRING_SIZE.
494               */
495              trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
496              if (nbd_drop(ioc, len, errp) < 0) {
497                  error_prepend(errp, "Failed to read info payload: ");
498                  nbd_send_opt_abort(ioc);
499                  return -1;
500              }
501              break;
502          }
503      }
504  }
505  
506  /* Return -1 on failure, 0 if wantname is an available export. */
507  static int nbd_receive_query_exports(QIOChannel *ioc,
508                                       const char *wantname,
509                                       Error **errp)
510  {
511      bool list_empty = true;
512      bool found_export = false;
513  
514      trace_nbd_receive_query_exports_start(wantname);
515      if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
516          return -1;
517      }
518  
519      while (1) {
520          char *name;
521          int ret = nbd_receive_list(ioc, &name, NULL, errp);
522  
523          if (ret < 0) {
524              /* Server gave unexpected reply */
525              return -1;
526          } else if (ret == 0) {
527              /* Done iterating. */
528              if (list_empty) {
529                  /*
530                   * We don't have enough context to tell a server that
531                   * sent an empty list apart from a server that does
532                   * not support the list command; but as this function
533                   * is just used to trigger a nicer error message
534                   * before trying NBD_OPT_EXPORT_NAME, assume the
535                   * export is available.
536                   */
537                  return 0;
538              } else if (!found_export) {
539                  error_setg(errp, "No export with name '%s' available",
540                             wantname);
541                  nbd_send_opt_abort(ioc);
542                  return -1;
543              }
544              trace_nbd_receive_query_exports_success(wantname);
545              return 0;
546          }
547          list_empty = false;
548          if (!strcmp(name, wantname)) {
549              found_export = true;
550          }
551          g_free(name);
552      }
553  }
554  
555  /*
556   * nbd_request_simple_option: Send an option request, and parse the reply.
557   * @strict controls whether ERR_UNSUP or all errors produce 0 status.
558   * return 1 for successful negotiation,
559   *        0 if operation is unsupported,
560   *        -1 with errp set for any other error
561   */
562  static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
563                                       Error **errp)
564  {
565      NBDOptionReply reply;
566      int error;
567  
568      if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
569          return -1;
570      }
571  
572      if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
573          return -1;
574      }
575      error = nbd_handle_reply_err(ioc, &reply, strict, errp);
576      if (error <= 0) {
577          return error;
578      }
579  
580      if (reply.type != NBD_REP_ACK) {
581          error_setg(errp, "Server answered option %d (%s) with unexpected "
582                     "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
583                     reply.type, nbd_rep_lookup(reply.type));
584          nbd_send_opt_abort(ioc);
585          return -1;
586      }
587  
588      if (reply.length != 0) {
589          error_setg(errp, "Option %d ('%s') response length is %" PRIu32
590                     " (it should be zero)", opt, nbd_opt_lookup(opt),
591                     reply.length);
592          nbd_send_opt_abort(ioc);
593          return -1;
594      }
595  
596      return 1;
597  }
598  
599  static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
600                                          QCryptoTLSCreds *tlscreds,
601                                          const char *hostname, Error **errp)
602  {
603      int ret;
604      QIOChannelTLS *tioc;
605      struct NBDTLSHandshakeData data = { 0 };
606  
607      ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
608      if (ret <= 0) {
609          if (ret == 0) {
610              error_setg(errp, "Server don't support STARTTLS option");
611              nbd_send_opt_abort(ioc);
612          }
613          return NULL;
614      }
615  
616      trace_nbd_receive_starttls_new_client();
617      tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
618      if (!tioc) {
619          return NULL;
620      }
621      qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
622      data.loop = g_main_loop_new(g_main_context_default(), FALSE);
623      trace_nbd_receive_starttls_tls_handshake();
624      qio_channel_tls_handshake(tioc,
625                                nbd_tls_handshake,
626                                &data,
627                                NULL,
628                                NULL);
629  
630      if (!data.complete) {
631          g_main_loop_run(data.loop);
632      }
633      g_main_loop_unref(data.loop);
634      if (data.error) {
635          error_propagate(errp, data.error);
636          object_unref(OBJECT(tioc));
637          return NULL;
638      }
639  
640      return QIO_CHANNEL(tioc);
641  }
642  
643  /*
644   * nbd_send_meta_query:
645   * Send 0 or 1 set/list meta context queries.
646   * Return 0 on success, -1 with errp set for any error
647   */
648  static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
649                                 const char *export, const char *query,
650                                 Error **errp)
651  {
652      int ret;
653      uint32_t export_len = strlen(export);
654      uint32_t queries = !!query;
655      uint32_t query_len = 0;
656      uint32_t data_len;
657      char *data;
658      char *p;
659  
660      data_len = sizeof(export_len) + export_len + sizeof(queries);
661      assert(export_len <= NBD_MAX_STRING_SIZE);
662      if (query) {
663          query_len = strlen(query);
664          data_len += sizeof(query_len) + query_len;
665          assert(query_len <= NBD_MAX_STRING_SIZE);
666      } else {
667          assert(opt == NBD_OPT_LIST_META_CONTEXT);
668      }
669      p = data = g_malloc(data_len);
670  
671      trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
672      stl_be_p(p, export_len);
673      memcpy(p += sizeof(export_len), export, export_len);
674      stl_be_p(p += export_len, queries);
675      if (query) {
676          stl_be_p(p += sizeof(queries), query_len);
677          memcpy(p += sizeof(query_len), query, query_len);
678      }
679  
680      ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
681      g_free(data);
682      return ret;
683  }
684  
685  /*
686   * nbd_receive_one_meta_context:
687   * Called in a loop to receive and trace one set/list meta context reply.
688   * Pass non-NULL @name or @id to collect results back to the caller, which
689   * must eventually call g_free().
690   * return 1 if name is set and iteration must continue,
691   *        0 if iteration is complete (including if option is unsupported),
692   *        -1 with errp set for any error
693   */
694  static int nbd_receive_one_meta_context(QIOChannel *ioc,
695                                          uint32_t opt,
696                                          char **name,
697                                          uint32_t *id,
698                                          Error **errp)
699  {
700      int ret;
701      NBDOptionReply reply;
702      char *local_name = NULL;
703      uint32_t local_id;
704  
705      if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
706          return -1;
707      }
708  
709      ret = nbd_handle_reply_err(ioc, &reply, false, errp);
710      if (ret <= 0) {
711          return ret;
712      }
713  
714      if (reply.type == NBD_REP_ACK) {
715          if (reply.length != 0) {
716              error_setg(errp, "Unexpected length to ACK response");
717              nbd_send_opt_abort(ioc);
718              return -1;
719          }
720          return 0;
721      } else if (reply.type != NBD_REP_META_CONTEXT) {
722          error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
723                     reply.type, nbd_rep_lookup(reply.type),
724                     NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
725          nbd_send_opt_abort(ioc);
726          return -1;
727      }
728  
729      if (reply.length <= sizeof(local_id) ||
730          reply.length > NBD_MAX_BUFFER_SIZE) {
731          error_setg(errp, "Failed to negotiate meta context, server "
732                     "answered with unexpected length %" PRIu32,
733                     reply.length);
734          nbd_send_opt_abort(ioc);
735          return -1;
736      }
737  
738      if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
739          return -1;
740      }
741  
742      reply.length -= sizeof(local_id);
743      local_name = g_malloc(reply.length + 1);
744      if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
745          g_free(local_name);
746          return -1;
747      }
748      local_name[reply.length] = '\0';
749      trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
750  
751      if (name) {
752          *name = local_name;
753      } else {
754          g_free(local_name);
755      }
756      if (id) {
757          *id = local_id;
758      }
759      return 1;
760  }
761  
762  /*
763   * nbd_negotiate_simple_meta_context:
764   * Request the server to set the meta context for export @info->name
765   * using @info->x_dirty_bitmap with a fallback to "base:allocation",
766   * setting @info->context_id to the resulting id. Fail if the server
767   * responds with more than one context or with a context different
768   * than the query.
769   * return 1 for successful negotiation,
770   *        0 if operation is unsupported,
771   *        -1 with errp set for any other error
772   */
773  static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
774                                               NBDExportInfo *info,
775                                               Error **errp)
776  {
777      /*
778       * TODO: Removing the x_dirty_bitmap hack will mean refactoring
779       * this function to request and store ids for multiple contexts
780       * (both base:allocation and a dirty bitmap), at which point this
781       * function should lose the term _simple.
782       */
783      int ret;
784      const char *context = info->x_dirty_bitmap ?: "base:allocation";
785      bool received = false;
786      char *name = NULL;
787  
788      if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
789                              info->name, context, errp) < 0) {
790          return -1;
791      }
792  
793      ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
794                                         &name, &info->context_id, errp);
795      if (ret < 0) {
796          return -1;
797      }
798      if (ret == 1) {
799          if (strcmp(context, name)) {
800              error_setg(errp, "Failed to negotiate meta context '%s', server "
801                         "answered with different context '%s'", context,
802                         name);
803              g_free(name);
804              nbd_send_opt_abort(ioc);
805              return -1;
806          }
807          g_free(name);
808          received = true;
809  
810          ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
811                                             NULL, NULL, errp);
812          if (ret < 0) {
813              return -1;
814          }
815      }
816      if (ret != 0) {
817          error_setg(errp, "Server answered with more than one context");
818          nbd_send_opt_abort(ioc);
819          return -1;
820      }
821      return received;
822  }
823  
824  /*
825   * nbd_list_meta_contexts:
826   * Request the server to list all meta contexts for export @info->name.
827   * return 0 if list is complete (even if empty),
828   *        -1 with errp set for any error
829   */
830  static int nbd_list_meta_contexts(QIOChannel *ioc,
831                                    NBDExportInfo *info,
832                                    Error **errp)
833  {
834      int ret;
835      int seen_any = false;
836      int seen_qemu = false;
837  
838      if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
839                              info->name, NULL, errp) < 0) {
840          return -1;
841      }
842  
843      while (1) {
844          char *context;
845  
846          ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
847                                             &context, NULL, errp);
848          if (ret == 0 && seen_any && !seen_qemu) {
849              /*
850               * Work around qemu 3.0 bug: the server forgot to send
851               * "qemu:" replies to 0 queries. If we saw at least one
852               * reply (probably base:allocation), but none of them were
853               * qemu:, then run a more specific query to make sure.
854               */
855              seen_qemu = true;
856              if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
857                                      info->name, "qemu:", errp) < 0) {
858                  return -1;
859              }
860              continue;
861          }
862          if (ret <= 0) {
863              return ret;
864          }
865          seen_any = true;
866          seen_qemu |= strstart(context, "qemu:", NULL);
867          info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
868          info->contexts[info->n_contexts - 1] = context;
869      }
870  }
871  
872  /*
873   * nbd_start_negotiate:
874   * Start the handshake to the server.  After a positive return, the server
875   * is ready to accept additional NBD_OPT requests.
876   * Returns: negative errno: failure talking to server
877   *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
878   *          1: server is newstyle, but can only accept EXPORT_NAME
879   *          2: server is newstyle, but lacks structured replies
880   *          3: server is newstyle and set up for structured replies
881   */
882  static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
883                                 QCryptoTLSCreds *tlscreds,
884                                 const char *hostname, QIOChannel **outioc,
885                                 bool structured_reply, bool *zeroes,
886                                 Error **errp)
887  {
888      ERRP_GUARD();
889      uint64_t magic;
890  
891      trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>");
892  
893      if (zeroes) {
894          *zeroes = true;
895      }
896      if (outioc) {
897          *outioc = NULL;
898      }
899      if (tlscreds && !outioc) {
900          error_setg(errp, "Output I/O channel required for TLS");
901          return -EINVAL;
902      }
903  
904      if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
905          return -EINVAL;
906      }
907      trace_nbd_receive_negotiate_magic(magic);
908  
909      if (magic != NBD_INIT_MAGIC) {
910          error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
911          return -EINVAL;
912      }
913  
914      if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
915          return -EINVAL;
916      }
917      trace_nbd_receive_negotiate_magic(magic);
918  
919      if (magic == NBD_OPTS_MAGIC) {
920          uint32_t clientflags = 0;
921          uint16_t globalflags;
922          bool fixedNewStyle = false;
923  
924          if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
925              return -EINVAL;
926          }
927          trace_nbd_receive_negotiate_server_flags(globalflags);
928          if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
929              fixedNewStyle = true;
930              clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
931          }
932          if (globalflags & NBD_FLAG_NO_ZEROES) {
933              if (zeroes) {
934                  *zeroes = false;
935              }
936              clientflags |= NBD_FLAG_C_NO_ZEROES;
937          }
938          /* client requested flags */
939          clientflags = cpu_to_be32(clientflags);
940          if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
941              error_prepend(errp, "Failed to send clientflags field: ");
942              return -EINVAL;
943          }
944          if (tlscreds) {
945              if (fixedNewStyle) {
946                  *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
947                  if (!*outioc) {
948                      return -EINVAL;
949                  }
950                  ioc = *outioc;
951                  if (aio_context) {
952                      qio_channel_set_blocking(ioc, false, NULL);
953                      qio_channel_attach_aio_context(ioc, aio_context);
954                  }
955              } else {
956                  error_setg(errp, "Server does not support STARTTLS");
957                  return -EINVAL;
958              }
959          }
960          if (fixedNewStyle) {
961              int result = 0;
962  
963              if (structured_reply) {
964                  result = nbd_request_simple_option(ioc,
965                                                     NBD_OPT_STRUCTURED_REPLY,
966                                                     false, errp);
967                  if (result < 0) {
968                      return -EINVAL;
969                  }
970              }
971              return 2 + result;
972          } else {
973              return 1;
974          }
975      } else if (magic == NBD_CLIENT_MAGIC) {
976          if (tlscreds) {
977              error_setg(errp, "Server does not support STARTTLS");
978              return -EINVAL;
979          }
980          return 0;
981      } else {
982          error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
983          return -EINVAL;
984      }
985  }
986  
987  /*
988   * nbd_negotiate_finish_oldstyle:
989   * Populate @info with the size and export flags from an oldstyle server,
990   * but does not consume 124 bytes of reserved zero padding.
991   * Returns 0 on success, -1 with @errp set on failure
992   */
993  static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
994                                           Error **errp)
995  {
996      uint32_t oldflags;
997  
998      if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
999          return -EINVAL;
1000      }
1001  
1002      if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
1003          return -EINVAL;
1004      }
1005      if (oldflags & ~0xffff) {
1006          error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
1007          return -EINVAL;
1008      }
1009      info->flags = oldflags;
1010      return 0;
1011  }
1012  
1013  /*
1014   * nbd_receive_negotiate:
1015   * Connect to server, complete negotiation, and move into transmission phase.
1016   * Returns: negative errno: failure talking to server
1017   *          0: server is connected
1018   */
1019  int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
1020                            QCryptoTLSCreds *tlscreds,
1021                            const char *hostname, QIOChannel **outioc,
1022                            NBDExportInfo *info, Error **errp)
1023  {
1024      ERRP_GUARD();
1025      int result;
1026      bool zeroes;
1027      bool base_allocation = info->base_allocation;
1028  
1029      assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
1030      trace_nbd_receive_negotiate_name(info->name);
1031  
1032      result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
1033                                   info->structured_reply, &zeroes, errp);
1034  
1035      info->structured_reply = false;
1036      info->base_allocation = false;
1037      if (tlscreds && *outioc) {
1038          ioc = *outioc;
1039      }
1040  
1041      switch (result) {
1042      case 3: /* newstyle, with structured replies */
1043          info->structured_reply = true;
1044          if (base_allocation) {
1045              result = nbd_negotiate_simple_meta_context(ioc, info, errp);
1046              if (result < 0) {
1047                  return -EINVAL;
1048              }
1049              info->base_allocation = result == 1;
1050          }
1051          /* fall through */
1052      case 2: /* newstyle, try OPT_GO */
1053          /* Try NBD_OPT_GO first - if it works, we are done (it
1054           * also gives us a good message if the server requires
1055           * TLS).  If it is not available, fall back to
1056           * NBD_OPT_LIST for nicer error messages about a missing
1057           * export, then use NBD_OPT_EXPORT_NAME.  */
1058          result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
1059          if (result < 0) {
1060              return -EINVAL;
1061          }
1062          if (result > 0) {
1063              return 0;
1064          }
1065          /* Check our desired export is present in the
1066           * server export list. Since NBD_OPT_EXPORT_NAME
1067           * cannot return an error message, running this
1068           * query gives us better error reporting if the
1069           * export name is not available.
1070           */
1071          if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
1072              return -EINVAL;
1073          }
1074          /* fall through */
1075      case 1: /* newstyle, but limited to EXPORT_NAME */
1076          /* write the export name request */
1077          if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
1078                                      errp) < 0) {
1079              return -EINVAL;
1080          }
1081  
1082          /* Read the response */
1083          if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
1084              return -EINVAL;
1085          }
1086  
1087          if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
1088              return -EINVAL;
1089          }
1090          break;
1091      case 0: /* oldstyle, parse length and flags */
1092          if (*info->name) {
1093              error_setg(errp, "Server does not support non-empty export names");
1094              return -EINVAL;
1095          }
1096          if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
1097              return -EINVAL;
1098          }
1099          break;
1100      default:
1101          return result;
1102      }
1103  
1104      trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
1105      if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
1106          error_prepend(errp, "Failed to read reserved block: ");
1107          return -EINVAL;
1108      }
1109      return 0;
1110  }
1111  
1112  /* Clean up result of nbd_receive_export_list */
1113  void nbd_free_export_list(NBDExportInfo *info, int count)
1114  {
1115      int i, j;
1116  
1117      if (!info) {
1118          return;
1119      }
1120  
1121      for (i = 0; i < count; i++) {
1122          g_free(info[i].name);
1123          g_free(info[i].description);
1124          for (j = 0; j < info[i].n_contexts; j++) {
1125              g_free(info[i].contexts[j]);
1126          }
1127          g_free(info[i].contexts);
1128      }
1129      g_free(info);
1130  }
1131  
1132  /*
1133   * nbd_receive_export_list:
1134   * Query details about a server's exports, then disconnect without
1135   * going into transmission phase. Return a count of the exports listed
1136   * in @info by the server, or -1 on error. Caller must free @info using
1137   * nbd_free_export_list().
1138   */
1139  int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
1140                              const char *hostname, NBDExportInfo **info,
1141                              Error **errp)
1142  {
1143      int result;
1144      int count = 0;
1145      int i;
1146      int rc;
1147      int ret = -1;
1148      NBDExportInfo *array = NULL;
1149      QIOChannel *sioc = NULL;
1150  
1151      *info = NULL;
1152      result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
1153                                   NULL, errp);
1154      if (tlscreds && sioc) {
1155          ioc = sioc;
1156      }
1157  
1158      switch (result) {
1159      case 2:
1160      case 3:
1161          /* newstyle - use NBD_OPT_LIST to populate array, then try
1162           * NBD_OPT_INFO on each array member. If structured replies
1163           * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
1164          if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
1165              goto out;
1166          }
1167          while (1) {
1168              char *name;
1169              char *desc;
1170  
1171              rc = nbd_receive_list(ioc, &name, &desc, errp);
1172              if (rc < 0) {
1173                  goto out;
1174              } else if (rc == 0) {
1175                  break;
1176              }
1177              array = g_renew(NBDExportInfo, array, ++count);
1178              memset(&array[count - 1], 0, sizeof(*array));
1179              array[count - 1].name = name;
1180              array[count - 1].description = desc;
1181              array[count - 1].structured_reply = result == 3;
1182          }
1183  
1184          for (i = 0; i < count; i++) {
1185              array[i].request_sizes = true;
1186              rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
1187              if (rc < 0) {
1188                  goto out;
1189              } else if (rc == 0) {
1190                  /*
1191                   * Pointless to try rest of loop. If OPT_INFO doesn't work,
1192                   * it's unlikely that meta contexts work either
1193                   */
1194                  break;
1195              }
1196  
1197              if (result == 3 &&
1198                  nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
1199                  goto out;
1200              }
1201          }
1202  
1203          /* Send NBD_OPT_ABORT as a courtesy before hanging up */
1204          nbd_send_opt_abort(ioc);
1205          break;
1206      case 1: /* newstyle, but limited to EXPORT_NAME */
1207          error_setg(errp, "Server does not support export lists");
1208          /* We can't even send NBD_OPT_ABORT, so merely hang up */
1209          goto out;
1210      case 0: /* oldstyle, parse length and flags */
1211          array = g_new0(NBDExportInfo, 1);
1212          array->name = g_strdup("");
1213          count = 1;
1214  
1215          if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
1216              goto out;
1217          }
1218  
1219          /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
1220           * errors now that we have the information we wanted. */
1221          if (nbd_drop(ioc, 124, NULL) == 0) {
1222              NBDRequest request = { .type = NBD_CMD_DISC };
1223  
1224              nbd_send_request(ioc, &request);
1225          }
1226          break;
1227      default:
1228          goto out;
1229      }
1230  
1231      *info = array;
1232      array = NULL;
1233      ret = count;
1234  
1235   out:
1236      qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
1237      qio_channel_close(ioc, NULL);
1238      object_unref(OBJECT(sioc));
1239      nbd_free_export_list(array, count);
1240      return ret;
1241  }
1242  
1243  #ifdef __linux__
1244  int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
1245               Error **errp)
1246  {
1247      unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
1248      unsigned long sectors = info->size / sector_size;
1249  
1250      /* FIXME: Once the kernel module is patched to honor block sizes,
1251       * and to advertise that fact to user space, we should update the
1252       * hand-off to the kernel to use any block sizes we learned. */
1253      assert(!info->request_sizes);
1254      if (info->size / sector_size != sectors) {
1255          error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
1256                     info->size);
1257          return -E2BIG;
1258      }
1259  
1260      trace_nbd_init_set_socket();
1261  
1262      if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
1263          int serrno = errno;
1264          error_setg(errp, "Failed to set NBD socket");
1265          return -serrno;
1266      }
1267  
1268      trace_nbd_init_set_block_size(sector_size);
1269  
1270      if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
1271          int serrno = errno;
1272          error_setg(errp, "Failed setting NBD block size");
1273          return -serrno;
1274      }
1275  
1276      trace_nbd_init_set_size(sectors);
1277      if (info->size % sector_size) {
1278          trace_nbd_init_trailing_bytes(info->size % sector_size);
1279      }
1280  
1281      if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
1282          int serrno = errno;
1283          error_setg(errp, "Failed setting size (in blocks)");
1284          return -serrno;
1285      }
1286  
1287      if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
1288          if (errno == ENOTTY) {
1289              int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
1290              trace_nbd_init_set_readonly();
1291  
1292              if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
1293                  int serrno = errno;
1294                  error_setg(errp, "Failed setting read-only attribute");
1295                  return -serrno;
1296              }
1297          } else {
1298              int serrno = errno;
1299              error_setg(errp, "Failed setting flags");
1300              return -serrno;
1301          }
1302      }
1303  
1304      trace_nbd_init_finish();
1305  
1306      return 0;
1307  }
1308  
1309  int nbd_client(int fd)
1310  {
1311      int ret;
1312      int serrno;
1313  
1314      trace_nbd_client_loop();
1315  
1316      ret = ioctl(fd, NBD_DO_IT);
1317      if (ret < 0 && errno == EPIPE) {
1318          /* NBD_DO_IT normally returns EPIPE when someone has disconnected
1319           * the socket via NBD_DISCONNECT.  We do not want to return 1 in
1320           * that case.
1321           */
1322          ret = 0;
1323      }
1324      serrno = errno;
1325  
1326      trace_nbd_client_loop_ret(ret, strerror(serrno));
1327  
1328      trace_nbd_client_clear_queue();
1329      ioctl(fd, NBD_CLEAR_QUE);
1330  
1331      trace_nbd_client_clear_socket();
1332      ioctl(fd, NBD_CLEAR_SOCK);
1333  
1334      errno = serrno;
1335      return ret;
1336  }
1337  
1338  int nbd_disconnect(int fd)
1339  {
1340      ioctl(fd, NBD_CLEAR_QUE);
1341      ioctl(fd, NBD_DISCONNECT);
1342      ioctl(fd, NBD_CLEAR_SOCK);
1343      return 0;
1344  }
1345  
1346  #endif /* __linux__ */
1347  
1348  int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
1349  {
1350      uint8_t buf[NBD_REQUEST_SIZE];
1351  
1352      trace_nbd_send_request(request->from, request->len, request->handle,
1353                             request->flags, request->type,
1354                             nbd_cmd_lookup(request->type));
1355  
1356      stl_be_p(buf, NBD_REQUEST_MAGIC);
1357      stw_be_p(buf + 4, request->flags);
1358      stw_be_p(buf + 6, request->type);
1359      stq_be_p(buf + 8, request->handle);
1360      stq_be_p(buf + 16, request->from);
1361      stl_be_p(buf + 24, request->len);
1362  
1363      return nbd_write(ioc, buf, sizeof(buf), NULL);
1364  }
1365  
1366  /* nbd_receive_simple_reply
1367   * Read simple reply except magic field (which should be already read).
1368   * Payload is not read (payload is possible for CMD_READ, but here we even
1369   * don't know whether it take place or not).
1370   */
1371  static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
1372                                      Error **errp)
1373  {
1374      int ret;
1375  
1376      assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
1377  
1378      ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
1379                     sizeof(*reply) - sizeof(reply->magic), "reply", errp);
1380      if (ret < 0) {
1381          return ret;
1382      }
1383  
1384      reply->error = be32_to_cpu(reply->error);
1385      reply->handle = be64_to_cpu(reply->handle);
1386  
1387      return 0;
1388  }
1389  
1390  /* nbd_receive_structured_reply_chunk
1391   * Read structured reply chunk except magic field (which should be already
1392   * read).
1393   * Payload is not read.
1394   */
1395  static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
1396                                                NBDStructuredReplyChunk *chunk,
1397                                                Error **errp)
1398  {
1399      int ret;
1400  
1401      assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
1402  
1403      ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
1404                     sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
1405                     errp);
1406      if (ret < 0) {
1407          return ret;
1408      }
1409  
1410      chunk->flags = be16_to_cpu(chunk->flags);
1411      chunk->type = be16_to_cpu(chunk->type);
1412      chunk->handle = be64_to_cpu(chunk->handle);
1413      chunk->length = be32_to_cpu(chunk->length);
1414  
1415      return 0;
1416  }
1417  
1418  /* nbd_read_eof
1419   * Tries to read @size bytes from @ioc.
1420   * Returns 1 on success
1421   *         0 on eof, when no data was read (errp is not set)
1422   *         negative errno on failure (errp is set)
1423   */
1424  static inline int coroutine_fn
1425  nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
1426               Error **errp)
1427  {
1428      bool partial = false;
1429  
1430      assert(size);
1431      while (size > 0) {
1432          struct iovec iov = { .iov_base = buffer, .iov_len = size };
1433          ssize_t len;
1434  
1435          len = qio_channel_readv(ioc, &iov, 1, errp);
1436          if (len == QIO_CHANNEL_ERR_BLOCK) {
1437              qio_channel_yield(ioc, G_IO_IN);
1438              continue;
1439          } else if (len < 0) {
1440              return -EIO;
1441          } else if (len == 0) {
1442              if (partial) {
1443                  error_setg(errp,
1444                             "Unexpected end-of-file before all bytes were read");
1445                  return -EIO;
1446              } else {
1447                  return 0;
1448              }
1449          }
1450  
1451          partial = true;
1452          size -= len;
1453          buffer = (uint8_t*) buffer + len;
1454      }
1455      return 1;
1456  }
1457  
1458  /* nbd_receive_reply
1459   *
1460   * Decreases bs->in_flight while waiting for a new reply. This yield is where
1461   * we wait indefinitely and the coroutine must be able to be safely reentered
1462   * for nbd_client_attach_aio_context().
1463   *
1464   * Returns 1 on success
1465   *         0 on eof, when no data was read (errp is not set)
1466   *         negative errno on failure (errp is set)
1467   */
1468  int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
1469                                     NBDReply *reply, Error **errp)
1470  {
1471      int ret;
1472      const char *type;
1473  
1474      ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
1475      if (ret <= 0) {
1476          return ret;
1477      }
1478  
1479      reply->magic = be32_to_cpu(reply->magic);
1480  
1481      switch (reply->magic) {
1482      case NBD_SIMPLE_REPLY_MAGIC:
1483          ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
1484          if (ret < 0) {
1485              break;
1486          }
1487          trace_nbd_receive_simple_reply(reply->simple.error,
1488                                         nbd_err_lookup(reply->simple.error),
1489                                         reply->handle);
1490          break;
1491      case NBD_STRUCTURED_REPLY_MAGIC:
1492          ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
1493          if (ret < 0) {
1494              break;
1495          }
1496          type = nbd_reply_type_lookup(reply->structured.type);
1497          trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
1498                                                   reply->structured.type, type,
1499                                                   reply->structured.handle,
1500                                                   reply->structured.length);
1501          break;
1502      default:
1503          error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
1504          return -EINVAL;
1505      }
1506      if (ret < 0) {
1507          return ret;
1508      }
1509  
1510      return 1;
1511  }
1512  
1513