From e9738dcd93364d4a3eea275f9423dc4d3b90e7a6 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 27 Jun 2026 02:32:15 +0000 Subject: [PATCH 1/2] refactor(vm): extract Hyper.Vm.Id with firecracker-safe ids Move VM-id generation out of Hyper.gen_vm_id/0 into a dedicated Hyper.Vm.Id module (type t() + generate/0), and migrate the Hyper.Vm.id() type to Hyper.Vm.Id.t() across the tree. generate/0 also switches the encoding from url-safe base64 to lowercase base32: the old encoding emitted '-' and '_', which firecracker rejects in an instance id (InvalidInstanceId / 'Invalid char (_)') and which collide with dm/jailer name rules. base32 ([a-z2-7]) is alphanumeric and safe on all three. --- lib/hyper.ex | 10 +++------- lib/hyper/cluster/routing.ex | 6 +++--- lib/hyper/grpc/codec.ex | 8 ++++---- lib/hyper/img/db/lease.ex | 4 ++-- lib/hyper/node.ex | 4 ++-- lib/hyper/node/fire_vmm.ex | 2 +- lib/hyper/node/fire_vmm/chroot_jail.ex | 2 +- lib/hyper/node/fire_vmm/client.ex | 2 +- lib/hyper/node/fire_vmm/jailer.ex | 8 ++++---- lib/hyper/node/fire_vmm/state.ex | 4 ++-- lib/hyper/node/img.ex | 8 ++++---- lib/hyper/node/img/mutable.ex | 4 ++-- lib/hyper/vm.ex | 1 - lib/hyper/vm/id.ex | 27 ++++++++++++++++++++++++++ test/hyper/vm/id_test.exs | 20 +++++++++++++++++++ 15 files changed, 76 insertions(+), 34 deletions(-) create mode 100644 lib/hyper/vm/id.ex create mode 100644 test/hyper/vm/id_test.exs diff --git a/lib/hyper.ex b/lib/hyper.ex index fb0a626e..9b70f131 100644 --- a/lib/hyper.ex +++ b/lib/hyper.ex @@ -18,7 +18,7 @@ defmodule Hyper do @spec create_vm(Hyper.Vm.Spec.t()) :: {:ok, Hyper.Vm.t()} | {:error, term()} def create_vm(%Hyper.Vm.Spec{} = spec) do with {:ok, arch} <- resolve_arch(spec.arch) do - vm_id = gen_vm_id() + vm_id = Hyper.Vm.Id.generate() spec = %{spec | arch: arch} instance_spec = Hyper.Vm.Instance.spec(spec.type) @@ -34,17 +34,13 @@ defmodule Hyper do end end - @doc "Generate a fresh VM id (url-safe base64, dm-name compatible)." - @spec gen_vm_id() :: Hyper.Vm.id() - def gen_vm_id, do: Base.url_encode64(:crypto.strong_rand_bytes(9), padding: false) - @spec resolve_arch(Hyper.Vm.Instance.arch() | nil) :: {:ok, Hyper.Vm.Instance.arch()} | {:error, term()} defp resolve_arch(nil), do: Sys.Arch.current() defp resolve_arch(arch), do: {:ok, arch} @doc "Cluster-wide: which node currently runs `vm_id`? `nil` if unknown." - @spec whereis(Hyper.Vm.id()) :: node() | nil + @spec whereis(Hyper.Vm.Id.t()) :: node() | nil def whereis(vm_id), do: Hyper.Cluster.Routing.whereis(vm_id) @doc """ @@ -57,7 +53,7 @@ defmodule Hyper do died with its host, so "unknown" is the truthful answer. Only `:erpc`'s own transport failures are swallowed; a genuine fault in the lookup still raises. """ - @spec id(Hyper.Vm.t()) :: Hyper.Vm.id() | nil + @spec id(Hyper.Vm.t()) :: Hyper.Vm.Id.t() | nil def id(pid) when is_pid(pid) do :erpc.call(node(pid), Hyper.Cluster.Routing, :id_for, [pid]) catch diff --git a/lib/hyper/cluster/routing.ex b/lib/hyper/cluster/routing.ex index c7802595..1f60f8f7 100644 --- a/lib/hyper/cluster/routing.ex +++ b/lib/hyper/cluster/routing.ex @@ -29,7 +29,7 @@ defmodule Hyper.Cluster.Routing do def via(key), do: {:via, Horde.Registry, {@name, key}} @doc "Which node currently runs `vm_id`? `nil` if unknown." - @spec whereis(Hyper.Vm.id()) :: node() | nil + @spec whereis(Hyper.Vm.Id.t()) :: node() | nil @decorate with_span("Hyper.Cluster.Routing.whereis", include: [:vm_id]) def whereis(vm_id) do case Horde.Registry.lookup(@name, {vm_id, :supervisor}) do @@ -43,7 +43,7 @@ defmodule Hyper.Cluster.Routing do replica via a registry match spec; intended to be called on the node that owns `pid` (see `Hyper.id/1`). """ - @spec id_for(pid()) :: Hyper.Vm.id() | nil + @spec id_for(pid()) :: Hyper.Vm.Id.t() | nil @decorate with_span("Hyper.Cluster.Routing.id_for") def id_for(pid) when is_pid(pid) do case Horde.Registry.select(@name, [ @@ -55,7 +55,7 @@ defmodule Hyper.Cluster.Routing do end @doc "Every VM the cluster currently knows about, paired with the node it runs on." - @spec all() :: [{Hyper.Vm.id(), node()}] + @spec all() :: [{Hyper.Vm.Id.t(), node()}] @decorate with_span("Hyper.Cluster.Routing.all") def all do @name diff --git a/lib/hyper/grpc/codec.ex b/lib/hyper/grpc/codec.ex index 650040a1..9da622a0 100644 --- a/lib/hyper/grpc/codec.ex +++ b/lib/hyper/grpc/codec.ex @@ -85,15 +85,15 @@ defmodule Hyper.Grpc.Codec do end @doc "Convert a domain result to an outbound response message, or an error to `GRPC.RPCError`." - @spec to_grpc({:created, Hyper.Vm.id(), node()}) :: CreateVmResponse.t() + @spec to_grpc({:created, Hyper.Vm.Id.t(), node()}) :: CreateVmResponse.t() def to_grpc({:created, vm_id, node}) when is_binary(vm_id), do: %CreateVmResponse{vm_id: vm_id, node: to_string(node)} - @spec to_grpc({:located, Hyper.Vm.id(), node()}) :: GetVmResponse.t() + @spec to_grpc({:located, Hyper.Vm.Id.t(), node()}) :: GetVmResponse.t() def to_grpc({:located, vm_id, node}), do: %GetVmResponse{vm_id: vm_id, node: to_string(node)} - @spec to_grpc({:vms, [{Hyper.Vm.id(), node()}]}) :: ListVmsResponse.t() + @spec to_grpc({:vms, [{Hyper.Vm.Id.t(), node()}]}) :: ListVmsResponse.t() def to_grpc({:vms, vms}), do: %ListVmsResponse{vms: Enum.map(vms, &vm/1)} @@ -117,7 +117,7 @@ defmodule Hyper.Grpc.Codec do def to_grpc({:exit, {:nodedown, _}}), do: rpc_error(:machine_unreachable) def to_grpc({:exit, reason}), do: rpc_error({:stop_failed, reason}) - @spec vm({Hyper.Vm.id(), node()}) :: Vm.t() + @spec vm({Hyper.Vm.Id.t(), node()}) :: Vm.t() defp vm({vm_id, node}), do: %Vm{vm_id: vm_id, node: to_string(node)} @spec instance_type(instance_enum()) :: {:ok, Hyper.Vm.Instance.t()} diff --git a/lib/hyper/img/db/lease.ex b/lib/hyper/img/db/lease.ex index 7c737406..979b5f84 100644 --- a/lib/hyper/img/db/lease.ex +++ b/lib/hyper/img/db/lease.ex @@ -49,7 +49,7 @@ defmodule Hyper.Img.Db.Lease do Upserts on `(node_id, vm_id)` - the same call both takes a fresh lease and heartbeats a live one. """ - @spec bump(Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: + @spec bump(Hyper.Img.id(), Hyper.Vm.Id.t(), Unit.Time.t()) :: {:ok, %__MODULE__{}} | {:error, Ecto.Changeset.t()} @decorate with_span("Hyper.Img.Db.Lease.bump", include: [:image_id, :vm_id]) def bump(image_id, vm_id, ttl) do @@ -72,7 +72,7 @@ defmodule Hyper.Img.Db.Lease do Release the lease issued to the given node_id and the given vm_id. Since each VM only ever uses one image, it is not necessary to specify the image id. """ - @spec release(Hyper.Vm.id()) :: :ok + @spec release(Hyper.Vm.Id.t()) :: :ok @decorate with_span("Hyper.Img.Db.Lease.release", include: [:vm_id]) def release(vm_id) do query = from(l in __MODULE__, where: l.node_id == ^to_string(node()) and l.vm_id == ^vm_id) diff --git a/lib/hyper/node.ex b/lib/hyper/node.ex index a5e0f63b..77870843 100644 --- a/lib/hyper/node.ex +++ b/lib/hyper/node.ex @@ -63,7 +63,7 @@ defmodule Hyper.Node do layer, resolve the kernel, and start the VM supervisor. The uid is freed and the mutable layer torn down automatically when the VM supervisor dies. """ - @spec start_image_vm(Hyper.Vm.id(), Hyper.Vm.Spec.t()) :: {:ok, pid()} | {:error, term()} + @spec start_image_vm(Hyper.Vm.Id.t(), Hyper.Vm.Spec.t()) :: {:ok, pid()} | {:error, term()} @decorate with_span("Hyper.Node.start_image_vm", include: [:vm_id, :spec]) def start_image_vm(vm_id, %Hyper.Vm.Spec{} = spec) do with {:ok, uid} <- Users.claim(), @@ -89,7 +89,7 @@ defmodule Hyper.Node do end @doc false - @spec build_opts(Hyper.Vm.id(), Hyper.Vm.Spec.t(), Users.id(), pid(), Path.t()) :: + @spec build_opts(Hyper.Vm.Id.t(), Hyper.Vm.Spec.t(), Users.id(), pid(), Path.t()) :: FireVMM.Opts.t() def build_opts(vm_id, %Hyper.Vm.Spec{} = spec, uid, mutable, kernel) do %FireVMM.Opts{ diff --git a/lib/hyper/node/fire_vmm.ex b/lib/hyper/node/fire_vmm.ex index 7e5603b2..12f8236b 100644 --- a/lib/hyper/node/fire_vmm.ex +++ b/lib/hyper/node/fire_vmm.ex @@ -34,7 +34,7 @@ defmodule Hyper.Node.FireVMM do defstruct [:vm_id, :uid, :gid, :type, :arch, :mutable, :kernel, :boot_args] @type t :: %__MODULE__{ - vm_id: Hyper.Vm.id(), + vm_id: Hyper.Vm.Id.t(), uid: Hyper.Node.Users.id(), gid: Hyper.Node.Users.id(), type: Hyper.Vm.Instance.t(), diff --git a/lib/hyper/node/fire_vmm/chroot_jail.ex b/lib/hyper/node/fire_vmm/chroot_jail.ex index edaa4f1d..cab7364c 100644 --- a/lib/hyper/node/fire_vmm/chroot_jail.ex +++ b/lib/hyper/node/fire_vmm/chroot_jail.ex @@ -27,7 +27,7 @@ defmodule Hyper.Node.FireVMM.ChrootJail do `uid:gid`), and return `cold` with its kernel + rootfs paths rewritten to their in-jail equivalents. Fails the boot if either artifact cannot be staged. """ - @spec stage(Hyper.Vm.id(), non_neg_integer(), non_neg_integer(), Cold.t()) :: + @spec stage(Hyper.Vm.Id.t(), non_neg_integer(), non_neg_integer(), Cold.t()) :: {:ok, Cold.t()} | {:error, term()} @decorate with_span("Hyper.Node.FireVMM.ChrootJail.stage", include: [:vm_id]) def stage(vm_id, uid, gid, %Cold{} = cold) do diff --git a/lib/hyper/node/fire_vmm/client.ex b/lib/hyper/node/fire_vmm/client.ex index c6901ae2..f3911bcf 100644 --- a/lib/hyper/node/fire_vmm/client.ex +++ b/lib/hyper/node/fire_vmm/client.ex @@ -66,7 +66,7 @@ defmodule Hyper.Node.FireVMM.Client do GenServer.start_link(__MODULE__, opts, gen_opts(name)) end - @spec via(Hyper.Vm.id()) :: GenServer.name() + @spec via(Hyper.Vm.Id.t()) :: GenServer.name() def via(vm_id), do: Hyper.Cluster.Routing.via({vm_id, :client}) # Cap on a single Firecracker API call. diff --git a/lib/hyper/node/fire_vmm/jailer.ex b/lib/hyper/node/fire_vmm/jailer.ex index 381f9d23..ace7a553 100644 --- a/lib/hyper/node/fire_vmm/jailer.ex +++ b/lib/hyper/node/fire_vmm/jailer.ex @@ -125,13 +125,13 @@ defmodule Hyper.Node.FireVMM.Jailer do end @doc "Host path of the VM's per-VM jail dir (`//`)." - @spec chroot_dir(Hyper.Vm.id()) :: Path.t() + @spec chroot_dir(Hyper.Vm.Id.t()) :: Path.t() def chroot_dir(id) do Path.join([Hyper.Cfg.Dirs.chroot_base(), exec_name(), id]) end @doc "Host path of the VM's chroot root (`///root`)." - @spec chroot_root(Hyper.Vm.id()) :: Path.t() + @spec chroot_root(Hyper.Vm.Id.t()) :: Path.t() def chroot_root(id) do Path.join(chroot_dir(id), "root") end @@ -141,7 +141,7 @@ defmodule Hyper.Node.FireVMM.Jailer do cgroup the jailer creates for firecracker. Reconstructed (the jailer owns its placement) so a relaunch can clear the stale leaf left by a prior incarnation. """ - @spec cgroup_dir(Hyper.Vm.id()) :: Path.t() + @spec cgroup_dir(Hyper.Vm.Id.t()) :: Path.t() def cgroup_dir(id) do Path.join(["/sys/fs/cgroup", Hyper.Cfg.Jails.cgroup(), exec_name(), id]) end @@ -153,7 +153,7 @@ defmodule Hyper.Node.FireVMM.Jailer do derive it independently and are guaranteed to agree. We do not control where the jailer places the socket, so the path is reconstructed here. """ - @spec host_socket(Hyper.Vm.id()) :: Path.t() + @spec host_socket(Hyper.Vm.Id.t()) :: Path.t() def host_socket(id) do Path.join([ Hyper.Cfg.Dirs.chroot_base(), diff --git a/lib/hyper/node/fire_vmm/state.ex b/lib/hyper/node/fire_vmm/state.ex index b390965c..2a37c796 100644 --- a/lib/hyper/node/fire_vmm/state.ex +++ b/lib/hyper/node/fire_vmm/state.ex @@ -58,7 +58,7 @@ defmodule Hyper.Node.FireVMM.State do :gen_statem.start_link(via(id), __MODULE__, opts, []) end - @spec stop(Hyper.Vm.id()) :: :ok + @spec stop(Hyper.Vm.Id.t()) :: :ok def stop(id) do :gen_statem.call(via(id), :stop) end @@ -174,7 +174,7 @@ defmodule Hyper.Node.FireVMM.State do # Cold boot, issued through the Client and aborting at the first error: # machine-config -> boot-source -> each drive -> each NIC -> InstanceStart. - @spec apply_spec(Hyper.Vm.id(), BootSpec.Cold.t()) :: :ok | {:error, term()} + @spec apply_spec(Hyper.Vm.Id.t(), BootSpec.Cold.t()) :: :ok | {:error, term()} @decorate with_span("Hyper.Node.FireVMM.State.Configuring.apply_spec", include: [:id]) defp apply_spec(id, %BootSpec.Cold{} = cold) do via = Client.via(id) diff --git a/lib/hyper/node/img.ex b/lib/hyper/node/img.ex index 320d4a7f..3f2ed123 100644 --- a/lib/hyper/node/img.ex +++ b/lib/hyper/node/img.ex @@ -53,7 +53,7 @@ defmodule Hyper.Node.Img do end @doc "Create a per-VM mutable layer for `vm_id` over `img_id`." - @spec create_mutable(Hyper.Img.id(), Hyper.Vm.id()) :: {:ok, pid()} | {:error, term()} + @spec create_mutable(Hyper.Img.id(), Hyper.Vm.Id.t()) :: {:ok, pid()} | {:error, term()} def create_mutable(img_id, vm_id) do case DynamicSupervisor.start_child( @mutable_sup, @@ -74,7 +74,7 @@ defmodule Hyper.Node.Img do Serve `img` to `vm_id` for the duration of `callable`, holding a DB lease on the image (and transitively its whole blob chain) the whole time. """ - @spec with_image(Hyper.Img.id(), Hyper.Vm.id(), (-> result)) :: result | {:error, term()} + @spec with_image(Hyper.Img.id(), Hyper.Vm.Id.t(), (-> result)) :: result | {:error, term()} when result: var def with_image(img, vm_id, callable) do with_image_lease(img, vm_id, callable) @@ -84,7 +84,7 @@ defmodule Hyper.Node.Img do # even if `callable` raises. A background task re-bumps the lease for the whole # run, so a long-lived VM never lets its claim lapse. If the lease cannot be # taken, returns the error and never runs `callable`. - @spec with_image_lease(Hyper.Img.id(), Hyper.Vm.id(), (-> result)) :: result | {:error, term()} + @spec with_image_lease(Hyper.Img.id(), Hyper.Vm.Id.t(), (-> result)) :: result | {:error, term()} when result: var defp with_image_lease(img, vm_id, callable) do ttl = Db.Lease.default_ttl() @@ -104,7 +104,7 @@ defmodule Hyper.Node.Img do # Re-bump the lease forever at 1/3 of the TTL, until killed. Runs in a task for the # lifetime of `callable`; transient bump failures are swallowed so a DB hiccup # can't tear down the VM - the next tick retries. - @spec heartbeat(Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: no_return() + @spec heartbeat(Hyper.Img.id(), Hyper.Vm.Id.t(), Unit.Time.t()) :: no_return() defp heartbeat(img, vm_id, ttl) do Process.sleep(div(Unit.Time.as_ms(ttl), 3)) diff --git a/lib/hyper/node/img/mutable.ex b/lib/hyper/node/img/mutable.ex index 57de460b..eb426efb 100644 --- a/lib/hyper/node/img/mutable.ex +++ b/lib/hyper/node/img/mutable.ex @@ -29,7 +29,7 @@ defmodule Hyper.Node.Img.Mutable do @enforce_keys [:img_id, :vm_id] defstruct [:img_id, :vm_id] - @type t :: %__MODULE__{img_id: Hyper.Img.id(), vm_id: Hyper.Vm.id()} + @type t :: %__MODULE__{img_id: Hyper.Img.id(), vm_id: Hyper.Vm.Id.t()} end defmodule State do @@ -129,7 +129,7 @@ defmodule Hyper.Node.Img.Mutable do end @doc false - @spec dm_name(Hyper.Vm.id()) :: String.t() + @spec dm_name(Hyper.Vm.Id.t()) :: String.t() def dm_name(vm_id), do: "hyper-rw-#{sanitize(vm_id)}" defp sanitize(id), do: String.replace(id, ~r/[^A-Za-z0-9._-]/, "_") diff --git a/lib/hyper/vm.ex b/lib/hyper/vm.ex index 0b99da8d..3001e41c 100644 --- a/lib/hyper/vm.ex +++ b/lib/hyper/vm.ex @@ -10,7 +10,6 @@ defmodule Hyper.Vm do @dialyzer {:nowarn_function, [fast_fork: 1, fork: 1]} @type t :: pid() - @type id :: String.t() @typedoc """ What a VM boots from: explicit, already-jail-visible artifact paths for a cold diff --git a/lib/hyper/vm/id.ex b/lib/hyper/vm/id.ex new file mode 100644 index 00000000..7c43d29e --- /dev/null +++ b/lib/hyper/vm/id.ex @@ -0,0 +1,27 @@ +defmodule Hyper.Vm.Id do + @moduledoc """ + A microVM id and its generator. + + An id is a `v` prefix followed by lowercase base32 of 10 random bytes, charset + `[a-z2-7]` - alphanumeric only, no `-`, `_`, or other punctuation. That charset + is the intersection of three independent constraints the id must satisfy at + once: + + * firecracker rejects `_` in an instance id (`InvalidInstanceId`); + * dm/jailer names must not start with `-`; + * registry keys and chroot path components stay trivially safe. + """ + + @type t :: String.t() + + @doc """ + Generate a fresh, random VM id (see the module doc for the charset contract). + + The previous base64url encoding emitted `-` and `_`, so it could produce ids + firecracker refused at boot (`Invalid char (_)`). + """ + @spec generate() :: t() + def generate do + "v" <> Base.encode32(:crypto.strong_rand_bytes(10), padding: false, case: :lower) + end +end diff --git a/test/hyper/vm/id_test.exs b/test/hyper/vm/id_test.exs new file mode 100644 index 00000000..5f82fb1e --- /dev/null +++ b/test/hyper/vm/id_test.exs @@ -0,0 +1,20 @@ +defmodule Hyper.Vm.IdTest do + @moduledoc """ + The charset contract of `Hyper.Vm.Id.generate/0`. The load-bearing invariant is + the refusal property: a generated id is *always* strictly alphanumeric, so it + can never carry a char that firecracker (`_`) or dm/jailer names (`-`) reject. + """ + + use ExUnit.Case, async: true + use ExUnitProperties + + alias Hyper.Vm.Id + + property "generate/0 produces a `v`-prefixed, strictly alphanumeric id" do + check all(_ <- StreamData.constant(nil)) do + id = Id.generate() + assert id =~ ~r/\A[A-Za-z0-9]+\z/ + assert String.starts_with?(id, "v") + end + end +end From c2bf12e56c253b3abf22ecb1c1758fe15c82a42b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sat, 27 Jun 2026 18:57:25 +0000 Subject: [PATCH 2/2] style(node): reflow with_image_lease spec after Vm.Id rename The Hyper.Vm.id() -> Hyper.Vm.Id.t() rename pushed the with_image_lease/3 @spec past the line limit; reflow it so mix format --check-formatted (the first CI gate) passes. --- lib/hyper/node/img.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/hyper/node/img.ex b/lib/hyper/node/img.ex index 3f2ed123..53bf2ae6 100644 --- a/lib/hyper/node/img.ex +++ b/lib/hyper/node/img.ex @@ -84,7 +84,8 @@ defmodule Hyper.Node.Img do # even if `callable` raises. A background task re-bumps the lease for the whole # run, so a long-lived VM never lets its claim lapse. If the lease cannot be # taken, returns the error and never runs `callable`. - @spec with_image_lease(Hyper.Img.id(), Hyper.Vm.Id.t(), (-> result)) :: result | {:error, term()} + @spec with_image_lease(Hyper.Img.id(), Hyper.Vm.Id.t(), (-> result)) :: + result | {:error, term()} when result: var defp with_image_lease(img, vm_id, callable) do ttl = Db.Lease.default_ttl()