Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions deploy-aws
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# endregion

import os
import re
import sys

import click
Expand Down Expand Up @@ -100,6 +101,35 @@ class DeployAWSCommand(DeployCommand):

return value

@staticmethod
def availability_zone_callback(ctx, param, value):
    """
    Called after parsing --availability-zone option.

    The value is trimmed and lower-cased, then interpreted as one of:

    - an empty value: returned as "" (auto-select the first zone offering
      the instance type);
    - a full AZ name ("us-west-2b"): returned as is, after checking that it
      starts with the selected region (when the region is known);
    - a bare zone letter ("b"): prefixed with the selected region.

    Raises click.BadParameter when a bare zone letter is given but the
    region is not available in ctx.params, or when the zone does not start
    with the selected region.
    """

    zone = (value or "").strip().lower()

    # empty means "auto-select": nothing to expand or validate
    if not zone:
        return ""

    # only present if the region option was processed before this callback
    region = ctx.params.get("region", "")

    # accept a bare zone suffix (e.g. "b") and prefix it with the region
    if re.fullmatch(r"[a-z]", zone):
        if not region:
            # without a region the letter cannot be expanded; fail here
            # instead of handing an invalid zone name such as "b" downstream
            raise click.BadParameter(
                colorize_error(
                    f'Availability zone "{zone}" is only a zone letter and '
                    "the region is not known yet. Pass the full zone name "
                    f'(e.g. "us-west-2{zone}") or specify --region before '
                    "--availability-zone."
                )
            )
        zone = f"{region}{zone}"

    # sanity check: the zone must belong to the selected region
    if region and not zone.startswith(region):
        raise click.BadParameter(
            colorize_error(
                f'Availability zone "{zone}" is not in region "{region}".'
            )
        )

    return zone

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down Expand Up @@ -144,6 +174,27 @@ class DeployAWSCommand(DeployCommand):
),
)

# --availability-zone
# Registers the --availability-zone option (short alias --az). The default is
# an empty string, the user is prompted for a value, and the parsed value is
# post-processed by DeployAWSCommand.availability_zone_callback.
self.params.insert(
# insert before --ingress-cidrs option (and after --region, so the
# region is parsed before the AZ callback runs)
# NOTE(review): this relies on declaration order; confirm that click also
# processes --region first when --az precedes it on the command line
self.param_index("ingress_cidrs"),
click.core.Option(
("--availability-zone", "--az"),
# empty default means "no explicit zone requested"
default="",
show_default=True,
callback=DeployAWSCommand.availability_zone_callback,
prompt=colorize_prompt(
"* Availability Zone (e.g. us-west-2b, or just \"b\"; "
"leave empty to auto-select. Use this to route around "
'"InsufficientInstanceCapacity" errors in a given zone)'
),
help="AWS availability zone for the instance, e.g. 'us-west-2b'."
" Leave empty to auto-select the first zone that offers the"
" instance type. Useful to avoid per-zone GPU capacity shortages.",
),
)

# defaults

self.params[self.param_index("from_image")].default = config[
Expand Down Expand Up @@ -179,6 +230,7 @@ class AWSDeployer(Deployer):
# pass the selected region and availability zone to create_tfvars
self.create_tfvars(
{
"region": self.params["region"],
# optional: falls back to "" when the parameter is missing
"availability_zone": self.params.get("availability_zone", ""),
}
)

Expand Down
13 changes: 12 additions & 1 deletion src/terraform/aws/isaac-workstation/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,23 @@ data "aws_ec2_instance_type_offerings" "zones" {
location_type = "availability-zone"
}

locals {
  # availability zones that offer the requested instance type, sorted
  offered_zones = sort(data.aws_ec2_instance_type_offerings.zones.locations)

  # use the explicitly requested availability zone when provided, otherwise
  # fall back to the first zone that offers the instance type (or the
  # "not-available" placeholder when no zone offers it). an explicit AZ lets
  # the user route around per-zone GPU capacity shortages
  # (InsufficientInstanceCapacity), which vary over time and by zone.
  #
  # coalesce() skips both null and "" arguments, so a variable explicitly set
  # to null is treated as "not provided" too, instead of bypassing the
  # fallback and leaving the zone unset.
  availability_zone = coalesce(var.availability_zone, try(local.offered_zones[0], "not-available"))
}

# create a subnet for the isaac-workstation instance

resource "aws_subnet" "subnet" {
# get a /24 block from vpc cidr
cidr_block = cidrsubnet(var.vpc.cidr_block, 8, 3)
availability_zone = try(sort(data.aws_ec2_instance_type_offerings.zones.locations)[0], "not-available")
availability_zone = local.availability_zone
vpc_id = var.vpc.id
map_public_ip_on_launch = true

Expand Down
7 changes: 7 additions & 0 deletions src/terraform/aws/isaac-workstation/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ variable "region" {
type = string
}

# Optional availability zone override, e.g. "us-west-2b". When left empty,
# the first zone that offers the instance type is selected automatically.
variable "availability_zone" {
  type    = string
  default = ""
}

variable "from_image" {
default = true
type = bool
Expand Down
1 change: 1 addition & 0 deletions src/terraform/aws/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ module "isaac_workstation" {
instance_type = var.isaac_workstation_instance_type
from_image = var.from_image
region = var.region
availability_zone = var.availability_zone
ssh_port = var.ssh_port
deployment_name = var.deployment_name
ingress_cidrs = var.ingress_cidrs
Expand Down
6 changes: 6 additions & 0 deletions src/terraform/aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ variable "region" {
type = string
}

# Optional availability zone override, e.g. "us-west-2b".
# An empty value means the zone is selected automatically.
variable "availability_zone" {
  type    = string
  default = ""
}

variable "from_image" {
default = false
type = bool
Expand Down