Source code for sagemaker.hyperpod.inference.config.hp_endpoint_config

   1from pydantic import BaseModel, ConfigDict, Field
   2from typing import Any, Optional, List, Dict, Union, Literal
   3
   4

[docs]
   5class Dimensions(BaseModel):
   6    model_config = ConfigDict(extra="forbid", populate_by_name=True)
   7
   8    name: str = Field(description="CloudWatch Metric dimension name")
   9    value: str = Field(description="CloudWatch Metric dimension value")

  10
  11

[docs]
  12class CloudWatchTrigger(BaseModel):
  13    model_config = ConfigDict(extra="forbid", populate_by_name=True)
  14
  15    """CloudWatch metric trigger to use for autoscaling"""
  16
  17
  18    activationTargetValue: Optional[float] = Field(
  19        default=0,
  20        alias="activation_target_value",
  21        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
  22    )
  23    dimensions: Optional[List[Dimensions]] = Field(
  24        default=None, description="Dimensions for Cloudwatch metrics"
  25    )
  26    metricCollectionPeriod: Optional[int] = Field(
  27        default=300,
  28        alias="metric_collection_period",
  29        description="Defines the Period for CloudWatch query",
  30    )
  31    metricCollectionStartTime: Optional[int] = Field(
  32        default=300,
  33        alias="metric_collection_start_time",
  34        description="Defines the StartTime for CloudWatch query",
  35    )
  36    metricName: Optional[str] = Field(
  37        default=None,
  38        alias="metric_name",
  39        description="Metric name to query for Cloudwatch trigger",
  40    )
  41    metricStat: Optional[str] = Field(
  42        default="Average",
  43        alias="metric_stat",
  44        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
  45    )
  46    metricType: Optional[Literal["Value", "Average"]] = Field(
  47        default="Average",
  48        alias="metric_type",
  49        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
  50    )
  51    minValue: Optional[float] = Field(
  52        default=0,
  53        alias="min_value",
  54        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
  55    )
  56    name: Optional[str] = Field(
  57        default=None, description="Name for the CloudWatch trigger"
  58    )
  59    namespace: Optional[str] = Field(
  60        default=None, description="AWS CloudWatch namespace for metric"
  61    )
  62    targetValue: Optional[float] = Field(
  63        default=None,
  64        alias="target_value",
  65        description="TargetValue for CloudWatch metric",
  66    )
  67    useCachedMetrics: Optional[bool] = Field(
  68        default=True,
  69        alias="use_cached_metrics",
  70        description="Enable caching of metric values during polling interval. Default is true",
  71    )

  72
  73

[docs]
  74class CloudWatchTriggerList(BaseModel):
  75    model_config = ConfigDict(extra="forbid", populate_by_name=True)
  76
  77    activationTargetValue: Optional[float] = Field(
  78        default=0,
  79        alias="activation_target_value",
  80        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
  81    )
  82    dimensions: Optional[List[Dimensions]] = Field(
  83        default=None, description="Dimensions for Cloudwatch metrics"
  84    )
  85    metricCollectionPeriod: Optional[int] = Field(
  86        default=300,
  87        alias="metric_collection_period",
  88        description="Defines the Period for CloudWatch query",
  89    )
  90    metricCollectionStartTime: Optional[int] = Field(
  91        default=300,
  92        alias="metric_collection_start_time",
  93        description="Defines the StartTime for CloudWatch query",
  94    )
  95    metricName: Optional[str] = Field(
  96        default=None,
  97        alias="metric_name",
  98        description="Metric name to query for Cloudwatch trigger",
  99    )
 100    metricStat: Optional[str] = Field(
 101        default="Average",
 102        alias="metric_stat",
 103        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
 104    )
 105    metricType: Optional[Literal["Value", "Average"]] = Field(
 106        default="Average",
 107        alias="metric_type",
 108        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
 109    )
 110    minValue: Optional[float] = Field(
 111        default=0,
 112        alias="min_value",
 113        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
 114    )
 115    name: Optional[str] = Field(
 116        default=None, description="Name for the CloudWatch trigger"
 117    )
 118    namespace: Optional[str] = Field(
 119        default=None, description="AWS CloudWatch namespace for metric"
 120    )
 121    targetValue: Optional[float] = Field(
 122        default=None,
 123        alias="target_value",
 124        description="TargetValue for CloudWatch metric",
 125    )
 126    useCachedMetrics: Optional[bool] = Field(
 127        default=True,
 128        alias="use_cached_metrics",
 129        description="Enable caching of metric values during polling interval. Default is true",
 130    )

 131
 132

[docs]
 133class PrometheusTrigger(BaseModel):
 134    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 135
 136    """Prometheus metric trigger to use for autoscaling"""
 137
 138
 139    activationTargetValue: Optional[float] = Field(
 140        default=0,
 141        alias="activation_target_value",
 142        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
 143    )
 144    customHeaders: Optional[str] = Field(
 145        default=None,
 146        alias="custom_headers",
 147        description="Custom headers to include while querying the prometheus endpoint.",
 148    )
 149    metricType: Optional[Literal["Value", "Average"]] = Field(
 150        default="Average",
 151        alias="metric_type",
 152        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
 153    )
 154    name: Optional[str] = Field(
 155        default=None, description="Name for the Prometheus trigger"
 156    )
 157    namespace: Optional[str] = Field(
 158        default=None, description="Namespace for namespaced queries"
 159    )
 160    query: Optional[str] = Field(
 161        default=None, description="PromQLQuery for the metric."
 162    )
 163    serverAddress: Optional[str] = Field(
 164        default=None,
 165        alias="server_address",
 166        description="Server address for AMP workspace",
 167    )
 168    targetValue: Optional[float] = Field(
 169        default=None,
 170        alias="target_value",
 171        description="Target metric value for scaling",
 172    )
 173    useCachedMetrics: Optional[bool] = Field(
 174        default=True,
 175        alias="use_cached_metrics",
 176        description="Enable caching of metric values during polling interval. Default is true",
 177    )

 178
 179

[docs]
 180class PrometheusTriggerList(BaseModel):
 181    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 182
 183    activationTargetValue: Optional[float] = Field(
 184        default=0,
 185        alias="activation_target_value",
 186        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
 187    )
 188    customHeaders: Optional[str] = Field(
 189        default=None,
 190        alias="custom_headers",
 191        description="Custom headers to include while querying the prometheus endpoint.",
 192    )
 193    metricType: Optional[Literal["Value", "Average"]] = Field(
 194        default="Average",
 195        alias="metric_type",
 196        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
 197    )
 198    name: Optional[str] = Field(
 199        default=None, description="Name for the Prometheus trigger"
 200    )
 201    namespace: Optional[str] = Field(
 202        default=None, description="Namespace for namespaced queries"
 203    )
 204    query: Optional[str] = Field(
 205        default=None, description="PromQLQuery for the metric."
 206    )
 207    serverAddress: Optional[str] = Field(
 208        default=None,
 209        alias="server_address",
 210        description="Server address for AMP workspace",
 211    )
 212    targetValue: Optional[float] = Field(
 213        default=None,
 214        alias="target_value",
 215        description="Target metric value for scaling",
 216    )
 217    useCachedMetrics: Optional[bool] = Field(
 218        default=True,
 219        alias="use_cached_metrics",
 220        description="Enable caching of metric values during polling interval. Default is true",
 221    )

 222
 223

[docs]
 224class AutoScalingSpec(BaseModel):
 225    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 226
 227    cloudWatchTrigger: Optional[CloudWatchTrigger] = Field(
 228        default=None,
 229        alias="cloud_watch_trigger",
 230        description="CloudWatch metric trigger to use for autoscaling",
 231    )
 232    cloudWatchTriggerList: Optional[List[CloudWatchTriggerList]] = Field(
 233        default=None,
 234        alias="cloud_watch_trigger_list",
 235        description="Multiple CloudWatch metric triggers to use for autoscaling. Takes priority over CloudWatchTrigger if both are provided.",
 236    )
 237    cooldownPeriod: Optional[int] = Field(
 238        default=300,
 239        alias="cooldown_period",
 240        description="The period to wait after the last trigger reported active before scaling the resource back to 0. Default 300 seconds.",
 241    )
 242    initialCooldownPeriod: Optional[int] = Field(
 243        default=300,
 244        alias="initial_cooldown_period",
 245        description="The delay before the cooldownPeriod starts after the initial creation of the ScaledObject. Default 300 seconds.",
 246    )
 247    maxReplicaCount: Optional[int] = Field(
 248        default=5,
 249        alias="max_replica_count",
 250        description="The maximum number of model pods to scale to. Default 5.",
 251    )
 252    minReplicaCount: Optional[int] = Field(
 253        default=1,
 254        alias="min_replica_count",
 255        description="The minimum number of model pods to scale down to. Default 1.",
 256    )
 257    pollingInterval: Optional[int] = Field(
 258        default=30,
 259        alias="polling_interval",
 260        description="This is the interval to check each trigger on. Default 30 seconds.",
 261    )
 262    prometheusTrigger: Optional[PrometheusTrigger] = Field(
 263        default=None,
 264        alias="prometheus_trigger",
 265        description="Prometheus metric trigger to use for autoscaling",
 266    )
 267    prometheusTriggerList: Optional[List[PrometheusTriggerList]] = Field(
 268        default=None,
 269        alias="prometheus_trigger_list",
 270        description="Multiple Prometheus metric triggers to use for autoscaling. Takes priority over PrometheusTrigger if both are provided.",
 271    )
 272    scaleDownStabilizationTime: Optional[int] = Field(
 273        default=300,
 274        alias="scale_down_stabilization_time",
 275        description="The time window to stabilize for HPA before scaling down. Default 300 seconds.",
 276    )
 277    scaleUpStabilizationTime: Optional[int] = Field(
 278        default=0,
 279        alias="scale_up_stabilization_time",
 280        description="The time window to stabilize for HPA before scaling up. Default 0 seconds.",
 281    )

 282
 283

[docs]
 284class Kubernetes(BaseModel):
 285    """User-provided customizations for the inference pod."""
 286
 287    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 288
 289    initContainers: Optional[List[Dict[str, Any]]] = Field(
 290        default=None, alias="init_containers",
 291        description="Init containers to run before the inference server starts.",
 292    )
 293    schedulerName: Optional[str] = Field(
 294        default=None, alias="scheduler_name",
 295        description="Name of the scheduler to use for pod scheduling.",
 296    )
 297    serviceAccountName: Optional[str] = Field(
 298        default=None,
 299        alias="service_account_name",
 300        description="Name of the Kubernetes ServiceAccount to use for the inference pod. If not specified, the namespace's default service account will be used. This is useful for providing AWS credentials via IRSA to init containers or the worker.",
 301    )
 302    volumes: Optional[List[Dict[str, Any]]] = Field(
 303        default=None,
 304        description="Additional volumes to add to the pod spec.",
 305    )

 306
 307

[docs]
 308class NodeSelectorRequirement(BaseModel):
 309    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 310
 311    key: str
 312    operator: str
 313    values: Optional[List[str]] = None

 314
 315

[docs]
 316class NodeSelectorTerm(BaseModel):
 317    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 318
 319    matchExpressions: Optional[List[NodeSelectorRequirement]] = Field(
 320        default=None, alias="match_expressions"
 321    )
 322    matchFields: Optional[List[NodeSelectorRequirement]] = Field(
 323        default=None, alias="match_fields"
 324    )

 325
 326

[docs]
 327class PreferredSchedulingTerm(BaseModel):
 328    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 329
 330    preference: NodeSelectorTerm
 331    weight: int

 332
 333

[docs]
 334class NodeSelector(BaseModel):
 335    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 336
 337    nodeSelectorTerms: List[NodeSelectorTerm] = Field(alias="node_selector_terms")

 338
 339

[docs]
 340class NodeAffinity(BaseModel):
 341    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 342
 343    preferredDuringSchedulingIgnoredDuringExecution: Optional[
 344        List[PreferredSchedulingTerm]
 345    ] = Field(default=None, alias="preferred_during_scheduling_ignored_during_execution")
 346    requiredDuringSchedulingIgnoredDuringExecution: Optional[NodeSelector] = Field(
 347        default=None, alias="required_during_scheduling_ignored_during_execution"
 348    )

 349
 350

[docs]
 351class CustomCertificateConfig(BaseModel):
 352    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 353
 354    acmArn: str = Field(alias="acm_arn", description="ACM certificate ARN")
 355    domainName: str = Field(
 356        alias="domain_name",
 357        description="Domain name to use from the certificate.",
 358    )

 359
 360

[docs]
 361class Probe(BaseModel):
 362    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 363
 364    exec: Optional[Dict[str, Any]] = None
 365    failureThreshold: Optional[int] = Field(default=None, alias="failure_threshold")
 366    grpc: Optional[Dict[str, Any]] = None
 367    httpGet: Optional[Dict[str, Any]] = Field(default=None, alias="http_get")
 368    initialDelaySeconds: Optional[int] = Field(
 369        default=None, alias="initial_delay_seconds"
 370    )
 371    periodSeconds: Optional[int] = Field(default=None, alias="period_seconds")
 372    successThreshold: Optional[int] = Field(default=None, alias="success_threshold")
 373    tcpSocket: Optional[Dict[str, Any]] = Field(default=None, alias="tcp_socket")
 374    terminationGracePeriodSeconds: Optional[int] = Field(
 375        default=None, alias="termination_grace_period_seconds"
 376    )
 377    timeoutSeconds: Optional[int] = Field(default=None, alias="timeout_seconds")

 378
 379

[docs]
 380class Probes(BaseModel):
 381    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 382
 383    livenessProbe: Optional[Probe] = Field(default=None, alias="liveness_probe")
 384    readinessProbe: Optional[Probe] = Field(default=None, alias="readiness_probe")
 385    startupProbe: Optional[Probe] = Field(default=None, alias="startup_probe")

 386
 387

[docs]
 388class RequestLimits(BaseModel):
 389    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 390
 391    maxConcurrentRequests: Optional[int] = Field(
 392        default=None, alias="max_concurrent_requests"
 393    )
 394    maxQueueSize: Optional[int] = Field(default=None, alias="max_queue_size")
 395    overflowStatusCode: Optional[int] = Field(
 396        default=429, alias="overflow_status_code"
 397    )

 398
 399

[docs]
 400class IntelligentRoutingSpec(BaseModel):
 401    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 402
 403    """Configuration for intelligent routing This feature is currently not supported for existing deployments. Adding this configuration to an existing deployment will be rejected."""
 404
 405
 406    autoScalingSpec: Optional[AutoScalingSpec] = Field(
 407        default=None, alias="auto_scaling_spec"
 408    )
 409    enabled: Optional[bool] = Field(
 410        default=False, description="Once set, the enabled field cannot be modified"
 411    )
 412    routingStrategy: Optional[
 413        Literal["prefixaware", "kvaware", "session", "roundrobin"]
 414    ] = Field(default="prefixaware", alias="routing_strategy")

 415
 416

[docs]
 417class L2CacheSpec(BaseModel):
 418    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 419
 420    """Configuration for providing L2 Cache offloading"""
 421
 422
 423    l2CacheBackend: Optional[str] = Field(
 424        default=None,
 425        alias="l2_cache_backend",
 426        description="L2 cache backend type. Required when L2CacheSpec is provided.",
 427    )
 428    l2CacheLocalUrl: Optional[str] = Field(
 429        default=None,
 430        alias="l2_cache_local_url",
 431        description="Provide the L2 cache URL to local storage",
 432    )

 433
 434

[docs]
 435class KvCacheSpec(BaseModel):
 436    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 437
 438    """Configuration for KV Cache specification By default L1CacheOffloading will be enabled"""
 439
 440
 441    cacheConfigFile: Optional[str] = Field(
 442        default=None,
 443        alias="cache_config_file",
 444        description="KVCache configuration file path. If specified, override other configurations provided via spec",
 445    )
 446    enableL1Cache: Optional[bool] = Field(
 447        default=True, alias="enable_l1_cache", description="Enable CPU offloading"
 448    )
 449    enableL2Cache: Optional[bool] = Field(default=False, alias="enable_l2_cache")
 450    l2CacheSpec: Optional[L2CacheSpec] = Field(
 451        default=None,
 452        alias="l2_cache_spec",
 453        description="Configuration for providing L2 Cache offloading",
 454    )

 455
 456

[docs]
 457class LoadBalancer(BaseModel):
 458    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 459
 460    """Configuration for Application Load Balancer"""
 461
 462
 463    healthCheckPath: Optional[str] = Field(
 464        default="/ping",
 465        alias="health_check_path",
 466        description="Health check path for the ALB target group. Defaults to /ping if not specified.",
 467    )
 468    routingAlgorithm: Optional[Literal["least_outstanding_requests", "round_robin"]] = (
 469        Field(
 470            default="least_outstanding_requests",
 471            alias="routing_algorithm",
 472            description="Routing algorithm for the ALB target group (least_oustanding_requests or round_robin)",
 473        )
 474    )

 475
 476

[docs]
 477class ModelMetrics(BaseModel):
 478    """Configuration for model container metrics scraping"""
 479
 480    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 481
 482    path: Optional[str] = Field(
 483        default="/metrics", description="Path where the model exposes metrics"
 484    )
 485    port: Optional[int] = Field(
 486        default=8080,
 487        description="Port where the model exposes metrics. If not specified, a default port will be used.",
 488    )

 489
 490

[docs]
 491class Metrics(BaseModel):
 492    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 493
 494    """Configuration for metrics collection and exposure"""
 495
 496
 497    enabled: Optional[bool] = Field(
 498        default=True, description="Enable metrics collection for this model deployment"
 499    )
 500    metricsScrapeIntervalSeconds: Optional[int] = Field(
 501        default=15,
 502        alias="metrics_scrape_interval_seconds",
 503        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
 504    )
 505    modelMetrics: Optional[ModelMetrics] = Field(
 506        default=None,
 507        alias="model_metrics",
 508        description="Configuration for model container metrics scraping",
 509    )

 510
 511

[docs]
 512class FsxStorage(BaseModel):
 513    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 514
 515    dnsName: Optional[str] = Field(
 516        default=None, alias="dns_name", description="FSX File System DNS Name"
 517    )
 518    fileSystemId: str = Field(alias="file_system_id", description="FSX File System ID")
 519    mountName: Optional[str] = Field(
 520        default=None, alias="mount_name", description="FSX File System Mount Name"
 521    )

 522
 523

[docs]
 524class S3Storage(BaseModel):
 525    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 526
 527    bucketName: str = Field(alias="bucket_name", description="S3 bucket location")
 528    region: str = Field(description="S3 bucket region")

 529
 530

[docs]
 531class TokenSecretRef(BaseModel):
 532    """Reference to a Kubernetes Secret containing the HuggingFace API token."""
 533
 534    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 535
 536    key: str = Field(
 537        description="The key of the secret to select from. Must be a valid secret key."
 538    )
 539    name: Optional[str] = Field(
 540        default="",
 541        description="Name of the referent.",
 542    )
 543    optional: Optional[bool] = Field(
 544        default=None,
 545        description="Specify whether the Secret or its key must be defined",
 546    )

 547
 548

[docs]
 549class HuggingFaceModel(BaseModel):
 550    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 551
 552    """HuggingFace model configuration. Required when modelSourceType is huggingface."""
 553
 554
 555    commitSHA: Optional[str] = Field(
 556        default=None,
 557        alias="commit_sha",
 558        description="Git commit SHA for the model revision. Must be a full 40-character lowercase hex SHA. If not provided, the operator defaults to main branch.",
 559    )
 560    modelId: str = Field(
 561        alias="model_id",
 562        description='HuggingFace Hub model identifier in org/model format (e.g. "meta-llama/Llama-3.1-8B-Instruct").',
 563    )
 564    tokenSecretRef: Optional[TokenSecretRef] = Field(
 565        default=None,
 566        alias="token_secret_ref",
 567        description="Reference to a Kubernetes Secret containing the HuggingFace API token.",
 568    )

 569
 570

[docs]
 571class ModelSourceConfig(BaseModel):
 572    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 573
 574    fsxStorage: Optional[FsxStorage] = Field(default=None, alias="fsx_storage")
 575    huggingFaceModel: Optional[HuggingFaceModel] = Field(
 576        default=None,
 577        alias="hugging_face_model",
 578        description='HuggingFace model configuration. Required when modelSourceType is "huggingface".',
 579    )
 580    modelLocation: Optional[str] = Field(
 581        default=None,
 582        alias="model_location",
 583        description="Specific location where the model data exists",
 584    )
 585    modelSourceType: Literal["fsx", "s3", "huggingface", "kubernetesVolume"] = Field(
 586        alias="model_source_type"
 587    )
 588    prefetchEnabled: Optional[bool] = Field(
 589        default=False,
 590        alias="prefetch_enabled",
 591        description="In case the model seems to fit within the instance's memory (VRAM), this option can be used to pre-fetch the model to RAM and then the inference server will load to the GPU/CPU device thereafter.",
 592    )
 593    s3Storage: Optional[S3Storage] = Field(default=None, alias="s3_storage")

 594
 595

[docs]
 596class Tags(BaseModel):
 597    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 598
 599    name: str
 600    value: str

 601
 602

[docs]
 603class TlsConfig(BaseModel):
 604    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 605
 606    """Configurations for TLS"""
 607
 608
 609    customCertificateConfig: Optional[CustomCertificateConfig] = Field(
 610        default=None, alias="custom_certificate_config",
 611        description="Customer-provided ACM certificate configuration",
 612    )
 613    tlsCertificateOutputS3Uri: Optional[str] = Field(
 614        default=None, alias="tls_certificate_output_s3_uri"
 615    )

 616
 617

[docs]
 618class ConfigMapKeyRef(BaseModel):
 619    """Selects a key of a ConfigMap."""
 620
 621    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 622
 623    key: str = Field(description="The key to select.")
 624    name: Optional[str] = Field(
 625        default="",
 626        description="Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
 627    )
 628    optional: Optional[bool] = Field(
 629        default=None,
 630        description="Specify whether the ConfigMap or its key must be defined",
 631    )

 632
 633

[docs]
 634class FieldRef(BaseModel):
 635    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 636
 637    """Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs."""
 638
 639
 640    apiVersion: Optional[str] = Field(
 641        default=None,
 642        alias="api_version",
 643        description='Version of the schema the FieldPath is written in terms of, defaults to "v1".',
 644    )
 645    fieldPath: str = Field(
 646        alias="field_path",
 647        description="Path of the field to select in the specified API version.",
 648    )

 649
 650

[docs]
 651class ResourceFieldRef(BaseModel):
 652    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 653
 654    """Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported."""
 655
 656
 657    containerName: Optional[str] = Field(
 658        default=None,
 659        alias="container_name",
 660        description="Container name: required for volumes, optional for env vars",
 661    )
 662    divisor: Optional[Union[int, str]] = Field(
 663        default=None,
 664        description='Specifies the output format of the exposed resources, defaults to "1"',
 665    )
 666    resource: str = Field(description="Required: resource to select")

 667
 668

[docs]
 669class SecretKeyRef(BaseModel):
 670    """Selects a key of a secret in the pod's namespace"""
 671
 672    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 673
 674    key: str = Field(
 675        description="The key of the secret to select from.  Must be a valid secret key."
 676    )
 677    name: Optional[str] = Field(
 678        default="",
 679        description="Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
 680    )
 681    optional: Optional[bool] = Field(
 682        default=None,
 683        description="Specify whether the Secret or its key must be defined",
 684    )

 685
 686

[docs]
 687class ValueFrom(BaseModel):
 688    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 689
 690    """Source for the environment variable's value. Cannot be used if value is not empty."""
 691
 692
 693    configMapKeyRef: Optional[ConfigMapKeyRef] = Field(
 694        default=None,
 695        alias="config_map_key_ref",
 696        description="Selects a key of a ConfigMap.",
 697    )
 698    fieldRef: Optional[FieldRef] = Field(
 699        default=None,
 700        alias="field_ref",
 701        description="Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.",
 702    )
 703    resourceFieldRef: Optional[ResourceFieldRef] = Field(
 704        default=None,
 705        alias="resource_field_ref",
 706        description="Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.",
 707    )
 708    secretKeyRef: Optional[SecretKeyRef] = Field(
 709        default=None,
 710        alias="secret_key_ref",
 711        description="Selects a key of a secret in the pod's namespace",
 712    )

 713
 714

[docs]
 715class EnvironmentVariables(BaseModel):
 716    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 717
 718    """EnvVar represents an environment variable present in a Container."""
 719
 720
 721    name: str = Field(
 722        description="Name of the environment variable. Must be a C_IDENTIFIER."
 723    )
 724    value: Optional[str] = Field(
 725        default=None,
 726        description='Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".',
 727    )
 728    valueFrom: Optional[ValueFrom] = Field(
 729        default=None,
 730        alias="value_from",
 731        description="Source for the environment variable's value. Cannot be used if value is not empty.",
 732    )

 733
 734

[docs]
 735class ModelInvocationPort(BaseModel):
 736    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 737
 738    """Defines the port at which the model server will listen to the invocation requests."""
 739
 740
 741    containerPort: int = Field(
 742        alias="container_port",
 743        description="Port on which the model server will be listening",
 744    )
 745    name: Optional[str] = Field(
 746        default="http",
 747        description="This is name for the port within the deployed container where the model will listen. This will be referred to by the Load Balancer Service. This must be an IANA_SVC_NAME (for eg. http) and unique within the pod.",
 748    )

 749
 750

[docs]
 751class ModelVolumeMount(BaseModel):
 752    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 753
 754    """Defines the volume where model will be loaded"""
 755
 756
 757    mountPath: Optional[str] = Field(
 758        default="/opt/ml/model",
 759        alias="mount_path",
 760        description="This is the path within the container where the model data will be available for the inference server to load it to GPU,CPU or other device",
 761    )
 762    name: str = Field(description="Name of the model volume mount")

 763
 764

[docs]
 765class Claims(BaseModel):
 766    """ResourceClaim references one entry in PodSpec.ResourceClaims."""
 767
 768    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 769
 770    name: str = Field(
 771        description="Name must match the name of one entry in pod.spec.resourceClaims of the Pod where this field is used. It makes that resource available inside a container."
 772    )
 773    request: Optional[str] = Field(
 774        default=None,
 775        description="Request is the name chosen for a request in the referenced claim. If empty, everything from the claim is made available, otherwise only the result of this request.",
 776    )

 777
 778

[docs]
 779class Resources(BaseModel):
 780    """Defines the Resources in terms of CPU, GPU, Memory needed for the model to be deployed"""
 781
 782    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 783
 784    claims: Optional[List[Claims]] = Field(
 785        default=None,
 786        description="Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container.  This is an alpha field and requires enabling the DynamicResourceAllocation feature gate.  This field is immutable. It can only be set for containers.",
 787    )
 788    limits: Optional[Dict[str, Union[int, str]]] = Field(
 789        default=None,
 790        description="Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
 791    )
 792    requests: Optional[Dict[str, Union[int, str]]] = Field(
 793        default=None,
 794        description="Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. Requests cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
 795    )

 796
 797

[docs]
 798class Worker(BaseModel):
 799    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 800
 801    """Details of the worker"""
 802
 803
 804    args: Optional[List[str]] = Field(
 805        default=None, description="Defines the Arguments to the entrypoint."
 806    )
 807    command: Optional[List[str]] = Field(
 808        default=None,
 809        description="Defines the Command which is Entrypoint array. Not executed within a shell.",
 810    )
 811    environmentVariables: Optional[List[EnvironmentVariables]] = Field(
 812        default=None,
 813        alias="environment_variables",
 814        description="List of environment variables to set in the container. Cannot be updated.",
 815    )
 816    image: str = Field(description="The name of the inference server image to be used")
 817    modelInvocationPort: ModelInvocationPort = Field(
 818        alias="model_invocation_port",
 819        description="Defines the port at which the model server will listen to the invocation requests.",
 820    )
 821    modelVolumeMount: ModelVolumeMount = Field(
 822        alias="model_volume_mount",
 823        description="Defines the volume where model will be loaded",
 824    )
 825    probes: Optional[Probes] = Field(
 826        default=None,
 827        description="Configuration for container probes (liveness, readiness, startup)",
 828    )
 829    requestLimits: Optional[RequestLimits] = Field(
 830        default=None,
 831        alias="request_limits",
 832        description="Configuration for request limiting on the nginx sidecar proxy",
 833    )
 834    resources: Resources = Field(
 835        description="Defines the Resources in terms of CPU, GPU, Memory needed for the model to be deployed"
 836    )
 837    workingDir: Optional[str] = Field(
 838        default=None,
 839        alias="working_dir",
 840        description="Defines the working directory of container.",
 841    )

 842
 843

[docs]
 844class CaptureContentTypeHeader(BaseModel):
 845    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 846
 847    """Configuration for how to treat different content type headers during capture"""
 848
 849
 850    csvContentTypes: Optional[List[str]] = Field(
 851        default=None,
 852        alias="csv_content_types",
 853        description="List of content type headers to treat as CSV",
 854    )
 855    jsonContentTypes: Optional[List[str]] = Field(
 856        default=None,
 857        alias="json_content_types",
 858        description="List of content type headers to treat as JSON",
 859    )

 860
 861

[docs]
 862class CaptureOptions(BaseModel):
 863    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 864
 865    """CaptureOption defines what data to capture (input, output, or both)."""
 866
 867
 868    captureMode: Literal["Input", "Output"] = Field(
 869        alias="capture_mode", description="Capture mode: Input or Output"
 870    )

 871
 872

[docs]
 873class BufferConfig(BaseModel):
 874    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 875
 876    """Configuration for buffering and flushing captured data"""
 877
 878
 879    batchSize: Optional[int] = Field(
 880        default=10,
 881        alias="batch_size",
 882        description="Number of records to batch before writing to S3",
 883    )
 884    flushIntervalSeconds: Optional[int] = Field(
 885        default=60,
 886        alias="flush_interval_seconds",
 887        description="Flush interval in seconds",
 888    )

 889
 890

[docs]
 891class PayloadConfig(BaseModel):
 892    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 893
 894    """Configuration for payload size limits"""
 895
 896
 897    maxPayloadSizeKB: Optional[int] = Field(
 898        default=0,
 899        alias="max_payload_size_kb",
 900        description="Maximum payload size in KB to capture. 0 means no limit (capture full payload).",
 901    )

 902
 903

[docs]
 904class DataCaptureModelPod(BaseModel):
 905    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 906
 907    """Configuration for Model Pod level data capture (Tier 3)"""
 908
 909
 910    bufferConfig: Optional[BufferConfig] = Field(
 911        default=None,
 912        alias="buffer_config",
 913        description="Configuration for buffering and flushing captured data",
 914    )
 915    captureContentTypeHeader: Optional[CaptureContentTypeHeader] = Field(
 916        default=None,
 917        alias="capture_content_type_header",
 918        description="Configuration for how to treat different content type headers during capture",
 919    )
 920    captureOptions: Optional[List[CaptureOptions]] = Field(
 921        default=None,
 922        alias="capture_options",
 923        description="Capture options (Input, Output, or both). Defaults to [Input, Output] when enabled.",
 924    )
 925    enabled: bool = Field(description="Enable or disable model pod data capture")
 926    initialSamplingPercentage: Optional[int] = Field(
 927        default=None,
 928        alias="initial_sampling_percentage",
 929        description="Percentage of requests to capture (0-100). Defaults to 100 when enabled.",
 930    )
 931    kmsKeyId: Optional[str] = Field(
 932        default=None,
 933        alias="kms_key_id",
 934        description="Optional KMS key ID, ARN, alias name, or alias ARN for encrypting captured data",
 935    )
 936    payloadConfig: Optional[PayloadConfig] = Field(
 937        default=None,
 938        alias="payload_config",
 939        description="Configuration for payload size limits",
 940    )

 941
 942

[docs]
 943class DataCaptureSagemakerEndpoint(BaseModel):
 944    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 945
 946    """Configuration for SageMaker Endpoint level data capture (Tier 1)"""
 947
 948
 949    captureContentTypeHeader: Optional[CaptureContentTypeHeader] = Field(
 950        default=None,
 951        alias="capture_content_type_header",
 952        description="Configuration for how to treat different content type headers during capture",
 953    )
 954    captureOptions: Optional[List[CaptureOptions]] = Field(
 955        default=None,
 956        alias="capture_options",
 957        description="Capture options (Input, Output, or both). Defaults to [Input, Output] when enabled.",
 958    )
 959    enabled: bool = Field(
 960        description="Enable or disable SageMaker endpoint data capture"
 961    )
 962    initialSamplingPercentage: Optional[int] = Field(
 963        default=None,
 964        alias="initial_sampling_percentage",
 965        description="Percentage of requests to capture (0-100). Defaults to 100 when enabled.",
 966    )
 967    kmsKeyId: Optional[str] = Field(
 968        default=None,
 969        alias="kms_key_id",
 970        description="Optional KMS key ID, ARN, alias name, or alias ARN for encrypting captured data",
 971    )

 972
 973

[docs]
 974class DataCaptureLoadBalancer(BaseModel):
 975    """Configuration for LoadBalancer level data capture (Tier 2)"""
 976
 977    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 978
 979    enabled: bool = Field(description="Enable or disable load balancer access logs")

 980
 981

[docs]
 982class DataCapture(BaseModel):
 983    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 984
 985    """Configuration for data capture across multiple tiers (SageMaker, LoadBalancer, Model Pod)"""
 986
 987
 988    loadBalancer: Optional[DataCaptureLoadBalancer] = Field(
 989        default=None,
 990        alias="load_balancer",
 991        description="Configuration for LoadBalancer level data capture (Tier 2)",
 992    )
 993    modelPod: Optional[DataCaptureModelPod] = Field(
 994        default=None,
 995        alias="model_pod",
 996        description="Configuration for Model Pod level data capture (Tier 3)",
 997    )
 998    s3Uri: Optional[str] = Field(
 999        default=None,
1000        alias="s3_uri",
1001        description="Common S3 URI for all data capture tiers. Each tier will write to a specific prefix within this bucket.",
1002    )
1003    sagemakerEndpoint: Optional[DataCaptureSagemakerEndpoint] = Field(
1004        default=None,
1005        alias="sagemaker_endpoint",
1006        description="Configuration for SageMaker Endpoint level data capture (Tier 1)",
1007    )

1008
1009

[docs]
class DnsConfig(BaseModel):
    """DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig to be set."""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Required. The tlsConfig prerequisite named in the docstring concerns a
    # sibling field of the enclosing spec and is not checked by this class.
    hostedZoneId: str = Field(
        alias="hosted_zone_id",
        description="Route53 Hosted Zone ID where the DNS record will be created.",
    )

1020
1021
class _HPEndpoint(BaseModel):
    """InferenceEndpointConfigSpec defines the desired state of InferenceEndpointConfig."""

    # Unlike the nested models in this file, unknown keys are ignored here
    # rather than rejected.
    model_config = ConfigDict(populate_by_name=True, extra="ignore")

    # Required fields (no default): modelName, modelSourceConfig and worker.
    # NOTE: this field name is capitalised, unlike every other field here.
    InitialReplicaCount: Optional[int] = Field(
        alias="initial_replica_count",
        description="Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1.",
        default=None,
    )
    autoScalingSpec: Optional[AutoScalingSpec] = Field(
        alias="auto_scaling_spec",
        default=None,
    )
    dataCapture: Optional[DataCapture] = Field(
        alias="data_capture",
        description="Configuration for data capture across multiple tiers (SageMaker, LoadBalancer, Model Pod)",
        default=None,
    )
    dnsConfig: Optional[DnsConfig] = Field(
        alias="dns_config",
        description="DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig to be set.",
        default=None,
    )
    endpointName: Optional[str] = Field(
        alias="endpoint_name",
        description="Name used for Sagemaker Endpoint Name of sagemaker endpoint. Defaults to empty string which represents that Sagemaker endpoint will not be created.",
        default=None,
    )
    # instanceType / instanceTypes are described as mutually exclusive; no
    # validator in this class enforces that.
    instanceType: Optional[str] = Field(
        alias="instance_type",
        description="Single instance type to deploy the model on. Mutually exclusive with instanceTypes.",
        default=None,
    )
    instanceTypes: Optional[List[str]] = Field(
        alias="instance_types",
        description="List of instance types to deploy the model on, in order of preference.",
        default=None,
    )
    intelligentRoutingSpec: Optional[IntelligentRoutingSpec] = Field(
        alias="intelligent_routing_spec",
        description="Configuration for intelligent routing This feature is currently not supported for existing deployments. Adding this configuration to an existing deployment will be rejected.",
        default=None,
    )
    invocationEndpoint: Optional[str] = Field(
        alias="invocation_endpoint",
        description="The invocation endpoint of the model server. http://<host>:<port>/ would be pre-populated based on the other fields. Please fill in the path after http://<host>:<port>/ specific to your model server.",
        default="invocations",
    )
    kvCacheSpec: Optional[KvCacheSpec] = Field(
        alias="kv_cache_spec",
        description="Configuration for KV Cache specification By default L1CacheOffloading will be enabled",
        default=None,
    )
    kubernetes: Optional[Kubernetes] = Field(
        description="User-provided customizations for the inference pod.",
        default=None,
    )
    loadBalancer: Optional[LoadBalancer] = Field(
        alias="load_balancer",
        description="Configuration for Application Load Balancer",
        default=None,
    )
    maxDeployTimeInSeconds: Optional[int] = Field(
        alias="max_deploy_time_in_seconds",
        description="Maximum allowed time in seconds for the deployment to complete before timing out. Defaults to 1 hour (3600 seconds)",
        default=3600,
    )
    metrics: Optional[Metrics] = Field(
        description="Configuration for metrics collection and exposure",
        default=None,
    )
    modelName: str = Field(
        description="Name of model that will be created on Sagemaker",
        alias="model_name",
    )
    modelSourceConfig: ModelSourceConfig = Field(
        alias="model_source_config",
    )
    modelVersion: Optional[str] = Field(
        alias="model_version",
        description="Version of the model used in creating sagemaker endpoint",
        default=None,
    )
    nodeAffinity: Optional[NodeAffinity] = Field(
        alias="node_affinity",
        description="Custom node affinity configuration for advanced scheduling.",
        default=None,
    )
    replicas: Optional[int] = Field(
        description="The desired number of inference server replicas. Default 1.",
        default=1,
    )
    tags: Optional[List[Tags]] = Field(
        description="Mentions the tags to be added to the Sagemaker Endpoint",
        default=None,
    )
    tlsConfig: Optional[TlsConfig] = Field(
        alias="tls_config",
        description="Configurations for TLS",
        default=None,
    )
    worker: Worker = Field(
        description="Details of the worker",
    )
1119
1120

[docs]
class Conditions(BaseModel):
    """DeploymentCondition describes the state of a deployment at a certain point."""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Required fields (no default): status and type.
    # Timestamps are carried as plain strings; no parsing happens here.
    lastTransitionTime: Optional[str] = Field(
        default=None,
        alias="last_transition_time",
        description="Last time the condition transitioned from one status to another.",
    )
    lastUpdateTime: Optional[str] = Field(
        default=None,
        alias="last_update_time",
        description="The last time this condition was updated.",
    )
    message: Optional[str] = Field(
        default=None,
        description="A human readable message indicating details about the transition.",
    )
    reason: Optional[str] = Field(
        default=None, description="The reason for the condition's last transition."
    )
    # Typed as a free-form str; the True/False/Unknown set is not enforced.
    status: str = Field(
        description="Status of the condition, one of True, False, Unknown."
    )
    type: str = Field(description="Type of deployment condition.")
    observedGeneration: Optional[int] = Field(
        default=None,
        alias="observed_generation",
        description="observedGeneration represents the .metadata.generation that the condition was set based upon. For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date with respect to the current state of the instance.",
    )

1153
1154

[docs]
class Status(BaseModel):
    """Status of the Deployment Object"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Every field is optional and defaults to None.
    availableReplicas: Optional[int] = Field(
        default=None,
        alias="available_replicas",
        description="Total number of available pods (ready for at least minReadySeconds) targeted by this deployment.",
    )
    collisionCount: Optional[int] = Field(
        default=None,
        alias="collision_count",
        description="Count of hash collisions for the Deployment. The Deployment controller uses this field as a collision avoidance mechanism when it needs to create the name for the newest ReplicaSet.",
    )
    conditions: Optional[List[Conditions]] = Field(
        default=None,
        description="Represents the latest available observations of a deployment's current state.",
    )
    observedGeneration: Optional[int] = Field(
        default=None,
        alias="observed_generation",
        description="The generation observed by the deployment controller.",
    )
    readyReplicas: Optional[int] = Field(
        default=None,
        alias="ready_replicas",
        description="readyReplicas is the number of pods targeted by this Deployment with a Ready Condition.",
    )
    replicas: Optional[int] = Field(
        default=None,
        description="Total number of non-terminated pods targeted by this deployment (their labels match the selector).",
    )
    terminatingReplicas: Optional[int] = Field(
        default=None,
        alias="terminating_replicas",
        description="Total number of terminating pods targeted by this deployment.",
    )
    unavailableReplicas: Optional[int] = Field(
        default=None,
        alias="unavailable_replicas",
        description="Total number of unavailable pods targeted by this deployment. This is the total number of pods that are still required for the deployment to have 100% available capacity. They may either be pods that are running but not yet available or pods that still have not been created.",
    )
    updatedReplicas: Optional[int] = Field(
        default=None,
        alias="updated_replicas",
        description="Total number of non-terminated pods targeted by this deployment that have the desired template spec.",
    )

1204
1205

[docs]
class DeploymentStatus(BaseModel):
    """Details of the native kubernetes deployment that hosts the model"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Required fields (no default): lastUpdated and name.
    deploymentObjectOverallState: Optional[str] = Field(
        default=None,
        alias="deployment_object_overall_state",
        description="Overall State of the Deployment Object",
    )
    lastUpdated: str = Field(alias="last_updated", description="Last Update Time")
    message: Optional[str] = Field(
        default=None,
        description="Message populated in the root CRD while updating the status of underlying Deployment",
    )
    name: str = Field(description="Name of the Deployment Object")
    reason: Optional[str] = Field(
        default=None,
        description="Reason populated in the root CRD while updating the status of underlying Deployment",
    )
    status: Optional[Status] = Field(
        default=None, description="Status of the Deployment Object"
    )

1230
1231

[docs]
class Sagemaker(BaseModel):
    """Status of the SageMaker endpoint"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    configArn: Optional[str] = Field(
        default=None,
        alias="config_arn",
        description="The Amazon Resource Name (ARN) of the endpoint configuration.",
    )
    endpointArn: Optional[str] = Field(
        default=None,
        alias="endpoint_arn",
        description="The Amazon Resource Name (ARN) of the SageMaker endpoint",
    )
    modelArn: Optional[str] = Field(
        default=None,
        alias="model_arn",
        description="The ARN of the model created in SageMaker.",
    )
    # Required: the only field without a default.
    state: str = Field(description="The current state of the SageMaker endpoint")

1254
1255

[docs]
class Endpoints(BaseModel):
    """EndpointStatus contains the status of SageMaker endpoints"""

    # Unknown keys are rejected at validation time.
    model_config = ConfigDict(populate_by_name=True, extra="forbid")

    # Optional nested status block; left as None when not supplied.
    sagemaker: Optional[Sagemaker] = Field(
        description="Status of the SageMaker endpoint",
        default=None,
    )

1264
1265

[docs]
class ModelMetricsStatus(BaseModel):
    """Status of model container metrics collection"""

    # Unknown keys are rejected at validation time.
    model_config = ConfigDict(populate_by_name=True, extra="forbid")

    # Both fields are optional and default to None.
    path: Optional[str] = Field(
        description="The path where metrics are available",
        default=None,
    )
    port: Optional[int] = Field(
        description="The port on which metrics are exposed",
        default=None,
    )

1277
1278

[docs]
class MetricsStatus(BaseModel):
    """Status of metrics collection"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Required: the only field without a default.
    enabled: bool = Field(description="Whether metrics collection is enabled")
    errorMessage: Optional[str] = Field(
        default=None,
        alias="error_message",
        description="Error message if metrics collection is in error state",
    )
    metricsScrapeIntervalSeconds: Optional[int] = Field(
        default=None,
        alias="metrics_scrape_interval_seconds",
        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
    )
    modelMetrics: Optional[ModelMetricsStatus] = Field(
        default=None,
        alias="model_metrics",
        description="Status of model container metrics collection",
    )
    state: Optional[str] = Field(
        default=None, description="Current state of metrics collection"
    )

1304
1305

[docs]
class TlsCertificate(BaseModel):
    """CertificateStatus represents the status of TLS certificates"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Every field is optional and defaults to None.
    certificateARN: Optional[str] = Field(
        default=None,
        alias="certificate_arn",
        description="The Amazon Resource Name (ARN) of the ACM certificate",
    )
    certificateDomainNames: Optional[List[str]] = Field(
        default=None,
        alias="certificate_domain_names",
        description="The certificate domain names that is attached to the certificate",
    )
    # Any value outside these three literals fails validation.
    certificateHealth: Optional[Literal["Valid", "Expiring", "Expired"]] = Field(
        default=None,
        alias="certificate_health",
        description="Certificate health status",
    )
    certificateName: Optional[str] = Field(
        default=None,
        alias="certificate_name",
        description="The certificate name of cert manager",
    )
    importedCertificates: Optional[List[str]] = Field(
        default=None,
        alias="imported_certificates",
        description="Used for tracking the imported certificates to ACM",
    )
    issuerName: Optional[str] = Field(
        default=None, alias="issuer_name", description="The issuer name of cert manager"
    )
    lastCertExpiryTime: Optional[str] = Field(
        default=None,
        alias="last_cert_expiry_time",
        description="The last certificate expiry time",
    )
    tlsCertificateOutputS3Bucket: Optional[str] = Field(
        default=None,
        alias="tls_certificate_output_s3_bucket",
        description="S3 bucket that stores the certificate that needs to be trusted",
    )
    tlsCertificateS3Keys: Optional[List[str]] = Field(
        default=None,
        alias="tls_certificate_s3_keys",
        description="The output tls certificate S3 key that points to the .pem file",
    )

1355
1356

[docs]
class DnsStatus(BaseModel):
    """Status of the operator-managed Route53 DNS record"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Any value outside these three literals fails validation.
    dnsHealth: Optional[Literal["Active", "Pending", "Error"]] = Field(
        default=None,
        alias="dns_health",
        description="DNS resolution status: Active, Pending, or Error.",
    )
    hostedZoneId: Optional[str] = Field(
        default=None,
        alias="hosted_zone_id",
        description="Route53 hosted zone ID.",
    )
    lastTransitionTime: Optional[str] = Field(
        default=None,
        alias="last_transition_time",
        description="When the status last transitioned, used for propagation timeout.",
    )
    # Required: the only field without a default.
    managedByOperator: bool = Field(
        alias="managed_by_operator",
        description="Whether the operator manages this DNS record.",
    )
    message: Optional[str] = Field(
        default=None, description="Human-readable status or error message."
    )
    previousHostedZoneId: Optional[str] = Field(
        default=None,
        alias="previous_hosted_zone_id",
        description="Previous hosted zone ID, retained during domain/zone changes until cleanup completes.",
    )
    previousRecordName: Optional[str] = Field(
        default=None,
        alias="previous_record_name",
        description="Previous record name, retained during domain/zone changes until cleanup completes.",
    )
    recordName: Optional[str] = Field(
        default=None, alias="record_name", description="Route53 record name."
    )

1398
1399

[docs]
class InferenceEndpointConfigStatus(BaseModel):
    """ModelDeploymentStatus defines the observed state of ModelDeployment"""

    # Reject unknown keys; aliased fields may also be set by their field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    conditions: Optional[List[Conditions]] = Field(
        default=None,
        description="Detailed conditions representing the state of the deployment",
    )
    deploymentStatus: Optional[DeploymentStatus] = Field(
        default=None,
        alias="deployment_status",
        description="Details of the native kubernetes deployment that hosts the model",
    )
    dnsStatus: Optional[DnsStatus] = Field(
        default=None,
        alias="dns_status",
        description="Status of the operator-managed Route53 DNS record",
    )
    endpoints: Optional[Endpoints] = Field(
        default=None,
        description="EndpointStatus contains the status of SageMaker endpoints",
    )
    metricsStatus: Optional[MetricsStatus] = Field(
        default=None, alias="metrics_status", description="Status of metrics collection"
    )
    observedGeneration: Optional[int] = Field(
        default=None,
        alias="observed_generation",
        description="Latest generation reconciled by controller",
    )
    replicas: Optional[int] = Field(
        default=None, description="The observed number of inference server replicas."
    )
    selector: Optional[str] = Field(
        default=None, description="LabelSelector for the deployment."
    )
    # Closed set of phases; any other string fails validation.
    state: Optional[
        Literal[
            "DeploymentPending",
            "DeploymentInProgress",
            "DeploymentFailed",
            "DeploymentComplete",
            "DeletionPending",
            "DeletionInProgress",
            "DeletionFailed",
            "DeletionComplete",
        ]
    ] = Field(default=None, description="Current phase of the model deployment")
    tlsCertificate: Optional[TlsCertificate] = Field(
        default=None,
        alias="tls_certificate",
        description="CertificateStatus represents the status of TLS certificates",
    )