Source code for sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config

   1from pydantic import BaseModel, ConfigDict, Field
   2from typing import Optional, List, Literal
   3
   4

[docs]
   5class Dimensions(BaseModel):
   6    model_config = ConfigDict(extra="forbid", populate_by_name=True)
   7
   8    name: str = Field(description="CloudWatch Metric dimension name")
   9    value: str = Field(description="CloudWatch Metric dimension value")

  10
  11

[docs]
  12class CloudWatchTrigger(BaseModel):
  13    model_config = ConfigDict(extra="forbid", populate_by_name=True)
  14
  15    """CloudWatch metric trigger to use for autoscaling"""
  16
  17
  18    activationTargetValue: Optional[float] = Field(
  19        default=0,
  20        alias="activation_target_value",
  21        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
  22    )
  23    dimensions: Optional[List[Dimensions]] = Field(
  24        default=None, description="Dimensions for Cloudwatch metrics"
  25    )
  26    metricCollectionPeriod: Optional[int] = Field(
  27        default=300,
  28        alias="metric_collection_period",
  29        description="Defines the Period for CloudWatch query",
  30    )
  31    metricCollectionStartTime: Optional[int] = Field(
  32        default=300,
  33        alias="metric_collection_start_time",
  34        description="Defines the StartTime for CloudWatch query",
  35    )
  36    metricName: Optional[str] = Field(
  37        default=None,
  38        alias="metric_name",
  39        description="Metric name to query for Cloudwatch trigger",
  40    )
  41    metricStat: Optional[str] = Field(
  42        default="Average",
  43        alias="metric_stat",
  44        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
  45    )
  46    metricType: Optional[Literal["Value", "Average"]] = Field(
  47        default="Average",
  48        alias="metric_type",
  49        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
  50    )
  51    minValue: Optional[float] = Field(
  52        default=0,
  53        alias="min_value",
  54        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
  55    )
  56    name: Optional[str] = Field(
  57        default=None, description="Name for the CloudWatch trigger"
  58    )
  59    namespace: Optional[str] = Field(
  60        default=None, description="AWS CloudWatch namespace for metric"
  61    )
  62    targetValue: Optional[float] = Field(
  63        default=None,
  64        alias="target_value",
  65        description="TargetValue for CloudWatch metric",
  66    )
  67    useCachedMetrics: Optional[bool] = Field(
  68        default=True,
  69        alias="use_cached_metrics",
  70        description="Enable caching of metric values during polling interval. Default is true",
  71    )

  72
  73

[docs]
  74class PrometheusTrigger(BaseModel):
  75    model_config = ConfigDict(extra="forbid", populate_by_name=True)
  76
  77    """Prometheus metric trigger to use for autoscaling"""
  78
  79
  80    activationTargetValue: Optional[float] = Field(
  81        default=0,
  82        alias="activation_target_value",
  83        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
  84    )
  85    customHeaders: Optional[str] = Field(
  86        default=None,
  87        alias="custom_headers",
  88        description="Custom headers to include while querying the prometheus endpoint.",
  89    )
  90    metricType: Optional[Literal["Value", "Average"]] = Field(
  91        default="Average",
  92        alias="metric_type",
  93        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
  94    )
  95    name: Optional[str] = Field(
  96        default=None, description="Name for the Prometheus trigger"
  97    )
  98    namespace: Optional[str] = Field(
  99        default=None, description="Namespace for namespaced queries"
 100    )
 101    query: Optional[str] = Field(
 102        default=None, description="PromQLQuery for the metric."
 103    )
 104    serverAddress: Optional[str] = Field(
 105        default=None,
 106        alias="server_address",
 107        description="Server address for AMP workspace",
 108    )
 109    targetValue: Optional[float] = Field(
 110        default=None,
 111        alias="target_value",
 112        description="Target metric value for scaling",
 113    )
 114    useCachedMetrics: Optional[bool] = Field(
 115        default=True,
 116        alias="use_cached_metrics",
 117        description="Enable caching of metric values during polling interval. Default is true",
 118    )

 119
 120

[docs]
 121class CloudWatchTriggerList(BaseModel):
 122    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 123
 124    activationTargetValue: Optional[float] = Field(
 125        default=0,
 126        alias="activation_target_value",
 127        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
 128    )
 129    dimensions: Optional[List[Dimensions]] = Field(
 130        default=None, description="Dimensions for Cloudwatch metrics"
 131    )
 132    metricCollectionPeriod: Optional[int] = Field(
 133        default=300,
 134        alias="metric_collection_period",
 135        description="Defines the Period for CloudWatch query",
 136    )
 137    metricCollectionStartTime: Optional[int] = Field(
 138        default=300,
 139        alias="metric_collection_start_time",
 140        description="Defines the StartTime for CloudWatch query",
 141    )
 142    metricName: Optional[str] = Field(
 143        default=None,
 144        alias="metric_name",
 145        description="Metric name to query for Cloudwatch trigger",
 146    )
 147    metricStat: Optional[str] = Field(
 148        default="Average",
 149        alias="metric_stat",
 150        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
 151    )
 152    metricType: Optional[Literal["Value", "Average"]] = Field(
 153        default="Average",
 154        alias="metric_type",
 155        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
 156    )
 157    minValue: Optional[float] = Field(
 158        default=0,
 159        alias="min_value",
 160        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
 161    )
 162    name: Optional[str] = Field(
 163        default=None, description="Name for the CloudWatch trigger"
 164    )
 165    namespace: Optional[str] = Field(
 166        default=None, description="AWS CloudWatch namespace for metric"
 167    )
 168    targetValue: Optional[float] = Field(
 169        default=None,
 170        alias="target_value",
 171        description="TargetValue for CloudWatch metric",
 172    )
 173    useCachedMetrics: Optional[bool] = Field(
 174        default=True,
 175        alias="use_cached_metrics",
 176        description="Enable caching of metric values during polling interval. Default is true",
 177    )

 178
 179

[docs]
 180class PrometheusTriggerList(BaseModel):
 181    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 182
 183    activationTargetValue: Optional[float] = Field(
 184        default=0,
 185        alias="activation_target_value",
 186        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
 187    )
 188    customHeaders: Optional[str] = Field(
 189        default=None,
 190        alias="custom_headers",
 191        description="Custom headers to include while querying the prometheus endpoint.",
 192    )
 193    metricType: Optional[Literal["Value", "Average"]] = Field(
 194        default="Average",
 195        alias="metric_type",
 196        description="The type of metric to be used by HPA. Enum: AverageValue - Uses average value of metric per pod, Value - Uses absolute metric value",
 197    )
 198    name: Optional[str] = Field(
 199        default=None, description="Name for the Prometheus trigger"
 200    )
 201    namespace: Optional[str] = Field(
 202        default=None, description="Namespace for namespaced queries"
 203    )
 204    query: Optional[str] = Field(
 205        default=None, description="PromQLQuery for the metric."
 206    )
 207    serverAddress: Optional[str] = Field(
 208        default=None,
 209        alias="server_address",
 210        description="Server address for AMP workspace",
 211    )
 212    targetValue: Optional[float] = Field(
 213        default=None,
 214        alias="target_value",
 215        description="Target metric value for scaling",
 216    )
 217    useCachedMetrics: Optional[bool] = Field(
 218        default=True,
 219        alias="use_cached_metrics",
 220        description="Enable caching of metric values during polling interval. Default is true",
 221    )

 222
 223

[docs]
 224class AutoScalingSpec(BaseModel):
 225    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 226
 227    cloudWatchTrigger: Optional[CloudWatchTrigger] = Field(
 228        default=None,
 229        alias="cloud_watch_trigger",
 230        description="CloudWatch metric trigger to use for autoscaling",
 231    )
 232    cloudWatchTriggerList: Optional[List[CloudWatchTriggerList]] = Field(
 233        default=None,
 234        alias="cloud_watch_trigger_list",
 235        description="Multiple CloudWatch metric triggers to use for autoscaling. Takes priority over CloudWatchTrigger if both are provided.",
 236    )
 237    cooldownPeriod: Optional[int] = Field(
 238        default=300,
 239        alias="cooldown_period",
 240        description="The period to wait after the last trigger reported active before scaling the resource back to 0. Default 300 seconds.",
 241    )
 242    initialCooldownPeriod: Optional[int] = Field(
 243        default=300,
 244        alias="initial_cooldown_period",
 245        description="The delay before the cooldownPeriod starts after the initial creation of the ScaledObject. Default 300 seconds.",
 246    )
 247    maxReplicaCount: Optional[int] = Field(
 248        default=5,
 249        alias="max_replica_count",
 250        description="The maximum number of model pods to scale to. Default 5.",
 251    )
 252    minReplicaCount: Optional[int] = Field(
 253        default=1,
 254        alias="min_replica_count",
 255        description="The minimum number of model pods to scale down to. Default 1.",
 256    )
 257    pollingInterval: Optional[int] = Field(
 258        default=30,
 259        alias="polling_interval",
 260        description="This is the interval to check each trigger on. Default 30 seconds.",
 261    )
 262    prometheusTrigger: Optional[PrometheusTrigger] = Field(
 263        default=None,
 264        alias="prometheus_trigger",
 265        description="Prometheus metric trigger to use for autoscaling",
 266    )
 267    prometheusTriggerList: Optional[List[PrometheusTriggerList]] = Field(
 268        default=None,
 269        alias="prometheus_trigger_list",
 270        description="Multiple Prometheus metric triggers to use for autoscaling. Takes priority over PrometheusTrigger if both are provided.",
 271    )
 272    scaleDownStabilizationTime: Optional[int] = Field(
 273        default=300,
 274        alias="scale_down_stabilization_time",
 275        description="The time window to stabilize for HPA before scaling down. Default 300 seconds.",
 276    )
 277    scaleUpStabilizationTime: Optional[int] = Field(
 278        default=0,
 279        alias="scale_up_stabilization_time",
 280        description="The time window to stabilize for HPA before scaling up. Default 0 seconds.",
 281    )

 282
 283

[docs]
 284class EnvironmentVariables(BaseModel):
 285    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 286
 287    name: str
 288    value: str

 289
 290

[docs]
 291class ModelMetrics(BaseModel):
 292    """Configuration for model container metrics scraping"""
 293
 294    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 295
 296    path: Optional[str] = Field(
 297        default="/metrics", description="Path where the model exposes metrics"
 298    )
 299    port: Optional[int] = Field(
 300        default=8080,
 301        description="Port where the model exposes metrics. If not specified, a default port will be used.",
 302    )

 303
 304

[docs]
 305class Metrics(BaseModel):
 306    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 307
 308    """Configuration for metrics collection and exposure"""
 309
 310
 311    enabled: Optional[bool] = Field(
 312        default=True, description="Enable metrics collection for this model deployment"
 313    )
 314    metricsScrapeIntervalSeconds: Optional[int] = Field(
 315        default=15,
 316        alias="metrics_scrape_interval_seconds",
 317        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
 318    )
 319    modelMetrics: Optional[ModelMetrics] = Field(
 320        default=None,
 321        alias="model_metrics",
 322        description="Configuration for model container metrics scraping",
 323    )

 324
 325

[docs]
 326class AdditionalConfigs(BaseModel):
 327    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 328
 329    name: str
 330    value: str

 331
 332

[docs]
 333class Model(BaseModel):
 334    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 335
 336    acceptEula: bool = Field(
 337        default=False,
 338        alias="accept_eula",
 339        description="For models that require a Model Access Config, specify True or False to indicate whether model terms of use have been accepted.",
 340    )
 341    additionalConfigs: Optional[List[AdditionalConfigs]] = Field(
 342        default=None, alias="additional_configs"
 343    )
 344    gatedModelDownloadRole: Optional[str] = Field(
 345        default=None,
 346        alias="gated_model_download_role",
 347        description="The Amazon Resource Name (ARN) of an IAM role that will be used to download gated model",
 348    )
 349    modelHubName: Optional[str] = Field(
 350        default="SageMakerPublicHub",
 351        alias="model_hub_name",
 352        description="The name of the model hub content. Can be an ARN or a simple name.",
 353    )
 354    modelId: str = Field(
 355        alias="model_id",
 356        description="The unique identifier of the model within the specified hub (hubContentArn).",
 357    )
 358    modelVersion: Optional[str] = Field(
 359        default=None,
 360        alias="model_version",
 361        description="The version of the model to deploy, in semantic versioning format (e.g., 1.0.0).",
 362    )

 363
 364

[docs]
 365class SageMakerEndpoint(BaseModel):
 366    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 367
 368    name: Optional[str] = Field(
 369        default="",
 370        description="Name of a SageMaker endpoint to be created for this JumpStartModel. The default value of empty string, when used, will skip endpoint creation.",
 371    )

 372
 373

[docs]
 374class Validations(BaseModel):
 375    model_config = ConfigDict(extra='forbid')
 376
 377    acceleratorPartitionValidation: Optional[bool] = Field(
 378        default=True, 
 379        alias="accelerator_partition_validation", 
 380        description="Enable MIG validation for GPU partitioning. Default is true."
 381    )

 382
 383

[docs]
 384class Server(BaseModel):
 385    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 386
 387    executionRole: Optional[str] = Field(
 388        default=None,
 389        alias="execution_role",
 390        description="The Amazon Resource Name (ARN) of an IAM role that will be used to deploy and manage the inference server",
 391    )
 392    instanceType: str = Field(
 393        alias="instance_type",
 394        description="The EC2 instance type to use for the inference server. Must be one of the supported types.",
 395    )
 396
 397    acceleratorPartitionType: Optional[str] = Field(
 398        default=None, 
 399        alias="accelerator_partition_type", 
 400        description="MIG profile to use for GPU partitioning"
 401    )
 402    
 403    validations: Optional[Validations] = Field(
 404        default=None, 
 405        description="Validations configuration for the server"
 406    )

 407
 408

[docs]
 409class IntelligentRoutingSpec(BaseModel):
 410    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 411
 412    """Configuration for intelligent routing"""
 413
 414
 415    autoScalingSpec: Optional[AutoScalingSpec] = Field(
 416        default=None, alias="auto_scaling_spec"
 417    )
 418    enabled: Optional[bool] = Field(
 419        default=False, description="Once set, the enabled field cannot be modified"
 420    )
 421    routingStrategy: Optional[
 422        Literal["prefixaware", "kvaware", "session", "roundrobin"]
 423    ] = Field(default="prefixaware", alias="routing_strategy")

 424
 425

[docs]
 426class L2CacheSpec(BaseModel):
 427    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 428
 429    l2CacheBackend: Optional[str] = Field(
 430        default=None, alias="l2_cache_backend"
 431    )
 432    l2CacheLocalUrl: Optional[str] = Field(
 433        default=None, alias="l2_cache_local_url"
 434    )

 435
 436

[docs]
 437class KvCacheSpec(BaseModel):
 438    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 439
 440    cacheConfigFile: Optional[str] = Field(
 441        default=None, alias="cache_config_file"
 442    )
 443    enableL1Cache: Optional[bool] = Field(
 444        default=True, alias="enable_l1_cache"
 445    )
 446    enableL2Cache: Optional[bool] = Field(
 447        default=False, alias="enable_l2_cache"
 448    )
 449    l2CacheSpec: Optional[L2CacheSpec] = Field(
 450        default=None, alias="l2_cache_spec"
 451    )

 452
 453

[docs]
 454class LoadBalancer(BaseModel):
 455    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 456
 457    healthCheckPath: Optional[str] = Field(
 458        default="/ping", alias="health_check_path"
 459    )
 460    routingAlgorithm: Optional[Literal["least_outstanding_requests", "round_robin"]] = (
 461        Field(default="least_outstanding_requests", alias="routing_algorithm")
 462    )

 463
 464

[docs]
 465class CustomCertificateConfig(BaseModel):
 466    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 467
 468    acmArn: str = Field(alias="acm_arn", description="ACM certificate ARN")
 469    domainName: str = Field(alias="domain_name")

 470
 471

[docs]
 472class TlsConfig(BaseModel):
 473    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 474
 475    customCertificateConfig: Optional[CustomCertificateConfig] = Field(
 476        default=None, alias="custom_certificate_config"
 477    )
 478    tlsCertificateOutputS3Uri: Optional[str] = Field(
 479        default=None, alias="tls_certificate_output_s3_uri"
 480    )

 481
 482

[docs]
 483class CaptureContentTypeHeader(BaseModel):
 484    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 485
 486    """Configuration for how to treat different content type headers during capture"""
 487
 488
 489    csvContentTypes: Optional[List[str]] = Field(
 490        default=None,
 491        alias="csv_content_types",
 492        description="List of content type headers to treat as CSV",
 493    )
 494    jsonContentTypes: Optional[List[str]] = Field(
 495        default=None,
 496        alias="json_content_types",
 497        description="List of content type headers to treat as JSON",
 498    )

 499
 500

[docs]
 501class CaptureOptions(BaseModel):
 502    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 503
 504    """CaptureOption defines what data to capture (input, output, or both)."""
 505
 506
 507    captureMode: Literal["Input", "Output"] = Field(
 508        alias="capture_mode", description="Capture mode: Input or Output"
 509    )

 510
 511

[docs]
 512class BufferConfig(BaseModel):
 513    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 514
 515    """Configuration for buffering and flushing captured data"""
 516
 517
 518    batchSize: Optional[int] = Field(
 519        default=10,
 520        alias="batch_size",
 521        description="Number of records to batch before writing to S3",
 522    )
 523    flushIntervalSeconds: Optional[int] = Field(
 524        default=60,
 525        alias="flush_interval_seconds",
 526        description="Flush interval in seconds",
 527    )

 528
 529

[docs]
 530class PayloadConfig(BaseModel):
 531    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 532
 533    """Configuration for payload size limits"""
 534
 535
 536    maxPayloadSizeKB: Optional[int] = Field(
 537        default=0,
 538        alias="max_payload_size_kb",
 539        description="Maximum payload size in KB to capture. 0 means no limit.",
 540    )

 541
 542

[docs]
 543class DataCaptureModelPod(BaseModel):
 544    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 545
 546    """Configuration for Model Pod level data capture (Tier 3)"""
 547
 548
 549    bufferConfig: Optional[BufferConfig] = Field(
 550        default=None, alias="buffer_config",
 551    )
 552    captureContentTypeHeader: Optional[CaptureContentTypeHeader] = Field(
 553        default=None, alias="capture_content_type_header",
 554    )
 555    captureOptions: Optional[List[CaptureOptions]] = Field(
 556        default=None, alias="capture_options",
 557    )
 558    enabled: bool = Field(description="Enable or disable model pod data capture")
 559    initialSamplingPercentage: Optional[int] = Field(
 560        default=None, alias="initial_sampling_percentage",
 561    )
 562    kmsKeyId: Optional[str] = Field(default=None, alias="kms_key_id")
 563    payloadConfig: Optional[PayloadConfig] = Field(
 564        default=None, alias="payload_config",
 565    )

 566
 567

[docs]
 568class DataCaptureSagemakerEndpoint(BaseModel):
 569    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 570
 571    """Configuration for SageMaker Endpoint level data capture (Tier 1)"""
 572
 573
 574    captureContentTypeHeader: Optional[CaptureContentTypeHeader] = Field(
 575        default=None, alias="capture_content_type_header",
 576    )
 577    captureOptions: Optional[List[CaptureOptions]] = Field(
 578        default=None, alias="capture_options",
 579    )
 580    enabled: bool = Field(description="Enable or disable SageMaker endpoint data capture")
 581    initialSamplingPercentage: Optional[int] = Field(
 582        default=None, alias="initial_sampling_percentage",
 583    )
 584    kmsKeyId: Optional[str] = Field(default=None, alias="kms_key_id")

 585
 586

[docs]
 587class DataCaptureLoadBalancer(BaseModel):
 588    """Configuration for LoadBalancer level data capture (Tier 2)"""
 589
 590    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 591
 592    enabled: bool = Field(description="Enable or disable load balancer access logs")

 593
 594

[docs]
 595class DataCapture(BaseModel):
 596    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 597
 598    """Configuration for data capture across multiple tiers (SageMaker, LoadBalancer, Model Pod)"""
 599
 600
 601    loadBalancer: Optional[DataCaptureLoadBalancer] = Field(
 602        default=None, alias="load_balancer",
 603    )
 604    modelPod: Optional[DataCaptureModelPod] = Field(
 605        default=None, alias="model_pod",
 606    )
 607    s3Uri: Optional[str] = Field(default=None, alias="s3_uri")
 608    sagemakerEndpoint: Optional[DataCaptureSagemakerEndpoint] = Field(
 609        default=None, alias="sagemaker_endpoint",
 610    )

 611
 612

[docs]
 613class DnsConfig(BaseModel):
 614    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 615
 616    """DNS automation configuration for Route53."""
 617
 618
 619    hostedZoneId: str = Field(
 620        alias="hosted_zone_id",
 621        description="Route53 Hosted Zone ID where the DNS record will be created.",
 622    )

 623
 624
 625class _HPJumpStartEndpoint(BaseModel):
 626    """Config defines the desired state of JumpStartModel."""
 627
 628    model_config = ConfigDict(extra="ignore", populate_by_name=True)
 629
 630    autoScalingSpec: Optional[AutoScalingSpec] = Field(
 631        default=None, alias="auto_scaling_spec"
 632    )
 633    dataCapture: Optional[DataCapture] = Field(
 634        default=None,
 635        alias="data_capture",
 636        description="Configuration for data capture across multiple tiers (SageMaker, LoadBalancer, Model Pod)",
 637    )
 638    dnsConfig: Optional[DnsConfig] = Field(
 639        default=None,
 640        alias="dns_config",
 641        description="DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig to be set.",
 642    )
 643    environmentVariables: Optional[List[EnvironmentVariables]] = Field(
 644        default=None,
 645        alias="environment_variables",
 646        description="Additional environment variables to be passed to the inference server. Limited to 100 key-value pairs.",
 647    )
 648    maxDeployTimeInSeconds: Optional[int] = Field(
 649        default=3600,
 650        alias="max_deploy_time_in_seconds",
 651        description="Maximum allowed time in seconds for the deployment to complete before timing out. Defaults to 1 hour (3600 seconds)",
 652    )
 653    intelligentRoutingSpec: Optional[IntelligentRoutingSpec] = Field(
 654        default=None,
 655        alias="intelligent_routing_spec",
 656        description="Configuration for intelligent routing",
 657    )
 658    kvCacheSpec: Optional[KvCacheSpec] = Field(
 659        default=None,
 660        alias="kv_cache_spec",
 661        description="Configuration for KV Cache specification",
 662    )
 663    loadBalancer: Optional[LoadBalancer] = Field(
 664        default=None,
 665        alias="load_balancer",
 666        description="Configuration for Application Load Balancer",
 667    )
 668    metrics: Optional[Metrics] = Field(
 669        default=None, description="Configuration for metrics collection and exposure"
 670    )
 671    model: Model
 672    replicas: Optional[int] = Field(
 673        default=1,
 674        description="The desired number of inference server replicas. Default 1.",
 675    )
 676    sageMakerEndpoint: Optional[SageMakerEndpoint] = Field(
 677        default=None, alias="sage_maker_endpoint"
 678    )
 679    server: Server
 680    tlsConfig: Optional[TlsConfig] = Field(default=None, alias="tls_config")
 681
 682

[docs]
 683class Conditions(BaseModel):
 684    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 685
 686    """DeploymentCondition describes the state of a deployment at a certain point."""
 687
 688
 689    lastTransitionTime: Optional[str] = Field(
 690        default=None,
 691        alias="last_transition_time",
 692        description="Last time the condition transitioned from one status to another.",
 693    )
 694    lastUpdateTime: Optional[str] = Field(
 695        default=None,
 696        alias="last_update_time",
 697        description="The last time this condition was updated.",
 698    )
 699    message: Optional[str] = Field(
 700        default=None,
 701        description="A human readable message indicating details about the transition.",
 702    )
 703    reason: Optional[str] = Field(
 704        default=None, description="The reason for the condition's last transition."
 705    )
 706    status: str = Field(
 707        description="Status of the condition, one of True, False, Unknown."
 708    )
 709    type: str = Field(description="Type of deployment condition.")
 710    observedGeneration: Optional[int] = Field(
 711        default=None,
 712        alias="observed_generation",
 713        description="observedGeneration represents the .metadata.generation that the condition was set based upon. For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date with respect to the current state of the instance.",
 714    )

 715
 716

[docs]
 717class Status(BaseModel):
 718    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 719
 720    """Status of the Deployment Object"""
 721
 722
 723    availableReplicas: Optional[int] = Field(
 724        default=None,
 725        alias="available_replicas",
 726        description="Total number of available pods (ready for at least minReadySeconds) targeted by this deployment.",
 727    )
 728    collisionCount: Optional[int] = Field(
 729        default=None,
 730        alias="collision_count",
 731        description="Count of hash collisions for the Deployment. The Deployment controller uses this field as a collision avoidance mechanism when it needs to create the name for the newest ReplicaSet.",
 732    )
 733    conditions: Optional[List[Conditions]] = Field(
 734        default=None,
 735        description="Represents the latest available observations of a deployment's current state.",
 736    )
 737    observedGeneration: Optional[int] = Field(
 738        default=None,
 739        alias="observed_generation",
 740        description="The generation observed by the deployment controller.",
 741    )
 742    readyReplicas: Optional[int] = Field(
 743        default=None,
 744        alias="ready_replicas",
 745        description="readyReplicas is the number of pods targeted by this Deployment with a Ready Condition.",
 746    )
 747    replicas: Optional[int] = Field(
 748        default=None,
 749        description="Total number of non-terminated pods targeted by this deployment (their labels match the selector).",
 750    )
 751    terminatingReplicas: Optional[int] = Field(
 752        default=None,
 753        alias="terminating_replicas",
 754        description="Total number of terminating pods targeted by this deployment.",
 755    )
 756    unavailableReplicas: Optional[int] = Field(
 757        default=None,
 758        alias="unavailable_replicas",
 759        description="Total number of unavailable pods targeted by this deployment. This is the total number of pods that are still required for the deployment to have 100% available capacity. They may either be pods that are running but not yet available or pods that still have not been created.",
 760    )
 761    updatedReplicas: Optional[int] = Field(
 762        default=None,
 763        alias="updated_replicas",
 764        description="Total number of non-terminated pods targeted by this deployment that have the desired template spec.",
 765    )

 766
 767

[docs]
 768class DeploymentStatus(BaseModel):
 769    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 770
 771    """Details of the native kubernetes deployment that hosts the model"""
 772
 773
 774    deploymentObjectOverallState: Optional[str] = Field(
 775        default=None,
 776        alias="deployment_object_overall_state",
 777        description="Overall State of the Deployment Object",
 778    )
 779    lastUpdated: str = Field(alias="last_updated", description="Last Update Time")
 780    message: Optional[str] = Field(
 781        default=None,
 782        description="Message populated in the root CRD while updating the status of underlying Deployment",
 783    )
 784    name: str = Field(description="Name of the Deployment Object")
 785    reason: Optional[str] = Field(
 786        default=None,
 787        description="Reason populated in the root CRD while updating the status of underlying Deployment",
 788    )
 789    status: Optional[Status] = Field(
 790        default=None, description="Status of the Deployment Object"
 791    )

 792
 793

[docs]
 794class Sagemaker(BaseModel):
 795    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 796
 797    """Status of the SageMaker endpoint"""
 798
 799
 800    configArn: Optional[str] = Field(
 801        default=None,
 802        alias="config_arn",
 803        description="The Amazon Resource Name (ARN) of the endpoint configuration.",
 804    )
 805    endpointArn: Optional[str] = Field(
 806        default=None,
 807        alias="endpoint_arn",
 808        description="The Amazon Resource Name (ARN) of the SageMaker endpoint",
 809    )
 810    modelArn: Optional[str] = Field(
 811        default=None,
 812        alias="model_arn",
 813        description="The ARN of the model created in SageMaker.",
 814    )
 815    state: str = Field(description="The current state of the SageMaker endpoint")

 816
 817

[docs]
 818class Endpoints(BaseModel):
 819    """EndpointStatus contains the status of SageMaker endpoints"""
 820
 821    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 822
 823    sagemaker: Optional[Sagemaker] = Field(
 824        default=None, description="Status of the SageMaker endpoint"
 825    )

 826
 827

[docs]
 828class ModelMetricsStatus(BaseModel):
 829    """Status of model container metrics collection"""
 830
 831    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 832
 833    path: Optional[str] = Field(
 834        default=None, description="The path where metrics are available"
 835    )
 836    port: Optional[int] = Field(
 837        default=None, description="The port on which metrics are exposed"
 838    )

 839
 840

[docs]
 841class MetricsStatus(BaseModel):
 842    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 843
 844    """Status of metrics collection"""
 845
 846
 847    enabled: bool = Field(description="Whether metrics collection is enabled")
 848    errorMessage: Optional[str] = Field(
 849        default=None,
 850        alias="error_message",
 851        description="Error message if metrics collection is in error state",
 852    )
 853    metricsScrapeIntervalSeconds: Optional[int] = Field(
 854        default=None,
 855        alias="metrics_scrape_interval_seconds",
 856        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
 857    )
 858    modelMetrics: Optional[ModelMetricsStatus] = Field(
 859        default=None,
 860        alias="model_metrics",
 861        description="Status of model container metrics collection",
 862    )
 863    state: Optional[str] = Field(
 864        default=None, description="Current state of metrics collection"
 865    )

 866
 867

[docs]
 868class TlsCertificate(BaseModel):
 869    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 870
 871    """CertificateStatus represents the status of TLS certificates"""
 872
 873
 874    certificateARN: Optional[str] = Field(
 875        default=None,
 876        alias="certificate_arn",
 877        description="The Amazon Resource Name (ARN) of the ACM certificate",
 878    )
 879    certificateDomainNames: Optional[List[str]] = Field(
 880        default=None,
 881        alias="certificate_domain_names",
 882        description="The certificate domain names that is attached to the certificate",
 883    )
 884    certificateHealth: Optional[Literal["Valid", "Expiring", "Expired"]] = Field(
 885        default=None,
 886        alias="certificate_health",
 887        description="Certificate health status",
 888    )
 889    certificateName: Optional[str] = Field(
 890        default=None,
 891        alias="certificate_name",
 892        description="The certificate name of cert manager",
 893    )
 894    importedCertificates: Optional[List[str]] = Field(
 895        default=None,
 896        alias="imported_certificates",
 897        description="Used for tracking the imported certificates to ACM",
 898    )
 899    issuerName: Optional[str] = Field(
 900        default=None, alias="issuer_name", description="The issuer name of cert manager"
 901    )
 902    lastCertExpiryTime: Optional[str] = Field(
 903        default=None,
 904        alias="last_cert_expiry_time",
 905        description="The last certificate expiry time",
 906    )
 907    tlsCertificateOutputS3Bucket: Optional[str] = Field(
 908        default=None,
 909        alias="tls_certificate_output_s3_bucket",
 910        description="S3 bucket that stores the certificate that needs to be trusted",
 911    )
 912    tlsCertificateS3Keys: Optional[List[str]] = Field(
 913        default=None,
 914        alias="tls_certificate_s3_keys",
 915        description="The output tls certificate S3 key that points to the .pem file",
 916    )

 917
 918

[docs]
 919class DataCaptureModelPodStatus(BaseModel):
 920    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 921
 922    """Health status of the model pod data capture tier"""
 923
 924
 925    lastTransitionTime: Optional[str] = Field(
 926        default=None,
 927        alias="last_transition_time",
 928        description="Time of the last health state transition",
 929    )
 930    message: Optional[str] = Field(
 931        default=None,
 932        description="Human-readable message describing the health state",
 933    )
 934    reason: Optional[str] = Field(
 935        default=None,
 936        description="Reason for unhealthy status (e.g., OOMKilled, S3UploadFailure, MultipleContainerRestarts)",
 937    )
 938    status: Literal["Healthy", "Unhealthy"] = Field(
 939        description="Current health status"
 940    )

 941
 942

[docs]
 943class DataCaptureStatus(BaseModel):
 944    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 945
 946    """Health status of the data capture pipeline"""
 947
 948
 949    modelPod: Optional[DataCaptureModelPodStatus] = Field(
 950        default=None,
 951        alias="model_pod",
 952        description="Health status of the model pod data capture tier",
 953    )

 954
 955

[docs]
 956class DnsStatus(BaseModel):
 957    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 958
 959    """Status of the operator-managed Route53 DNS record"""
 960
 961
 962    dnsHealth: Optional[Literal["Active", "Pending", "Error"]] = Field(
 963        default=None, alias="dns_health",
 964        description="DNS resolution status: Active, Pending, or Error.",
 965    )
 966    hostedZoneId: Optional[str] = Field(
 967        default=None, alias="hosted_zone_id",
 968        description="Route53 hosted zone ID.",
 969    )
 970    lastTransitionTime: Optional[str] = Field(
 971        default=None, alias="last_transition_time",
 972        description="When the status last transitioned, used for propagation timeout.",
 973    )
 974    managedByOperator: bool = Field(
 975        alias="managed_by_operator",
 976        description="Whether the operator manages this DNS record.",
 977    )
 978    message: Optional[str] = Field(
 979        default=None, description="Human-readable status or error message."
 980    )
 981    previousHostedZoneId: Optional[str] = Field(
 982        default=None, alias="previous_hosted_zone_id",
 983    )
 984    previousRecordName: Optional[str] = Field(
 985        default=None, alias="previous_record_name",
 986    )
 987    recordName: Optional[str] = Field(
 988        default=None, alias="record_name", description="Route53 record name."
 989    )

 990
 991

[docs]
 992class JumpStartModelStatus(BaseModel):
 993    model_config = ConfigDict(extra="forbid", populate_by_name=True)
 994
 995    """ModelDeploymentStatus defines the observed state of ModelDeployment"""
 996
 997
 998    conditions: Optional[List[Conditions]] = Field(
 999        default=None,
1000        description="Detailed conditions representing the state of the deployment",
1001    )
1002    dataCaptureStatus: Optional[DataCaptureStatus] = Field(
1003        default=None,
1004        alias="data_capture_status",
1005        description="Health status of the data capture pipeline",
1006    )
1007    deploymentStatus: Optional[DeploymentStatus] = Field(
1008        default=None,
1009        alias="deployment_status",
1010        description="Details of the native kubernetes deployment that hosts the model",
1011    )
1012    dnsStatus: Optional[DnsStatus] = Field(
1013        default=None,
1014        alias="dns_status",
1015        description="Status of the operator-managed Route53 DNS record",
1016    )
1017    endpoints: Optional[Endpoints] = Field(
1018        default=None,
1019        description="EndpointStatus contains the status of SageMaker endpoints",
1020    )
1021    metricsStatus: Optional[MetricsStatus] = Field(
1022        default=None, alias="metrics_status", description="Status of metrics collection"
1023    )
1024    observedGeneration: Optional[int] = Field(
1025        default=None,
1026        alias="observed_generation",
1027        description="Latest generation reconciled by controller",
1028    )
1029    replicas: Optional[int] = Field(
1030        default=None, description="The observed number of inference server replicas."
1031    )
1032    selector: Optional[str] = Field(
1033        default=None, description="LabelSelector for the deployment."
1034    )
1035    state: Optional[
1036        Literal[
1037            "DeploymentPending",
1038            "DeploymentInProgress",
1039            "DeploymentFailed",
1040            "DeploymentComplete",
1041            "DeletionPending",
1042            "DeletionInProgress",
1043            "DeletionFailed",
1044            "DeletionComplete",
1045        ]
1046    ] = Field(default=None, description="Current phase of the model deployment")
1047    tlsCertificate: Optional[TlsCertificate] = Field(
1048        default=None,
1049        alias="tls_certificate",
1050        description="CertificateStatus represents the status of TLS certificates",
1051    )