Source code for sagemaker.hyperpod.inference.config.hp_endpoint_config

  1from pydantic import BaseModel, ConfigDict, Field
  2from typing import Optional, List, Dict, Union, Literal
  3
  4
class Dimensions(BaseModel):
    # One CloudWatch metric dimension, expressed as a name/value pair.
    # Both fields are required; unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    name: str = Field(..., description="CloudWatch Metric dimension name")
    value: str = Field(..., description="CloudWatch Metric dimension value")
10 11
class CloudWatchTrigger(BaseModel):
    """CloudWatch metric trigger to use for autoscaling"""

    # Attribute names are camelCase; every multi-word field also declares a
    # snake_case alias. Unknown keys are rejected (extra="forbid").
    # Every field is optional and carries the default shown below.
    model_config = ConfigDict(extra="forbid")

    activationTargetValue: Optional[float] = Field(
        0,
        alias="activation_target_value",
        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
    )
    dimensions: Optional[List[Dimensions]] = Field(
        None, description="Dimensions for Cloudwatch metrics"
    )
    metricCollectionPeriod: Optional[int] = Field(
        300,
        alias="metric_collection_period",
        description="Defines the Period for CloudWatch query",
    )
    metricCollectionStartTime: Optional[int] = Field(
        300,
        alias="metric_collection_start_time",
        description="Defines the StartTime for CloudWatch query",
    )
    metricName: Optional[str] = Field(
        None,
        alias="metric_name",
        description="Metric name to query for Cloudwatch trigger",
    )
    metricStat: Optional[str] = Field(
        "Average",
        alias="metric_stat",
        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
    )
    # The description previously advertised "AverageValue", which the Literal
    # below does not accept; it now names the values that actually validate.
    metricType: Optional[Literal["Value", "Average"]] = Field(
        "Average",
        alias="metric_type",
        description="The type of metric to be used by HPA. Enum: Average - Uses average value of metric per pod, Value - Uses absolute metric value",
    )
    minValue: Optional[float] = Field(
        0,
        alias="min_value",
        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
    )
    name: Optional[str] = Field(None, description="Name for the CloudWatch trigger")
    namespace: Optional[str] = Field(
        None, description="AWS CloudWatch namespace for metric"
    )
    targetValue: Optional[float] = Field(
        None,
        alias="target_value",
        description="TargetValue for CloudWatch metric",
    )
    useCachedMetrics: Optional[bool] = Field(
        True,
        alias="use_cached_metrics",
        description="Enable caching of metric values during polling interval. Default is true",
    )
71 72
class CloudWatchTriggerList(BaseModel):
    # Element type for lists of CloudWatch triggers. It declares the same
    # fields, aliases and defaults as CloudWatchTrigger.
    # Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    activationTargetValue: Optional[float] = Field(
        0,
        alias="activation_target_value",
        description="Activation Value for CloudWatch metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
    )
    dimensions: Optional[List[Dimensions]] = Field(
        None, description="Dimensions for Cloudwatch metrics"
    )
    metricCollectionPeriod: Optional[int] = Field(
        300,
        alias="metric_collection_period",
        description="Defines the Period for CloudWatch query",
    )
    metricCollectionStartTime: Optional[int] = Field(
        300,
        alias="metric_collection_start_time",
        description="Defines the StartTime for CloudWatch query",
    )
    metricName: Optional[str] = Field(
        None,
        alias="metric_name",
        description="Metric name to query for Cloudwatch trigger",
    )
    metricStat: Optional[str] = Field(
        "Average",
        alias="metric_stat",
        description="Statistics metric to be used by Trigger. Used to define Stat for CloudWatch query. Default is Average.",
    )
    # The description previously advertised "AverageValue", which the Literal
    # below does not accept; it now names the values that actually validate.
    metricType: Optional[Literal["Value", "Average"]] = Field(
        "Average",
        alias="metric_type",
        description="The type of metric to be used by HPA. Enum: Average - Uses average value of metric per pod, Value - Uses absolute metric value",
    )
    minValue: Optional[float] = Field(
        0,
        alias="min_value",
        description="Minimum metric value used in case of empty response from CloudWatch. Default is 0.",
    )
    name: Optional[str] = Field(None, description="Name for the CloudWatch trigger")
    namespace: Optional[str] = Field(
        None, description="AWS CloudWatch namespace for metric"
    )
    targetValue: Optional[float] = Field(
        None,
        alias="target_value",
        description="TargetValue for CloudWatch metric",
    )
    useCachedMetrics: Optional[bool] = Field(
        True,
        alias="use_cached_metrics",
        description="Enable caching of metric values during polling interval. Default is true",
    )
130 131
class PrometheusTrigger(BaseModel):
    """Prometheus metric trigger to use for autoscaling"""

    # Attribute names are camelCase; every multi-word field also declares a
    # snake_case alias. Unknown keys are rejected (extra="forbid").
    # Every field is optional and carries the default shown below.
    model_config = ConfigDict(extra="forbid")

    activationTargetValue: Optional[float] = Field(
        0,
        alias="activation_target_value",
        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
    )
    customHeaders: Optional[str] = Field(
        None,
        alias="custom_headers",
        description="Custom headers to include while querying the prometheus endpoint.",
    )
    # The description previously advertised "AverageValue", which the Literal
    # below does not accept; it now names the values that actually validate.
    metricType: Optional[Literal["Value", "Average"]] = Field(
        "Average",
        alias="metric_type",
        description="The type of metric to be used by HPA. Enum: Average - Uses average value of metric per pod, Value - Uses absolute metric value",
    )
    name: Optional[str] = Field(None, description="Name for the Prometheus trigger")
    namespace: Optional[str] = Field(
        None, description="Namespace for namespaced queries"
    )
    query: Optional[str] = Field(None, description="PromQLQuery for the metric.")
    serverAddress: Optional[str] = Field(
        None,
        alias="server_address",
        description="Server address for AMP workspace",
    )
    targetValue: Optional[float] = Field(
        None,
        alias="target_value",
        description="Target metric value for scaling",
    )
    useCachedMetrics: Optional[bool] = Field(
        True,
        alias="use_cached_metrics",
        description="Enable caching of metric values during polling interval. Default is true",
    )
176 177
class PrometheusTriggerList(BaseModel):
    # Element type for lists of Prometheus triggers. It declares the same
    # fields, aliases and defaults as PrometheusTrigger.
    # Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    activationTargetValue: Optional[float] = Field(
        0,
        alias="activation_target_value",
        description="Activation Value for Prometheus metric to scale from 0 to 1. Only applicable if minReplicaCount = 0",
    )
    customHeaders: Optional[str] = Field(
        None,
        alias="custom_headers",
        description="Custom headers to include while querying the prometheus endpoint.",
    )
    # The description previously advertised "AverageValue", which the Literal
    # below does not accept; it now names the values that actually validate.
    metricType: Optional[Literal["Value", "Average"]] = Field(
        "Average",
        alias="metric_type",
        description="The type of metric to be used by HPA. Enum: Average - Uses average value of metric per pod, Value - Uses absolute metric value",
    )
    name: Optional[str] = Field(None, description="Name for the Prometheus trigger")
    namespace: Optional[str] = Field(
        None, description="Namespace for namespaced queries"
    )
    query: Optional[str] = Field(None, description="PromQLQuery for the metric.")
    serverAddress: Optional[str] = Field(
        None,
        alias="server_address",
        description="Server address for AMP workspace",
    )
    targetValue: Optional[float] = Field(
        None,
        alias="target_value",
        description="Target metric value for scaling",
    )
    useCachedMetrics: Optional[bool] = Field(
        True,
        alias="use_cached_metrics",
        description="Enable caching of metric values during polling interval. Default is true",
    )
220 221
class AutoScalingSpec(BaseModel):
    # Autoscaling settings: replica bounds, timing windows (all in seconds
    # per the field descriptions) and the CloudWatch / Prometheus triggers.
    # Every field is optional; unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    cloudWatchTrigger: Optional[CloudWatchTrigger] = Field(
        None,
        alias="cloud_watch_trigger",
        description="CloudWatch metric trigger to use for autoscaling",
    )
    cloudWatchTriggerList: Optional[List[CloudWatchTriggerList]] = Field(
        None,
        alias="cloud_watch_trigger_list",
        description="Multiple CloudWatch metric triggers to use for autoscaling. Takes priority over CloudWatchTrigger if both are provided.",
    )
    cooldownPeriod: Optional[int] = Field(
        300,
        alias="cooldown_period",
        description="The period to wait after the last trigger reported active before scaling the resource back to 0. Default 300 seconds.",
    )
    initialCooldownPeriod: Optional[int] = Field(
        300,
        alias="initial_cooldown_period",
        description="The delay before the cooldownPeriod starts after the initial creation of the ScaledObject. Default 300 seconds.",
    )
    maxReplicaCount: Optional[int] = Field(
        5,
        alias="max_replica_count",
        description="The maximum number of model pods to scale to. Default 5.",
    )
    minReplicaCount: Optional[int] = Field(
        1,
        alias="min_replica_count",
        description="The minimum number of model pods to scale down to. Default 1.",
    )
    pollingInterval: Optional[int] = Field(
        30,
        alias="polling_interval",
        description="This is the interval to check each trigger on. Default 30 seconds.",
    )
    prometheusTrigger: Optional[PrometheusTrigger] = Field(
        None,
        alias="prometheus_trigger",
        description="Prometheus metric trigger to use for autoscaling",
    )
    prometheusTriggerList: Optional[List[PrometheusTriggerList]] = Field(
        None,
        alias="prometheus_trigger_list",
        description="Multiple Prometheus metric triggers to use for autoscaling. Takes priority over PrometheusTrigger if both are provided.",
    )
    scaleDownStabilizationTime: Optional[int] = Field(
        300,
        alias="scale_down_stabilization_time",
        description="The time window to stabilize for HPA before scaling down. Default 300 seconds.",
    )
    scaleUpStabilizationTime: Optional[int] = Field(
        0,
        alias="scale_up_stabilization_time",
        description="The time window to stabilize for HPA before scaling up. Default 0 seconds.",
    )
280 281
class IntelligentRoutingSpec(BaseModel):
    """Configuration for intelligent routing This feature is currently not supported for existing deployments. Adding this configuration to an existing deployment will be rejected."""

    # Routing is off by default (enabled=False); the default strategy is
    # "prefixaware". Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    autoScalingSpec: Optional[AutoScalingSpec] = Field(
        None, alias="auto_scaling_spec"
    )
    enabled: Optional[bool] = Field(
        False, description="Once set, the enabled field cannot be modified"
    )
    routingStrategy: Optional[
        Literal["prefixaware", "kvaware", "session", "roundrobin"]
    ] = Field("prefixaware", alias="routing_strategy")
296 297
class L2CacheSpec(BaseModel):
    """Configuration for providing L2 Cache offloading"""

    # Both fields default to None at the model level, although the
    # l2CacheBackend description calls it required when this spec is given.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    l2CacheBackend: Optional[str] = Field(
        None,
        alias="l2_cache_backend",
        description="L2 cache backend type. Required when L2CacheSpec is provided.",
    )
    l2CacheLocalUrl: Optional[str] = Field(
        None,
        alias="l2_cache_local_url",
        description="Provide the L2 cache URL to local storage",
    )
313 314
class KvCacheSpec(BaseModel):
    """Configuration for KV Cache specification By default L1CacheOffloading will be enabled"""

    # Defaults: L1 cache on, L2 cache off, no config file, no L2 spec.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    cacheConfigFile: Optional[str] = Field(
        None,
        alias="cache_config_file",
        description="KVCache configuration file path. If specified, override other configurations provided via spec",
    )
    enableL1Cache: Optional[bool] = Field(
        True, alias="enable_l1_cache", description="Enable CPU offloading"
    )
    enableL2Cache: Optional[bool] = Field(False, alias="enable_l2_cache")
    l2CacheSpec: Optional[L2CacheSpec] = Field(
        None,
        alias="l2_cache_spec",
        description="Configuration for providing L2 Cache offloading",
    )
334 335
class LoadBalancer(BaseModel):
    """Configuration for Application Load Balancer"""

    # Defaults: health check on "/ping", routing by
    # "least_outstanding_requests". Unknown keys are rejected
    # (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    healthCheckPath: Optional[str] = Field(
        "/ping",
        alias="health_check_path",
        description="Health check path for the ALB target group. Defaults to /ping if not specified.",
    )
    # The description used to spell the first option "least_oustanding_requests",
    # a value the Literal rejects; it now matches the accepted literal exactly.
    routingAlgorithm: Optional[Literal["least_outstanding_requests", "round_robin"]] = (
        Field(
            "least_outstanding_requests",
            alias="routing_algorithm",
            description="Routing algorithm for the ALB target group (least_outstanding_requests or round_robin)",
        )
    )
353 354
class ModelMetrics(BaseModel):
    """Configuration for model container metrics scraping"""

    # Defaults: path "/metrics", port 8080.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    path: Optional[str] = Field(
        "/metrics", description="Path where the model exposes metrics"
    )
    port: Optional[int] = Field(
        8080,
        description="Port where the model exposes metrics. If not specified, a default port will be used.",
    )
367 368
class Metrics(BaseModel):
    """Configuration for metrics collection and exposure"""

    # Defaults: collection enabled, 15-second scrape interval, no
    # model-container metrics block. Unknown keys are rejected
    # (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    enabled: Optional[bool] = Field(
        True, description="Enable metrics collection for this model deployment"
    )
    metricsScrapeIntervalSeconds: Optional[int] = Field(
        15,
        alias="metrics_scrape_interval_seconds",
        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
    )
    modelMetrics: Optional[ModelMetrics] = Field(
        None,
        alias="model_metrics",
        description="Configuration for model container metrics scraping",
    )
387 388
class FsxStorage(BaseModel):
    # FSx file-system reference. Only fileSystemId is required; dnsName and
    # mountName default to None. Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    dnsName: Optional[str] = Field(
        None, alias="dns_name", description="FSX File System DNS Name"
    )
    fileSystemId: str = Field(
        ..., alias="file_system_id", description="FSX File System ID"
    )
    mountName: Optional[str] = Field(
        None, alias="mount_name", description="FSX File System Mount Name"
    )
399 400
class S3Storage(BaseModel):
    # S3 bucket reference; both fields are required.
    # Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    bucketName: str = Field(
        ..., alias="bucket_name", description="S3 bucket location"
    )
    region: str = Field(..., description="S3 bucket region")
406 407
class ModelSourceConfig(BaseModel):
    # Where the model artifacts live. modelSourceType ("fsx" or "s3") is the
    # only required field; the storage blocks are optional at the model
    # level, and this class does not check that the block matching
    # modelSourceType is present. Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    fsxStorage: Optional[FsxStorage] = Field(None, alias="fsx_storage")
    # Description typo fixed ("Sepcific" -> "Specific"); the text is
    # user-facing through the generated schema and help output.
    modelLocation: Optional[str] = Field(
        None,
        alias="model_location",
        description="Specific location where the model data exists",
    )
    modelSourceType: Literal["fsx", "s3"] = Field(alias="model_source_type")
    prefetchEnabled: Optional[bool] = Field(
        False,
        alias="prefetch_enabled",
        description="In case the model seems to fit within the instance's memory (VRAM), this option can be used to pre-fetch the model to RAM and then the inference server will load to the GPU/CPU device thereafter.",
    )
    s3Storage: Optional[S3Storage] = Field(None, alias="s3_storage")
424 425
class Tags(BaseModel):
    # A single name/value tag; both fields are required.
    # Unknown keys are rejected (extra="forbid").
    # (Comment, not a docstring: a docstring would appear in the generated
    # JSON schema as the model description.)
    model_config = ConfigDict(extra="forbid")

    name: str
    value: str
431 432
class TlsConfig(BaseModel):
    """Configurations for TLS"""

    # Single optional field, None by default.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    tlsCertificateOutputS3Uri: Optional[str] = Field(
        None, alias="tls_certificate_output_s3_uri"
    )
441 442
class ConfigMapKeyRef(BaseModel):
    """Selects a key of a ConfigMap."""

    # key is required; name defaults to the empty string and optional to
    # None. Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    key: str = Field(..., description="The key to select.")
    name: Optional[str] = Field(
        "",
        description="Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
    )
    optional: Optional[bool] = Field(
        None,
        description="Specify whether the ConfigMap or its key must be defined",
    )
457 458
class FieldRef(BaseModel):
    """Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs."""

    # fieldPath is required; apiVersion defaults to None.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    apiVersion: Optional[str] = Field(
        None,
        alias="api_version",
        description='Version of the schema the FieldPath is written in terms of, defaults to "v1".',
    )
    fieldPath: str = Field(
        ...,
        alias="field_path",
        description="Path of the field to select in the specified API version.",
    )
473 474
class ResourceFieldRef(BaseModel):
    """Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported."""

    # resource is required; divisor accepts an int or a string.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    containerName: Optional[str] = Field(
        None,
        alias="container_name",
        description="Container name: required for volumes, optional for env vars",
    )
    divisor: Optional[Union[int, str]] = Field(
        None,
        description='Specifies the output format of the exposed resources, defaults to "1"',
    )
    resource: str = Field(..., description="Required: resource to select")
490 491
class SecretKeyRef(BaseModel):
    """Selects a key of a secret in the pod's namespace"""

    # key is required; name defaults to the empty string and optional to
    # None. Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    key: str = Field(
        ...,
        description="The key of the secret to select from. Must be a valid secret key.",
    )
    name: Optional[str] = Field(
        "",
        description="Name of the referent. This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names",
    )
    optional: Optional[bool] = Field(
        None,
        description="Specify whether the Secret or its key must be defined",
    )
508 509
class ValueFrom(BaseModel):
    """Source for the environment variable's value. Cannot be used if value is not empty."""

    # Four optional source references, all None by default. This model does
    # not enforce that exactly one of them is set.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    configMapKeyRef: Optional[ConfigMapKeyRef] = Field(
        None,
        alias="config_map_key_ref",
        description="Selects a key of a ConfigMap.",
    )
    fieldRef: Optional[FieldRef] = Field(
        None,
        alias="field_ref",
        description="Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`, spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.",
    )
    resourceFieldRef: Optional[ResourceFieldRef] = Field(
        None,
        alias="resource_field_ref",
        description="Selects a resource of the container: only resources limits and requests (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.",
    )
    secretKeyRef: Optional[SecretKeyRef] = Field(
        None,
        alias="secret_key_ref",
        description="Selects a key of a secret in the pod's namespace",
    )
535 536
class EnvironmentVariables(BaseModel):
    """EnvVar represents an environment variable present in a Container."""

    # name is required; value and valueFrom both default to None. This model
    # does not itself enforce that only one of the two is supplied.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        ...,
        description="Name of the environment variable. Must be a C_IDENTIFIER.",
    )
    value: Optional[str] = Field(
        None,
        description='Variable references $(VAR_NAME) are expanded using the previously defined environment variables in the container and any service environment variables. If a variable cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless of whether the variable exists or not. Defaults to "".',
    )
    valueFrom: Optional[ValueFrom] = Field(
        None,
        alias="value_from",
        description="Source for the environment variable's value. Cannot be used if value is not empty.",
    )
554 555
class ModelInvocationPort(BaseModel):
    """Defines the port at which the model server will listen to the invocation requests."""

    # containerPort is required; the port name defaults to "http".
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    containerPort: int = Field(
        ...,
        alias="container_port",
        description="Port on which the model server will be listening",
    )
    name: Optional[str] = Field(
        "http",
        description="This is name for the port within the deployed container where the model will listen. This will be referred to by the Load Balancer Service. This must be an IANA_SVC_NAME (for eg. http) and unique within the pod.",
    )
569 570
class ModelVolumeMount(BaseModel):
    """Defines the volume where model will be loaded"""

    # name is required; mountPath defaults to "/opt/ml/model".
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    mountPath: Optional[str] = Field(
        "/opt/ml/model",
        alias="mount_path",
        description="This is the path within the container where the model data will be available for the inference server to load it to GPU,CPU or other device",
    )
    name: str = Field(..., description="Name of the model volume mount")
582 583
class Claims(BaseModel):
    """ResourceClaim references one entry in PodSpec.ResourceClaims."""

    # name is required; request defaults to None.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        ...,
        description="Name must match the name of one entry in pod.spec.resourceClaims of the Pod where this field is used. It makes that resource available inside a container.",
    )
    request: Optional[str] = Field(
        None,
        description="Request is the name chosen for a request in the referenced claim. If empty, everything from the claim is made available, otherwise only the result of this request.",
    )
596 597
class Resources(BaseModel):
    """Defines the Resources in terms of CPU, GPU, Memory needed for the model to be deployed"""

    # limits and requests map a resource name to an int or string quantity.
    # All three fields default to None; unknown keys are rejected
    # (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    claims: Optional[List[Claims]] = Field(
        None,
        description="Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers.",
    )
    limits: Optional[Dict[str, Union[int, str]]] = Field(
        None,
        description="Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
    )
    requests: Optional[Dict[str, Union[int, str]]] = Field(
        None,
        description="Requests describes the minimum amount of compute resources required. If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, otherwise to an implementation-defined value. Requests cannot exceed Limits. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
    )
615 616
class Worker(BaseModel):
    """Details of the worker"""

    # Required: image, modelInvocationPort, modelVolumeMount, resources.
    # args, command, environmentVariables and workingDir default to None.
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    args: Optional[List[str]] = Field(
        None, description="Defines the Arguments to the entrypoint."
    )
    command: Optional[List[str]] = Field(
        None,
        description="Defines the Command which is Entrypoint array. Not executed within a shell.",
    )
    environmentVariables: Optional[List[EnvironmentVariables]] = Field(
        None,
        alias="environment_variables",
        description="List of environment variables to set in the container. Cannot be updated.",
    )
    image: str = Field(
        ..., description="The name of the inference server image to be used"
    )
    modelInvocationPort: ModelInvocationPort = Field(
        ...,
        alias="model_invocation_port",
        description="Defines the port at which the model server will listen to the invocation requests.",
    )
    modelVolumeMount: ModelVolumeMount = Field(
        ...,
        alias="model_volume_mount",
        description="Defines the volume where model will be loaded",
    )
    resources: Resources = Field(
        ...,
        description="Defines the Resources in terms of CPU, GPU, Memory needed for the model to be deployed",
    )
    workingDir: Optional[str] = Field(
        None,
        alias="working_dir",
        description="Defines the working directory of container.",
    )
class _HPEndpoint(BaseModel):
    """InferenceEndpointConfigSpec defines the desired state of InferenceEndpointConfig."""

    # Unlike the nested models in this module, unknown keys are silently
    # dropped here (extra="ignore") rather than rejected.
    # Required: instanceType, modelName, modelSourceConfig, worker.
    model_config = ConfigDict(extra="ignore")

    # NOTE: this attribute is PascalCase, unlike its camelCase siblings.
    InitialReplicaCount: Optional[int] = Field(
        None,
        alias="initial_replica_count",
        description="Number of desired pods. This is a pointer to distinguish between explicit zero and not specified. Defaults to 1.",
    )
    autoScalingSpec: Optional[AutoScalingSpec] = Field(
        None, alias="auto_scaling_spec"
    )
    endpointName: Optional[str] = Field(
        None,
        alias="endpoint_name",
        description="Name used for Sagemaker Endpoint Name of sagemaker endpoint. Defaults to empty string which represents that Sagemaker endpoint will not be created.",
    )
    instanceType: str = Field(
        ..., alias="instance_type", description="Instance Type to deploy the model on"
    )
    intelligentRoutingSpec: Optional[IntelligentRoutingSpec] = Field(
        None,
        alias="intelligent_routing_spec",
        description="Configuration for intelligent routing This feature is currently not supported for existing deployments. Adding this configuration to an existing deployment will be rejected.",
    )
    invocationEndpoint: Optional[str] = Field(
        "invocations",
        alias="invocation_endpoint",
        description="The invocation endpoint of the model server. http://<host>:<port>/ would be pre-populated based on the other fields. Please fill in the path after http://<host>:<port>/ specific to your model server.",
    )
    kvCacheSpec: Optional[KvCacheSpec] = Field(
        None,
        alias="kv_cache_spec",
        description="Configuration for KV Cache specification By default L1CacheOffloading will be enabled",
    )
    loadBalancer: Optional[LoadBalancer] = Field(
        None,
        alias="load_balancer",
        description="Configuration for Application Load Balancer",
    )
    metrics: Optional[Metrics] = Field(
        None, description="Configuration for metrics collection and exposure"
    )
    modelName: str = Field(
        ...,
        alias="model_name",
        description="Name of model that will be created on Sagemaker",
    )
    modelSourceConfig: ModelSourceConfig = Field(..., alias="model_source_config")
    modelVersion: Optional[str] = Field(
        None,
        alias="model_version",
        description="Version of the model used in creating sagemaker endpoint",
    )
    replicas: Optional[int] = Field(
        1,
        description="The desired number of inference server replicas. Default 1.",
    )
    tags: Optional[List[Tags]] = Field(
        None,
        description="Mentions the tags to be added to the Sagemaker Endpoint",
    )
    tlsConfig: Optional[TlsConfig] = Field(
        None, alias="tls_config", description="Configurations for TLS"
    )
    worker: Worker = Field(..., description="Details of the worker")
class Conditions(BaseModel):
    """DeploymentCondition describes the state of a deployment at a certain point."""

    # status and type are required strings; timestamps are kept as plain
    # strings (no datetime parsing happens in this model).
    # Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    lastTransitionTime: Optional[str] = Field(
        None,
        alias="last_transition_time",
        description="Last time the condition transitioned from one status to another.",
    )
    lastUpdateTime: Optional[str] = Field(
        None,
        alias="last_update_time",
        description="The last time this condition was updated.",
    )
    message: Optional[str] = Field(
        None,
        description="A human readable message indicating details about the transition.",
    )
    reason: Optional[str] = Field(
        None, description="The reason for the condition's last transition."
    )
    status: str = Field(
        ..., description="Status of the condition, one of True, False, Unknown."
    )
    type: str = Field(..., description="Type of deployment condition.")
    observedGeneration: Optional[int] = Field(
        None,
        alias="observed_generation",
        description="observedGeneration represents the .metadata.generation that the condition was set based upon. For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date with respect to the current state of the instance.",
    )
752 753
class Status(BaseModel):
    """Status of the Deployment Object"""

    # Replica counters plus the list of conditions; every field is optional
    # and defaults to None. Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    availableReplicas: Optional[int] = Field(
        None,
        alias="available_replicas",
        description="Total number of available pods (ready for at least minReadySeconds) targeted by this deployment.",
    )
    collisionCount: Optional[int] = Field(
        None,
        alias="collision_count",
        description="Count of hash collisions for the Deployment. The Deployment controller uses this field as a collision avoidance mechanism when it needs to create the name for the newest ReplicaSet.",
    )
    conditions: Optional[List[Conditions]] = Field(
        None,
        description="Represents the latest available observations of a deployment's current state.",
    )
    observedGeneration: Optional[int] = Field(
        None,
        alias="observed_generation",
        description="The generation observed by the deployment controller.",
    )
    readyReplicas: Optional[int] = Field(
        None,
        alias="ready_replicas",
        description="readyReplicas is the number of pods targeted by this Deployment with a Ready Condition.",
    )
    replicas: Optional[int] = Field(
        None,
        description="Total number of non-terminated pods targeted by this deployment (their labels match the selector).",
    )
    unavailableReplicas: Optional[int] = Field(
        None,
        alias="unavailable_replicas",
        description="Total number of unavailable pods targeted by this deployment. This is the total number of pods that are still required for the deployment to have 100% available capacity. They may either be pods that are running but not yet available or pods that still have not been created.",
    )
    updatedReplicas: Optional[int] = Field(
        None,
        alias="updated_replicas",
        description="Total number of non-terminated pods targeted by this deployment that have the desired template spec.",
    )
797 798
class DeploymentStatus(BaseModel):
    """Details of the native kubernetes deployment that hosts the model"""

    # lastUpdated and name are required; the remaining fields default to
    # None. Unknown keys are rejected (extra="forbid").
    model_config = ConfigDict(extra="forbid")

    deploymentObjectOverallState: Optional[str] = Field(
        None,
        alias="deployment_object_overall_state",
        description="Overall State of the Deployment Object",
    )
    lastUpdated: str = Field(
        ..., alias="last_updated", description="Last Update Time"
    )
    message: Optional[str] = Field(
        None,
        description="Message populated in the root CRD while updating the status of underlying Deployment",
    )
    name: str = Field(..., description="Name of the Deployment Object")
    reason: Optional[str] = Field(
        None,
        description="Reason populated in the root CRD while updating the status of underlying Deployment",
    )
    status: Optional[Status] = Field(
        None, description="Status of the Deployment Object"
    )
822 823
class Sagemaker(BaseModel):
    """Status of the SageMaker endpoint"""

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # The three ARN fields are optional (None when absent); `state` declares
    # no default and is therefore the only required field.
    configArn: Optional[str] = Field(
        None,
        description="The Amazon Resource Name (ARN) of the endpoint configuration.",
        alias="config_arn",
    )
    endpointArn: Optional[str] = Field(
        None,
        description="The Amazon Resource Name (ARN) of the SageMaker endpoint",
        alias="endpoint_arn",
    )
    modelArn: Optional[str] = Field(
        None,
        description="The ARN of the model created in SageMaker.",
        alias="model_arn",
    )
    state: str = Field(description="The current state of the SageMaker endpoint")
845 846
class Endpoints(BaseModel):
    """EndpointStatus contains the status of SageMaker endpoints"""

    # NOTE(review): the docstring says "EndpointStatus" while the class is
    # named Endpoints. The text is kept verbatim because pydantic exposes the
    # class docstring as the JSON-schema description; confirm before rewording.

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # Single optional wrapper around the Sagemaker status model.
    sagemaker: Optional[Sagemaker] = Field(
        None, description="Status of the SageMaker endpoint"
    )
855 856
class ModelMetricsStatus(BaseModel):
    """Status of model container metrics collection"""

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # Both fields are optional and fall back to None; neither declares an alias.
    path: Optional[str] = Field(
        None, description="The path where metrics are available"
    )
    port: Optional[int] = Field(
        None, description="The port on which metrics are exposed"
    )
868 869
class MetricsStatus(BaseModel):
    """Status of metrics collection"""

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # `enabled` declares no default and is therefore required; the remaining
    # fields are optional and fall back to None.
    enabled: bool = Field(description="Whether metrics collection is enabled")
    errorMessage: Optional[str] = Field(
        None,
        description="Error message if metrics collection is in error state",
        alias="error_message",
    )
    metricsScrapeIntervalSeconds: Optional[int] = Field(
        None,
        description="Scrape interval in seconds for metrics collection from sidecar and model container.",
        alias="metrics_scrape_interval_seconds",
    )
    # Nested path/port details, modelled by ModelMetricsStatus.
    modelMetrics: Optional[ModelMetricsStatus] = Field(
        None,
        description="Status of model container metrics collection",
        alias="model_metrics",
    )
    state: Optional[str] = Field(
        None, description="Current state of metrics collection"
    )
894 895
class TlsCertificate(BaseModel):
    """CertificateStatus represents the status of TLS certificates"""

    # NOTE(review): the docstring says "CertificateStatus" while the class is
    # named TlsCertificate. The text is kept verbatim because pydantic exposes
    # the class docstring as the JSON-schema description; confirm before
    # rewording.

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # All fields are optional and fall back to None. Each camelCase attribute
    # has a snake_case alias.
    certificateARN: Optional[str] = Field(
        None,
        description="The Amazon Resource Name (ARN) of the ACM certificate",
        alias="certificate_arn",
    )
    certificateDomainNames: Optional[List[str]] = Field(
        None,
        description="The certificate domain names that is attached to the certificate",
        alias="certificate_domain_names",
    )
    certificateName: Optional[str] = Field(
        None,
        description="The certificate name of cert manager",
        alias="certificate_name",
    )
    importedCertificates: Optional[List[str]] = Field(
        None,
        description="Used for tracking the imported certificates to ACM",
        alias="imported_certificates",
    )
    issuerName: Optional[str] = Field(
        None, description="The issuer name of cert manager", alias="issuer_name"
    )
    lastCertExpiryTime: Optional[str] = Field(
        None,
        description="The last certificate expiry time",
        alias="last_cert_expiry_time",
    )
    tlsCertificateOutputS3Bucket: Optional[str] = Field(
        None,
        description="S3 bucket that stores the certificate that needs to be trusted",
        alias="tls_certificate_output_s3_bucket",
    )
    tlsCertificateS3Keys: Optional[List[str]] = Field(
        None,
        description="The output tls certificate S3 key that points to the .pem file",
        alias="tls_certificate_s3_keys",
    )
939 940
class InferenceEndpointConfigStatus(BaseModel):
    """ModelDeploymentStatus defines the observed state of ModelDeployment"""

    # NOTE(review): the docstring names ModelDeploymentStatus/ModelDeployment
    # rather than this class. The text is kept verbatim because pydantic
    # exposes the class docstring as the JSON-schema description; confirm
    # before rewording.

    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")

    # Top-level status aggregate: every field is optional and falls back to
    # None. Nested sections are modelled by DeploymentStatus, Endpoints,
    # MetricsStatus and TlsCertificate.
    conditions: Optional[List[Conditions]] = Field(
        None,
        description="Detailed conditions representing the state of the deployment",
    )
    deploymentStatus: Optional[DeploymentStatus] = Field(
        None,
        description="Details of the native kubernetes deployment that hosts the model",
        alias="deployment_status",
    )
    endpoints: Optional[Endpoints] = Field(
        None,
        description="EndpointStatus contains the status of SageMaker endpoints",
    )
    metricsStatus: Optional[MetricsStatus] = Field(
        None, description="Status of metrics collection", alias="metrics_status"
    )
    observedGeneration: Optional[int] = Field(
        None,
        description="Latest generation reconciled by controller",
        alias="observed_generation",
    )
    replicas: Optional[int] = Field(
        None, description="The observed number of inference server replicas."
    )
    selector: Optional[str] = Field(
        None, description="LabelSelector for the deployment."
    )
    # Closed set of phase names; any other string fails validation.
    state: Optional[
        Literal[
            "DeploymentPending",
            "DeploymentInProgress",
            "DeploymentFailed",
            "DeploymentComplete",
            "DeletionPending",
            "DeletionInProgress",
            "DeletionFailed",
            "DeletionComplete",
        ]
    ] = Field(None, description="Current phase of the model deployment")
    tlsCertificate: Optional[TlsCertificate] = Field(
        None,
        description="CertificateStatus represents the status of TLS certificates",
        alias="tls_certificate",
    )