1import importlib.resources
2import json
3import logging
4import uuid
5from pydantic import Field, field_validator
6from typing import Optional, List, Dict, Any, Union
7import ast
8import boto3
9import click
10import yaml
11from hyperpod_cluster_stack_template.v1_0.model import ClusterStackBase
12
13from sagemaker.hyperpod import create_boto3_client
14from sagemaker.hyperpod.common.telemetry import _hyperpod_telemetry_emitter
15from sagemaker.hyperpod.common.telemetry.constants import Feature
16
17CAPABILITIES_FOR_STACK_CREATION = [
18 'CAPABILITY_AUTO_EXPAND',
19 'CAPABILITY_IAM',
20 'CAPABILITY_NAMED_IAM'
21]
22log = logging.getLogger()
23
24
[docs]
class HpClusterStack(ClusterStackBase):
    """Manages SageMaker HyperPod cluster CloudFormation stacks.

    This class provides functionality to create, manage, and monitor CloudFormation stacks
    for SageMaker HyperPod clusters. It extends ClusterStackBase with stack lifecycle operations.

    .. dropdown:: Usage Examples
        :open:

        .. code-block:: python

            >>> # Create a cluster stack instance
            >>> stack = HpClusterStack()
            >>> response = stack.create(region="us-west-2")
            >>>
            >>> # Check stack status
            >>> status = stack.get_status()
            >>> print(status)
    """
    # Both fields start as None and are assigned by create() once CloudFormation
    # has accepted the create_stack request.
    stack_id: Optional[str] = Field(
        None,
        description="CloudFormation stack ID set after stack creation"
    )
    stack_name: Optional[str] = Field(
        None,
        description="CloudFormation stack name set after stack creation"
    )

    def __init__(self, **data):
        super().__init__(**data)

    @staticmethod
    def get_template() -> str:
        """Return the packaged ``creation_template.yaml`` as a JSON string.

        The template is read from the ``hyperpod_cluster_stack_template``
        package, parsed with ``yaml.safe_load`` and re-serialized as indented
        JSON with non-ASCII characters preserved.

        **Raises:**

        RuntimeError: When the template cannot be read, parsed or serialized
        """
        try:
            template_content = importlib.resources.read_text(
                'hyperpod_cluster_stack_template',
                'creation_template.yaml'
            )
            yaml_data = yaml.safe_load(template_content)
            return json.dumps(yaml_data, indent=2, ensure_ascii=False)
        except Exception as e:
            # Keep the original failure attached as the explicit cause.
            raise RuntimeError(f"Failed to load template from package: {e}") from e

    @_hyperpod_telemetry_emitter(Feature.HYPERPOD, "create_cluster_stack")
    def create(self,
               region: Optional[str] = None,
               template_version: Optional[int] = 1) -> Dict[str, Any]:
        """Creates a new HyperPod cluster CloudFormation stack.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - region
              - str, optional
              - AWS region for stack creation. Uses current session region if not specified
            * - template_version
              - int, optional
              - Version used as the leading path segment of the S3 template key. Defaults to 1

        **Returns:**

        dict: CloudFormation describe_stacks response containing stack details

        **Raises:**

        Exception: When CloudFormation stack creation fails

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # Create stack in default region
                >>> stack = HpClusterStack()
                >>> response = stack.create()
                >>>
                >>> # Create stack in specific region
                >>> response = stack.create(region="us-east-1")
        """
        # Get the region from the boto3 session or use the provided region
        region = region or boto3.session.Session().region_name
        cf = create_boto3_client('cloudformation', region_name=region)

        # Convert the input object to CloudFormation parameters
        parameters = self._create_parameters()

        # A short random suffix keeps generated stack names distinct.
        stack_name = f"HyperpodClusterStack-{str(uuid.uuid4())[:5]}"
        # Use the fixed bucket name from the model
        bucket_name = "aws-sagemaker-hyperpod-cluster-setup"
        template_key = f"{template_version}/templates/main-stack-eks-based-template.yaml"

        try:
            # Use TemplateURL for large templates (>51KB)
            # NOTE(review): self.stage is not defined in this class; presumably
            # it is a field inherited from ClusterStackBase - confirm.
            template_url = f"https://{bucket_name}-{region}-{self.stage}.s3.amazonaws.com/{template_key}"
            response = cf.create_stack(
                StackName=stack_name,
                TemplateURL=template_url,
                Parameters=parameters,
                Tags=self._parse_tags(),
                Capabilities=CAPABILITIES_FOR_STACK_CREATION
            )

            log.info(f"Stack creation initiated. Stack ID: {response['StackId']}")
            click.secho(f"Stack creation initiated. Stack ID: {response['StackId']}")

            self.stack_id = response['StackId']
            # Setting the stack name here to avoid calling multiple cloud formation APIs again
            self.stack_name = stack_name

            return self.describe(stack_name, region)
        except Exception as e:
            log.error(f"Error creating stack: {e}")
            raise

    def _create_parameters(self) -> List[Dict[str, str]]:
        """Build the CloudFormation ``Parameters`` list from the model fields.

        Only fields declared on ``ClusterStackBase`` are visited, so the
        bookkeeping fields added by this class (``stack_id``, ``stack_name``)
        are not sent as template parameters. Fields whose value is ``None``
        are skipped.

        **Returns:**

        list: Dictionaries with ``ParameterKey`` and ``ParameterValue`` string entries
        """
        # Fields that CloudFormation expects as a comma-separated string.
        list_fields = (
            'availability_zone_ids', 'nat_gateway_ids', 'eks_private_subnet_ids',
            'security_group_ids', 'private_route_table_ids', 'private_subnet_ids',
        )
        # Array fields that are expanded into numbered parameters
        # (InstanceGroupSettings1, InstanceGroupSettings2, ...).
        numbered_fields = {
            'instance_group_settings': 'InstanceGroupSettings',
            'rig_settings': 'RigSettings',
        }

        parameters = []
        for field_name in ClusterStackBase.model_fields:
            value = getattr(self, field_name, None)
            if value is None:
                continue

            if field_name in numbered_fields:
                parameters.extend(
                    self._numbered_settings_parameters(
                        value, numbered_fields[field_name], field_name
                    )
                )
                continue

            if field_name in list_fields:
                value = self._join_list_value(value)
            elif field_name == 'tags':
                # Convert tags array to JSON string; a string value is passed through as is
                if isinstance(value, list):
                    value = json.dumps(value)
            elif isinstance(value, bool):
                # Convert boolean values to strings for CloudFormation
                value = str(value).lower()

            parameters.append({
                'ParameterKey': self._snake_to_pascal(field_name),
                'ParameterValue': str(value)
            })
        return parameters

    def _numbered_settings_parameters(self, value: Any, key_prefix: str,
                                      field_name: str) -> List[Dict[str, str]]:
        """Expand a settings array into numbered parameters ``<key_prefix>1..N``.

        ``value`` may be a list or a JSON string encoding one. A string that
        cannot be parsed yields no parameters (best effort); a warning is
        logged so that the dropped input does not go unnoticed.
        """
        if isinstance(value, list):
            settings_list = value
        else:
            # Parse JSON string to list
            try:
                settings_list = json.loads(str(value))
            except (json.JSONDecodeError, TypeError):
                log.warning(f"Ignoring {field_name}: value is not a list or valid JSON")
                settings_list = []

        numbered = []
        for index, setting in enumerate(settings_list, 1):
            formatted_setting = self._convert_nested_keys(setting)
            if isinstance(formatted_setting, (dict, list)):
                # Each numbered parameter carries its setting wrapped in a JSON array.
                rendered = "[" + json.dumps(formatted_setting) + "]"
            else:
                rendered = str(formatted_setting)
            numbered.append({
                'ParameterKey': f'{key_prefix}{index}',
                'ParameterValue': rendered
            })
        return numbered

    @staticmethod
    def _join_list_value(value: Any) -> Any:
        """Return a list, or a JSON-array string, as a comma-separated string.

        Any other value, including a string that starts with ``[`` but is not
        valid JSON, is returned unchanged.
        """
        if isinstance(value, list):
            return ','.join(str(item) for item in value)
        if isinstance(value, str) and value.startswith('['):
            # Handle JSON string format from CLI
            try:
                return ','.join(str(item) for item in json.loads(value))
            except (json.JSONDecodeError, TypeError):
                return value  # Keep original string value
        return value

    def _parse_tags(self) -> List[Dict[str, str]]:
        """Parse tags field and return proper CloudFormation tags format.

        Accepts a list or a JSON string encoding one. A list whose first
        element is a dict containing ``Key`` is returned unchanged; otherwise
        each string element becomes a tag key with an empty value. Missing,
        empty or unparsable tags give an empty list.
        """
        if not self.tags:
            return []

        tags_list = self.tags
        if isinstance(self.tags, str):
            try:
                tags_list = json.loads(self.tags)
            except (json.JSONDecodeError, TypeError):
                return []

        # Convert array of strings to Key-Value format
        if isinstance(tags_list, list) and tags_list:
            # Check if already in Key-Value format
            if isinstance(tags_list[0], dict) and 'Key' in tags_list[0]:
                return tags_list
            # Convert string array to Key-Value format
            return [{'Key': tag, 'Value': ''} for tag in tags_list if isinstance(tag, str)]

        return []

    def _convert_nested_keys(self, obj: Any) -> Any:
        """Convert nested JSON keys from snake_case to PascalCase.

        Dictionaries and lists are walked recursively; any other value is
        returned unchanged.
        """
        if isinstance(obj, dict):
            return {self._snake_to_pascal(k): self._convert_nested_keys(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self._convert_nested_keys(item) for item in obj]
        return obj

    @staticmethod
    def _snake_to_pascal(snake_str: str) -> str:
        """Convert snake_case string to PascalCase.

        Names listed in the explicit mapping table are returned from the
        table; everything else is split on ``_`` and each word capitalized.
        Empty input is returned unchanged.
        """
        if not snake_str:
            return snake_str

        # Handle specific cases. The first group holds names whose target
        # casing (EKS, IAM, VPC, ...) the generic rule cannot produce. The
        # second group maps keys that are already PascalCase to themselves,
        # because str.capitalize() would lower-case everything after the first
        # letter (e.g. "InstanceCount" -> "Instancecount").
        mappings = {
            "eks_cluster_name": "EKSClusterName",
            "create_eks_cluster_stack": "CreateEKSClusterStack",
            "create_hyperpod_cluster_stack": "CreateHyperPodClusterStack",
            "create_sagemaker_iam_role_stack": "CreateSageMakerIAMRoleStack",
            "create_vpc_stack": "CreateVPCStack",
            "sagemaker_iam_role_name": "SageMakerIAMRoleName",
            "vpc_cidr": "VpcCIDR",
            "enable_hp_inference_feature": "EnableHPInferenceFeature",
            "fsx_availability_zone_id": "FsxAvailabilityZoneId",
            "hyperpod_cluster_name": "HyperPodClusterName",
            "InstanceCount": "InstanceCount",
            "InstanceGroupName": "InstanceGroupName",
            "InstanceType": "InstanceType",
            "TargetAvailabilityZoneId": "TargetAvailabilityZoneId",
            "ThreadsPerCore": "ThreadsPerCore",
            "InstanceStorageConfigs": "InstanceStorageConfigs",
            "EbsVolumeConfig": "EbsVolumeConfig",
            "VolumeSizeInGB": "VolumeSizeInGB"
        }

        if snake_str in mappings:
            return mappings[snake_str]

        # Default case: capitalize each word
        return ''.join(word.capitalize() for word in snake_str.split('_'))

    def _snake_to_camel(self, snake_str: str) -> str:
        """Convert snake_case string to camelCase for nested JSON keys.

        The first word is kept as written; each following word is capitalized.
        Empty input is returned unchanged.
        """
        if not snake_str:
            return snake_str
        words = snake_str.split('_')
        return words[0] + ''.join(word.capitalize() for word in words[1:])

    @staticmethod
    @_hyperpod_telemetry_emitter(Feature.HYPERPOD, "describe_cluster_stack")
    def describe(stack_name: str, region: Optional[str] = None) -> Dict[str, Any]:
        """Describes a CloudFormation stack by name.

        .. note::
            Stack descriptions are region-specific. You must use the correct region where the stack was created to retrieve its description.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - stack_name
              - str
              - Name of the CloudFormation stack to describe. For ARN format arn:aws:cloudformation:region:account:stack/stack-name/stack-id, use the stack-name part
            * - region
              - str, optional
              - AWS region where the stack exists

        **Returns:**

        dict: CloudFormation describe_stacks response

        **Raises:**

        ValueError: When stack is not accessible or doesn't exist
        RuntimeError: When CloudFormation operation fails

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # Describe a stack by name
                >>> response = HpClusterStack.describe("my-stack-name")
                >>>
                >>> # Describe stack in specific region
                >>> response = HpClusterStack.describe("my-stack", region="us-west-2")
        """
        cf = create_boto3_client('cloudformation', region_name=region)

        try:
            return cf.describe_stacks(StackName=stack_name)
        except cf.exceptions.ClientError as e:
            error_code = e.response['Error']['Code']

            log.debug(f"CloudFormation error: {error_code} for operation on stack")

            # The raised messages are deliberately generic; the original error
            # stays reachable through the exception chain.
            if error_code in ['ValidationError', 'AccessDenied']:
                log.error("Stack operation failed - check stack name and permissions")
                raise ValueError("Stack not accessible") from e
            else:
                log.error("CloudFormation operation failed")
                raise RuntimeError("Stack operation failed") from e
        except Exception as e:
            log.error("Unexpected error during stack operation")
            raise RuntimeError("Stack operation failed") from e

    @staticmethod
    @_hyperpod_telemetry_emitter(Feature.HYPERPOD, "list_cluster_stack")
    def list(region: Optional[str] = None,
             stack_status_filter: Optional[List[str]] = None) -> Dict[str, Any]:
        """Lists all CloudFormation stacks in the specified region.

        .. note::
            Stack listings are region-specific. If no region is provided, uses the default region from your AWS configuration.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - region
              - str, optional
              - AWS region to list stacks from. Uses default region if not specified
            * - stack_status_filter
              - List[str], optional
              - Stack statuses to include. When omitted, every status except DELETE_COMPLETE is requested

        **Returns:**

        dict: A dictionary with a single ``StackSummaries`` key holding the stack summaries collected from all result pages

        **Raises:**

        ValueError: When insufficient permissions to list stacks
        RuntimeError: When CloudFormation list operation fails

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # List stacks in current region
                >>> stacks = HpClusterStack.list()
                >>>
                >>> # List stacks in specific region
                >>> stacks = HpClusterStack.list(region="us-east-1")
        """
        cf = create_boto3_client('cloudformation', region_name=region)

        # All valid stack statuses except DELETE_COMPLETE, used to avoid paginating
        # through tens of thousands of deleted stacks which causes throttling.
        _ACTIVE_STACK_STATUSES = [
            'CREATE_IN_PROGRESS', 'CREATE_FAILED', 'CREATE_COMPLETE',
            'ROLLBACK_IN_PROGRESS', 'ROLLBACK_FAILED', 'ROLLBACK_COMPLETE',
            'DELETE_IN_PROGRESS', 'DELETE_FAILED',
            'UPDATE_IN_PROGRESS', 'UPDATE_COMPLETE_CLEANUP_IN_PROGRESS',
            'UPDATE_COMPLETE', 'UPDATE_FAILED',
            'UPDATE_ROLLBACK_IN_PROGRESS', 'UPDATE_ROLLBACK_FAILED',
            'UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS', 'UPDATE_ROLLBACK_COMPLETE',
            'REVIEW_IN_PROGRESS', 'IMPORT_IN_PROGRESS', 'IMPORT_COMPLETE',
            'IMPORT_ROLLBACK_IN_PROGRESS', 'IMPORT_ROLLBACK_FAILED', 'IMPORT_ROLLBACK_COMPLETE',
        ]

        try:
            # Prepare API call parameters
            list_params = {}

            if stack_status_filter is not None:
                list_params['StackStatusFilter'] = stack_status_filter
            else:
                # Exclude DELETE_COMPLETE at the API level to avoid paginating through
                # large numbers of deleted stacks, which causes throttling errors.
                list_params['StackStatusFilter'] = _ACTIVE_STACK_STATUSES

            response = cf.list_stacks(**list_params)

            # Paginate through all results
            all_summaries = response.get('StackSummaries', [])
            while 'NextToken' in response:
                list_params['NextToken'] = response['NextToken']
                response = cf.list_stacks(**list_params)
                all_summaries.extend(response.get('StackSummaries', []))

            return {'StackSummaries': all_summaries}
        except cf.exceptions.ClientError as e:
            error_code = e.response['Error']['Code']

            log.debug(f"CloudFormation error: {error_code} for list stacks operation")

            if error_code == 'AccessDenied':
                log.error("List stacks operation failed - check permissions")
                raise ValueError("Insufficient permissions to list stacks") from e
            else:
                log.error("CloudFormation list operation failed")
                raise RuntimeError("List stacks operation failed") from e
        except Exception as e:
            log.error("Unexpected error during list stacks operation")
            raise RuntimeError("List stacks operation failed") from e

    @staticmethod
    def _get_stack_status_helper(stack_name: str, region: Optional[str] = None):
        """Helper method to get stack status for any stack identifier.

        Returns the ``StackStatus`` of the first entry in the describe
        response, or ``None`` (after printing a notice) when the response
        contains no stacks.
        """
        log.debug(f"Getting status for stack: {stack_name}")
        stack_description = HpClusterStack.describe(stack_name, region)

        if stack_description.get('Stacks'):
            status = stack_description['Stacks'][0].get('StackStatus')
            log.debug(f"Stack {stack_name} status: {status}")
            return status

        log.debug(f"Stack {stack_name} not found")
        click.secho(f"Stack {stack_name} not found")
        return None

    def get_status(self, region: Optional[str] = None):
        """Gets the status of the current stack instance.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - region
              - str, optional
              - AWS region where the stack exists

        **Returns:**

        str: CloudFormation stack status (e.g., 'CREATE_COMPLETE', 'UPDATE_IN_PROGRESS')

        **Raises:**

        ValueError: When stack hasn't been created yet (call create() first)

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # Create stack first, then check status
                >>> stack = HpClusterStack()
                >>> stack.create()
                >>> status = stack.get_status()
                >>> print(f"Stack status: {status}")
        """
        if not self.stack_name:
            raise ValueError("Stack must be created first. Call create() before checking status.")
        return self._get_stack_status_helper(self.stack_name, region)

    @staticmethod
    def check_status(stack_name: str, region: Optional[str] = None):
        """Checks the status of any CloudFormation stack by name.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - stack_name
              - str
              - Name of the CloudFormation stack
            * - region
              - str, optional
              - AWS region where the stack exists

        **Returns:**

        str: CloudFormation stack status or None if stack not found

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # Check status of any stack
                >>> status = HpClusterStack.check_status("my-stack-name")
                >>>
                >>> # Check status in specific region
                >>> status = HpClusterStack.check_status("my-stack", region="us-west-2")
        """
        return HpClusterStack._get_stack_status_helper(stack_name, region)

    @staticmethod
    def delete(stack_name: str, region: Optional[str] = None, retain_resources: Optional[List[str]] = None,
               logger: Optional[logging.Logger] = None) -> None:
        """Deletes a HyperPod cluster CloudFormation stack.

        Removes the specified CloudFormation stack and all associated AWS resources.
        This operation cannot be undone and proceeds automatically without confirmation.

        **Parameters:**

        .. list-table::
            :header-rows: 1
            :widths: 20 20 60

            * - Parameter
              - Type
              - Description
            * - stack_name
              - str
              - Name of the CloudFormation stack to delete
            * - region
              - str, optional
              - AWS region where the stack exists
            * - retain_resources
              - List[str], optional
              - List of logical resource IDs to retain during deletion (only works on DELETE_FAILED stacks)
            * - logger
              - logging.Logger, optional
              - Logger instance for output messages. Uses default logger if not provided

        **Raises:**

        ValueError: When stack doesn't exist or retain_resources limitation is encountered
        RuntimeError: When CloudFormation deletion fails

        .. dropdown:: Usage Examples
            :open:

            .. code-block:: python

                >>> # Delete a stack (automatically proceeds without confirmation)
                >>> HpClusterStack.delete("my-stack-name")
                >>>
                >>> # Delete in specific region
                >>> HpClusterStack.delete("my-stack-name", region="us-west-2")
                >>>
                >>> # Delete with retained resources (only works on DELETE_FAILED stacks)
                >>> HpClusterStack.delete("my-stack-name", retain_resources=["S3Bucket", "EFSFileSystem"])
                >>>
                >>> # Delete with custom logger
                >>> import logging
                >>> logger = logging.getLogger(__name__)
                >>> HpClusterStack.delete("my-stack-name", logger=logger)
        """
        # NOTE(review): imported inside the method rather than at module level;
        # presumably to avoid a circular import with the CLI package - confirm.
        from sagemaker.hyperpod.cli.cluster_stack_utils import (
            delete_stack_with_confirmation,
            StackNotFoundError
        )

        if logger is None:
            logger = logging.getLogger(__name__)

        # Convert retain_resources list to comma-separated string for the utility function
        retain_resources_str = ",".join(retain_resources) if retain_resources else ""

        def sdk_confirm_callback(message: str) -> bool:
            """SDK-specific confirmation callback - always auto-confirms."""
            logger.info(f"Auto-confirming: {message}")
            return True

        try:
            delete_stack_with_confirmation(
                stack_name=stack_name,
                region=region or boto3.session.Session().region_name,
                retain_resources_str=retain_resources_str,
                message_callback=logger.info,
                confirm_callback=sdk_confirm_callback,
                success_callback=logger.info
            )
        except StackNotFoundError as e:
            error_msg = f"Stack '{stack_name}' not found"
            logger.error(error_msg)
            raise ValueError(error_msg) from e
        except Exception as e:
            error_str = str(e)

            # Handle CloudFormation retain-resources limitation with clear exception for SDK
            if retain_resources and "specify which resources to retain only when the stack is in the DELETE_FAILED state" in error_str:
                error_msg = (
                    "CloudFormation limitation: retain_resources can only be used on stacks in DELETE_FAILED state. "
                    "Current stack state allows normal deletion. Try deleting without retain_resources first, "
                    "then retry with retain_resources if deletion fails."
                )
                logger.error(error_msg)
                raise ValueError(error_msg) from e

            # Handle termination protection
            if "TerminationProtection is enabled" in error_str:
                error_msg = (
                    "Stack deletion blocked: Termination Protection is enabled. "
                    "Disable termination protection first using AWS CLI or Console."
                )
                logger.error(error_msg)
                raise RuntimeError(error_msg) from e

            # Handle other errors
            logger.error(f"Failed to delete stack: {error_str}")
            raise RuntimeError(f"Stack deletion failed: {error_str}") from e
753
754
def _yaml_to_json_string(yaml_path) -> str:
    """Convert YAML file to JSON string.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        The parsed document serialized as JSON with two-space indentation and
        non-ASCII characters kept as-is.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding; the output keeps non-ASCII characters
    # (ensure_ascii=False), so they must be read correctly in the first place.
    with open(yaml_path, 'r', encoding='utf-8') as file:
        yaml_data = yaml.safe_load(file)
    return json.dumps(yaml_data, indent=2, ensure_ascii=False)