From 369385d2383bc2d06d777dea5f71fd309098d627 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Thu, 11 Dec 2025 19:44:32 -0800 Subject: [PATCH] Remove duplicate remote_function folder --- .../core/remote_function/__init__.py | 19 - .../remote_function/checkpoint_location.py | 47 - .../sagemaker/core/remote_function/client.py | 1285 -------- .../core/remote_function/core/__init__.py | 0 .../core/_custom_dispatch_table.py | 72 - .../core/pipeline_variables.py | 353 --- .../remote_function/core/serialization.py | 422 --- .../remote_function/core/stored_function.py | 226 -- .../remote_function/custom_file_filter.py | 128 - .../sagemaker/core/remote_function/errors.py | 104 - .../core/remote_function/invoke_function.py | 172 - .../src/sagemaker/core/remote_function/job.py | 2140 ------------- .../core/remote_function/logging_config.py | 38 - .../runtime_environment/__init__.py | 14 - .../bootstrap_runtime_environment.py | 605 ---- .../runtime_environment/mpi_utils_remote.py | 252 -- .../runtime_environment_manager.py | 554 ---- .../runtime_environment/spark_app.py | 18 - .../core/remote_function/spark_config.py | 149 - .../core/workflow/execution_variables.py | 2 +- .../src/sagemaker/core/workflow/parameters.py | 2 +- .../src/sagemaker/core/workflow/properties.py | 2 +- .../tests/unit/remote_function/__init__.py | 12 - .../runtime_environment/__init__.py | 12 - .../test_bootstrap_runtime_environment.py | 548 ---- .../test_mpi_utils_remote.py | 366 --- .../test_runtime_environment_manager.py | 562 ---- .../tests/unit/remote_function/test_client.py | 97 - .../tests/unit/remote_function/test_job.py | 935 ------ .../remote_function/test_job_comprehensive.py | 535 ---- .../sagemaker/mlops/workflow/function_step.py | 18 +- .../src/sagemaker/mlops/workflow/pipeline.py | 8 +- .../tests/unit/workflow/test_pipeline.py | 10 +- .../sagemaker/serve/model_builder_utils.py | 2 +- .../multi_model_server/prepare.py | 2 +- 
.../serve/model_server/smd/prepare.py | 2 +- .../tensorflow_serving/prepare.py | 2 +- .../serve/model_server/torchserve/prepare.py | 2 +- .../serve/validations/check_integrity.py | 2 +- .../evaluate/benchmark_demo.ipynb | 2817 ----------------- .../evaluate/custom_scorer_demo.ipynb | 1842 ----------- .../evaluate/llm_as_judge_demo.ipynb | 2472 --------------- .../train/remote_function/__init__.py | 25 +- .../sagemaker/train/remote_function/client.py | 1281 +++++++- .../train/remote_function/core/__init__.py | 27 - .../core/_custom_dispatch_table.py | 22 +- .../core/pipeline_variables.py | 351 +- .../remote_function/core/serialization.py | 418 ++- .../remote_function/core/stored_function.py | 222 +- .../remote_function/custom_file_filter.py | 2 +- .../sagemaker/train/remote_function/errors.py | 102 +- .../train/remote_function/invoke_function.py | 2 +- .../sagemaker/train/remote_function/job.py | 2134 ++++++++++++- .../bootstrap_runtime_environment.py | 7 +- .../runtime_environment/mpi_utils_remote.py | 2 +- .../runtime_environment_manager.py | 125 +- .../train/remote_function/spark_config.py | 147 +- .../remote_function/test_invoke_function.py | 4 +- 58 files changed, 4729 insertions(+), 16992 deletions(-) delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/__init__.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/checkpoint_location.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/client.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/core/__init__.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/core/_custom_dispatch_table.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/core/pipeline_variables.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/core/serialization.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/core/stored_function.py delete mode 100644 
sagemaker-core/src/sagemaker/core/remote_function/custom_file_filter.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/errors.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/invoke_function.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/job.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/logging_config.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/__init__.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/bootstrap_runtime_environment.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/mpi_utils_remote.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/runtime_environment_manager.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/spark_app.py delete mode 100644 sagemaker-core/src/sagemaker/core/remote_function/spark_config.py delete mode 100644 sagemaker-core/tests/unit/remote_function/__init__.py delete mode 100644 sagemaker-core/tests/unit/remote_function/runtime_environment/__init__.py delete mode 100644 sagemaker-core/tests/unit/remote_function/runtime_environment/test_bootstrap_runtime_environment.py delete mode 100644 sagemaker-core/tests/unit/remote_function/runtime_environment/test_mpi_utils_remote.py delete mode 100644 sagemaker-core/tests/unit/remote_function/runtime_environment/test_runtime_environment_manager.py delete mode 100644 sagemaker-core/tests/unit/remote_function/test_client.py delete mode 100644 sagemaker-core/tests/unit/remote_function/test_job.py delete mode 100644 sagemaker-core/tests/unit/remote_function/test_job_comprehensive.py delete mode 100644 sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb delete mode 100644 sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb delete mode 100644 
sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb diff --git a/sagemaker-core/src/sagemaker/core/remote_function/__init__.py b/sagemaker-core/src/sagemaker/core/remote_function/__init__.py deleted file mode 100644 index 6436ddaa22..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""Defines classes and helper methods used in remote function executions.""" -from __future__ import absolute_import - -from sagemaker.core.remote_function.client import remote, RemoteExecutor # noqa: F401 -from sagemaker.core.remote_function.checkpoint_location import CheckpointLocation # noqa: F401 -from sagemaker.core.remote_function.custom_file_filter import CustomFileFilter # noqa: F401 -from sagemaker.core.remote_function.spark_config import SparkConfig # noqa: F401 diff --git a/sagemaker-core/src/sagemaker/core/remote_function/checkpoint_location.py b/sagemaker-core/src/sagemaker/core/remote_function/checkpoint_location.py deleted file mode 100644 index 4153fe03d3..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/checkpoint_location.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. 
A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""This module is used to define the CheckpointLocation to remote function.""" -from __future__ import absolute_import - -from os import PathLike -import re - -# Regex is taken from https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CheckpointConfig.html -S3_URI_REGEX_PATTERN = r"^(https|s3)://([^/]+)/?(.*)$" - -_JOB_CHECKPOINT_LOCATION = "/opt/ml/checkpoints/" - - -def _validate_s3_uri_for_checkpoint(s3_uri: str): - """Validate if checkpoint location is specified with a valid s3 URI.""" - return re.match(S3_URI_REGEX_PATTERN, s3_uri) - - -class CheckpointLocation(PathLike): - """Class to represent the location where checkpoints are accessed in a remote function. - - To save or load checkpoints in a remote function, pass an CheckpointLocation object as a - function parameter and use it as a os.PathLike object. This CheckpointLocation object - represents the local directory (/opt/ml/checkpoints/) of checkpoints in side the job. - """ - - _local_path = _JOB_CHECKPOINT_LOCATION - - def __init__(self, s3_uri): - if not _validate_s3_uri_for_checkpoint(s3_uri): - raise ValueError("CheckpointLocation should be specified with valid s3 URI.") - self._s3_uri = s3_uri - - def __fspath__(self): - """Return job local path where checkpoints are stored.""" - return self._local_path diff --git a/sagemaker-core/src/sagemaker/core/remote_function/client.py b/sagemaker-core/src/sagemaker/core/remote_function/client.py deleted file mode 100644 index b140c03901..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/client.py +++ /dev/null @@ -1,1285 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""SageMaker remote function client.""" -from __future__ import absolute_import - -from concurrent.futures import ThreadPoolExecutor -from collections import deque -import time -import threading -from typing import Callable, Dict, List, Optional, Tuple, Any, Union -import functools -import itertools -import inspect - -from botocore.exceptions import ClientError -from sagemaker.core.exceptions import UnexpectedStatusException -from sagemaker.core.experiments._run_context import _RunContext - -import sagemaker.core.remote_function.core.serialization as serialization -from sagemaker.core.remote_function.errors import ( - RemoteFunctionError, - ServiceError, - DeserializationError, -) -from sagemaker.core.remote_function.core.stored_function import RESULTS_FOLDER, EXCEPTION_FOLDER -from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - RuntimeEnvironmentError, -) - -from sagemaker.core.helper.session_helper import Session -from sagemaker.core.s3 import s3_path_join -from sagemaker.core.remote_function.job import _JobSettings, _Job, _RunInfo -from sagemaker.core.remote_function import logging_config -from sagemaker.core.common_utils import name_from_base, base_from_name -from sagemaker.core.remote_function.spark_config import SparkConfig -from sagemaker.core.remote_function.custom_file_filter import CustomFileFilter -from sagemaker.core.telemetry.telemetry_logging import _telemetry_emitter -from 
sagemaker.core.telemetry.constants import Feature - -_API_CALL_LIMIT = { - "SubmittingIntervalInSecs": 1, - "MinBatchPollingIntervalInSecs": 10, - "PollingIntervalInSecs": 0.5, -} - -# Possible future states. -_PENDING = "PENDING" -_RUNNING = "RUNNING" -# The future was cancelled by the user... -_CANCELLED = "CANCELLED" -_FINISHED = "FINISHED" - -logger = logging_config.get_logger() - - -@_telemetry_emitter(feature=Feature.REMOTE_FUNCTION, func_name="remote_function.remote") -def remote( - _func=None, - *, - dependencies: str = None, - pre_execution_commands: List[str] = None, - pre_execution_script: str = None, - environment_variables: Dict[str, str] = None, - image_uri: str = None, - include_local_workdir: bool = None, - custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, - instance_count: int = 1, - instance_type: str = None, - job_conda_env: str = None, - job_name_prefix: str = None, - keep_alive_period_in_seconds: int = 0, - max_retry_attempts: int = 1, - max_runtime_in_seconds: int = 24 * 60 * 60, - role: str = None, - s3_kms_key: str = None, - s3_root_uri: str = None, - sagemaker_session: Session = None, - security_group_ids: List[str] = None, - subnets: List[str] = None, - tags: List[Tuple[str, str]] = None, - volume_kms_key: str = None, - volume_size: int = 30, - encrypt_inter_container_traffic: bool = None, - spark_config: SparkConfig = None, - use_spot_instances=False, - max_wait_time_in_seconds=None, - disable_output_compression: bool = False, - use_torchrun: bool = False, - use_mpirun: bool = False, - nproc_per_node: Optional[int] = None, -): - """Decorator for running the annotated function as a SageMaker training job. - - This decorator wraps the annotated code and runs it as a new SageMaker job synchronously - with the provided runtime settings. - - If a parameter value is not set, the decorator first looks up the value from the SageMaker - configuration file. 
If no value is specified in the configuration file or no configuration file - is found, the decorator selects the default as specified below. For more information, see - `Configuring and using defaults with the SageMaker Python SDK `_. - - Args: - _func (Optional): A Python function to run as a SageMaker training job. - - dependencies (str): Either the path to a dependencies file or the reserved keyword - ``auto_capture``. Defaults to ``None``. - If ``dependencies`` is provided, the value must be one of the following: - - * A path to a conda environment.yml file. The following conditions apply. - - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then the - conda environment is updated by installing dependencies from the yaml file and the - function is invoked within that conda environment. For this to succeed, the - conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and - ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. - * If none of the previous conditions are met, a new conda environment named - ``sagemaker-runtime-env`` is created and the function annotated with the remote - decorator is invoked in that conda environment. - - * A path to a requirements.txt file. The following conditions apply. - - * If ``job_conda_env`` is set in the remote decorator, dependencies are installed - within that conda environment and the function annotated with the remote decorator - is invoked in the same conda environment. For this to succeed, the specified - conda environment must already exist in the image. 
- * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - dependencies are installed within that conda environment and the function annotated - with the remote decorator is invoked in the same. For this to succeed, the conda - environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and - ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. - * If none of the above conditions are met, conda is not used. Dependencies are - installed at the system level, without any virtual environment, and the function - annotated with the remote decorator is invoked using the Python runtime available - in the system path. - - * The parameter dependencies is set to ``auto_capture``. SageMaker will automatically - generate an env_snapshot.yml corresponding to the current active conda environment’s - snapshot. You do not need to provide a dependencies file. The following conditions - apply: - - * You must run the remote function within an active conda environment. - * When installing the dependencies on the training job, the same conditions as when - dependencies is set to a path to a conda environment file apply. These conditions are - as follows: - - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then - the conda environment is updated by installing dependencies from the yaml file - and the function is invoked within that conda environment. For this to - succeed, the conda environment name must already be set in - ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist - in the image. 
- * If none of the previous conditions are met, a new conda environment with name - ``sagemaker-runtime-env`` is created and the function annotated with the - remote decorator is invoked in that conda environment. - - * ``None``. SageMaker will assume that there are no dependencies to install while - executing the remote annotated function in the training job. - - pre_execution_commands (List[str]): List of commands to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. - - pre_execution_script (str): Path to script file to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. - - environment_variables (Dict): The environment variables used inside the decorator function. - Defaults to ``None``. - - image_uri (str): The universal resource identifier (URI) location of a Docker image on - Amazon Elastic Container Registry (ECR). Defaults to the following based on where the SDK - is running: - - * For users who specify ``spark_config`` and want to run the function in a Spark - application, the ``image_uri`` should be ``None``. A SageMaker Spark image will - be used for training, otherwise a ``ValueError`` is thrown. - * For users on SageMaker Studio notebooks, the image used as the kernel image for the - notebook is used. - * For other users, it is resolved to base python image with the same python version - as the environment running the local code. - - If no compatible image is found, a ValueError is thrown. - - include_local_workdir (bool): A flag to indicate that the remote function should include - local directories. Set to ``True`` if the remote function code imports local modules and - methods that are not available via PyPI or conda. Only python files are included. - Default value is ``False``. 
- - custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function - that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object - that specifies the local directories and files to be included in the remote function. - If a callable is passed in, the function should follow the protocol of ``ignore`` argument - of ``shutil.copytree``. Defaults to ``None``, which means only python - files are accepted and uploaded to S3. - - instance_count (int): The number of instances to use. Defaults to 1. - NOTE: Remote function supports instance_count > 1 for Spark jobs, torchrun and - mpirun utilities - - instance_type (str): The Amazon Elastic Compute Cloud (EC2) instance type to use to run - the SageMaker job. e.g. ml.c4.xlarge. If not provided, a ValueError is thrown. - - job_conda_env (str): The name of the conda environment to activate during job's runtime. - Defaults to ``None``. - - job_name_prefix (str): The prefix used used to create the underlying SageMaker job. - - keep_alive_period_in_seconds (int): The duration in seconds to retain and reuse provisioned - infrastructure after the completion of a training job, also known as SageMaker managed - warm pools. The use of warmpools reduces the latency time spent to provision new - resources. The default value for ``keep_alive_period_in_seconds`` is 0. - NOTE: Additional charges associated with warm pools may apply. Using this parameter also - activates a new persistent cache feature, which will further reduce job start up - latency than over using SageMaker managed warm pools alone by caching the package source - downloaded in the previous runs. - - max_retry_attempts (int): The max number of times the job is retried on - ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. - - max_runtime_in_seconds (int): The upper limit in seconds to be used for training. 
After - this specified amount of time, SageMaker terminates the job regardless of its current - status. Defaults to 1 day or (86400 seconds). - - role (str): The IAM role (either name or full ARN) used to run your SageMaker training - job. Defaults to: - - * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or - SageMaker Studio Notebooks. - * if not above, a ValueError is be thrown. - - s3_kms_key (str): The key used to encrypt the input and output data. Default to ``None``. - - s3_root_uri (str): The root S3 folder to which the code archives and data are - uploaded to. Defaults to ``s3://``. - - sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to which - SageMaker service calls are delegated to (default: None). If not provided, one is created - using a default configuration chain. - - security_group_ids (List[str): A list of security group IDs. Defaults to ``None`` and the - training job is created without VPC config. - - subnets (List[str): A list of subnet IDs. Defaults to ``None`` and the job is created - without VPC config. - - tags (List[Tuple[str, str]): A list of tags attached to the job. Defaults to ``None`` and - the training job is created without tags. - - volume_kms_key (str): An Amazon Key Management Service (KMS) key used to encrypt an - Amazon Elastic Block Storage (EBS) volume attached to the training instance. Defaults to - ``None``. - - volume_size (int): The size in GB of the storage volume for storing input and output data - during training. Defaults to ``30``. - - encrypt_inter_container_traffic (bool): A flag that specifies whether traffic between - training containers is encrypted for the training job. Defaults to ``False``. - - spark_config (SparkConfig): Configurations to the Spark application that runs on - Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri - will be used for training. 
Note that ``image_uri`` can not be specified at the - same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. - - use_spot_instances (bool): Specifies whether to use SageMaker Managed Spot instances for - training. If enabled then the ``max_wait_time_in_seconds`` arg should also be set. - Defaults to ``False``. - - max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. - After this amount of time Amazon SageMaker will stop waiting for managed spot training - job to complete. Defaults to ``None``. - - disable_output_compression (bool): Optional. When set to true, Model is uploaded to - Amazon S3 without compression after training finishes. - - use_torchrun (bool): Specifies whether to use torchrun for distributed training. - Defaults to ``False``. - - use_mpirun (bool): Specifies whether to use mpirun for distributed training. - Defaults to ``False``. - - nproc_per_node (int): Optional. Specifies the number of processes per node for - distributed training. Defaults to ``None``. - This is defined automatically configured on the instance type. 
- """ - - def _remote(func): - - job_settings = _JobSettings( - dependencies=dependencies, - pre_execution_commands=pre_execution_commands, - pre_execution_script=pre_execution_script, - environment_variables=environment_variables, - image_uri=image_uri, - include_local_workdir=include_local_workdir, - custom_file_filter=custom_file_filter, - instance_count=instance_count, - instance_type=instance_type, - job_conda_env=job_conda_env, - job_name_prefix=job_name_prefix, - keep_alive_period_in_seconds=keep_alive_period_in_seconds, - max_retry_attempts=max_retry_attempts, - max_runtime_in_seconds=max_runtime_in_seconds, - role=role, - s3_kms_key=s3_kms_key, - s3_root_uri=s3_root_uri, - sagemaker_session=sagemaker_session, - security_group_ids=security_group_ids, - subnets=subnets, - tags=tags, - volume_kms_key=volume_kms_key, - volume_size=volume_size, - encrypt_inter_container_traffic=encrypt_inter_container_traffic, - spark_config=spark_config, - use_spot_instances=use_spot_instances, - max_wait_time_in_seconds=max_wait_time_in_seconds, - disable_output_compression=disable_output_compression, - use_torchrun=use_torchrun, - use_mpirun=use_mpirun, - nproc_per_node=nproc_per_node, - ) - - @functools.wraps(func) - def wrapper(*args, **kwargs): - - if instance_count > 1 and not ( - (spark_config is not None and not use_torchrun and not use_mpirun) - or (spark_config is None and use_torchrun and not use_mpirun) - or (spark_config is None and not use_torchrun and use_mpirun) - ): - raise ValueError( - "Remote function do not support training on multi instances " - + "without spark_config or use_torchrun or use_mpirun. 
" - + "Please provide instance_count = 1" - ) - - RemoteExecutor._validate_submit_args(func, *args, **kwargs) - - job = _Job.start(job_settings, func, args, kwargs) - - try: - job.wait() - except UnexpectedStatusException as usex: - if usex.actual_status == "Failed": - try: - exception = serialization.deserialize_exception_from_s3( - sagemaker_session=job_settings.sagemaker_session, - s3_uri=s3_path_join( - job_settings.s3_root_uri, job.job_name, EXCEPTION_FOLDER - ), - hmac_key=job.hmac_key, - ) - except ServiceError as serr: - chained_e = serr.__cause__ - if ( - isinstance(chained_e, ClientError) - and chained_e.response["Error"]["Code"] # pylint: disable=no-member - == "404" - and chained_e.response["Error"]["Message"] # pylint: disable=no-member - == "Not Found" - ): - describe_result = job.describe() - if ( - "FailureReason" in describe_result - and describe_result["FailureReason"] - and "RuntimeEnvironmentError: " in describe_result["FailureReason"] - ): - failure_msg = describe_result["FailureReason"].replace( - "RuntimeEnvironmentError: ", "" - ) - raise RuntimeEnvironmentError(failure_msg) - raise RemoteFunctionError( - "Failed to execute remote function. " - + "Check corresponding job for details." - ) - raise serr - - raise exception - - raise TimeoutError( - "Job for remote function timed out before reaching a termination status." 
- ) - - if job.describe()["TrainingJobStatus"] == "Completed": - return serialization.deserialize_obj_from_s3( - sagemaker_session=job_settings.sagemaker_session, - s3_uri=s3_path_join(job_settings.s3_root_uri, job.job_name, RESULTS_FOLDER), - hmac_key=job.hmac_key, - ) - - if job.describe()["TrainingJobStatus"] == "Stopped": - raise RemoteFunctionError("Job for remote function has been aborted.") - - return None - - wrapper.job_settings = job_settings - wrapper.wrapped_func = func - return wrapper - - if _func is None: - return _remote - return _remote(_func) - - -class _SubmitRequest: - """Class that holds parameters and data for creating a new job.""" - - def __init__( - self, future, job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None - ): - self.future = future - self.job_settings = job_settings - self.func = func - self.args = func_args - self.kwargs = func_kwargs - self.run_info = run_info - - -def _submit_worker(executor): - """Background worker that submits job requests.""" - - def has_work_to_do(): - return ( - len(executor._pending_request_queue) > 0 - and len(executor._running_jobs) < executor.max_parallel_jobs - ) - - try: - while True: - with executor._state_condition: - executor._state_condition.wait_for(has_work_to_do) - request = executor._pending_request_queue[0] - - if request is None: - with executor._state_condition: - # remove the anchor from the pending queue - executor._pending_request_queue.popleft() - return - - time.sleep(_API_CALL_LIMIT["SubmittingIntervalInSecs"]) - # submit a new job - job = request.future._start_and_notify( - request.job_settings, request.func, request.args, request.kwargs, request.run_info - ) - - with executor._state_condition: - if job: - executor._running_jobs[job.job_name] = job - # remove the request from the pending queue - executor._pending_request_queue.popleft() - except Exception: # pylint: disable=broad-except - logger.exception("Error occurred while submitting CreateTrainingJob 
requests.") - - -def _polling_worker(executor): - """Background worker that polls the status of the running jobs.""" - try: - while True: - with executor._state_condition: - if ( - executor._shutdown - and len(executor._running_jobs) + len(executor._pending_request_queue) == 0 - ): - return - - time.sleep( - max( - _API_CALL_LIMIT["MinBatchPollingIntervalInSecs"] - - len(executor._running_jobs) * _API_CALL_LIMIT["PollingIntervalInSecs"], - 0, - ) - ) - - # check if running jobs are terminated - for job_name in list(executor._running_jobs.keys()): - try: - time.sleep(_API_CALL_LIMIT["PollingIntervalInSecs"]) - if executor._running_jobs[job_name].describe()["TrainingJobStatus"] in [ - "Completed", - "Failed", - "Stopped", - ]: - with executor._state_condition: - del executor._running_jobs[job_name] - executor._state_condition.notify_all() - except Exception as e: # pylint: disable=broad-except - if ( - not isinstance(e, ClientError) - or e.response["Error"]["Code"] # pylint: disable=no-member - != "LimitExceededException" - ): - # Couldn't check the job status, move on - logger.exception( - "Error occurred while checking the status of job %s", job_name - ) - with executor._state_condition: - del executor._running_jobs[job_name] - executor._state_condition.notify_all() - except Exception: # pylint: disable=broad-except - logger.exception("Error occurred while monitoring the job statuses.") - - -class RemoteExecutor(object): - """Run Python functions asynchronously as SageMaker jobs""" - - def __init__( - self, - *, - dependencies: str = None, - pre_execution_commands: List[str] = None, - pre_execution_script: str = None, - environment_variables: Dict[str, str] = None, - image_uri: str = None, - include_local_workdir: bool = None, - custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, - instance_count: int = 1, - instance_type: str = None, - job_conda_env: str = None, - job_name_prefix: str = None, - keep_alive_period_in_seconds: 
int = 0, - max_parallel_jobs: int = 1, - max_retry_attempts: int = 1, - max_runtime_in_seconds: int = 24 * 60 * 60, - role: str = None, - s3_kms_key: str = None, - s3_root_uri: str = None, - sagemaker_session: Session = None, - security_group_ids: List[str] = None, - subnets: List[str] = None, - tags: List[Tuple[str, str]] = None, - volume_kms_key: str = None, - volume_size: int = 30, - encrypt_inter_container_traffic: bool = None, - spark_config: SparkConfig = None, - use_spot_instances=False, - max_wait_time_in_seconds=None, - disable_output_compression: bool = False, - use_torchrun: bool = False, - use_mpirun: bool = False, - nproc_per_node: Optional[int] = None, - ): - """Constructor for RemoteExecutor - - If a parameter value is not set, the constructor first looks up the value from the - SageMaker configuration file. If no value is specified in the configuration file or - no configuration file is found, the constructor selects the default as specified below. - For more information, see `Configuring and using defaults with the SageMaker Python SDK - `_. - - Args: - _func (Optional): A Python function to run as a SageMaker training job. - - dependencies (str): Either the path to a dependencies file or the reserved keyword - ``auto_capture``. Defaults to ``None``. - If ``dependencies`` is provided, the value must be one of the following: - - * A path to a conda environment.yml file. The following conditions apply. - - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then - the conda environment is updated by installing dependencies from the yaml file and - the function is invoked within that conda environment. 
For this to succeed, the - conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and - ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. - * If none of the previous conditions are met, a new conda environment named - ``sagemaker-runtime-env`` is created and the function annotated with the remote - decorator is invoked in that conda environment. - - * A path to a requirements.txt file. The following conditions apply. - - * If ``job_conda_env`` is set in the remote decorator, dependencies are installed - within that conda environment and the function annotated with the remote decorator - is invoked in the same conda environment. For this to succeed, the specified - conda environment must already exist in the image. - * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - dependencies are installed within that conda environment and the function annotated - with the remote decorator is invoked in the same. For this to succeed, the - conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and - ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. - * If none of the above conditions are met, conda is not used. Dependencies are - installed at the system level, without any virtual environment, and the function - annotated with the remote decorator is invoked using the Python runtime available - in the system path. - - * The parameter dependencies is set to ``auto_capture``. SageMaker will automatically - generate an env_snapshot.yml corresponding to the current active conda environment’s - snapshot. You do not need to provide a dependencies file. The following conditions - apply: - - * You must run the remote function within an active conda environment. - * When installing the dependencies on the training job, the same conditions as when - dependencies is set to a path to a conda environment file apply. 
These conditions - are as follows: - - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - then the conda environment is updated by installing dependencies from the yaml - file and the function is invoked within that conda environment. For this to - succeed, the conda environment name must already be set in - ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist - in the image. - * If none of the previous conditions are met, a new conda environment with name - ``sagemaker-runtime-env`` is created and the function annotated with the - remote decorator is invoked in that conda environment. - - * ``None``. SageMaker will assume that there are no dependencies to install while - executing the remote annotated function in the training job. - - pre_execution_commands (List[str]): List of commands to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. - - pre_execution_script (str): Path to script file to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. - - environment_variables (Dict): The environment variables used inside the decorator - function. Defaults to ``None``. - - image_uri (str): The universal resource identifier (URI) location of a Docker image on - Amazon Elastic Container Registry (ECR). Defaults to the following based on where the - SDK is running: - - * For users who specify ``spark_config`` and want to run the function in a Spark - application, the ``image_uri`` should be ``None``. 
A SageMaker Spark image will - be used for training, otherwise a ``ValueError`` is thrown. - * For users on SageMaker Studio notebooks, the image used as the kernel image for - the notebook is used. - * For other users, it is resolved to base python image with the same python - version as the environment running the local code. - - If no compatible image is found, a ValueError is thrown. - - include_local_workdir (bool): A flag to indicate that the remote function should include - local directories. Set to ``True`` if the remote function code imports local modules - and methods that are not available via PyPI or conda. Default value is ``False``. - - custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function - that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object - that specifies the local directories and files to be included in the remote function. - If a callable is passed in, that function is passed to the ``ignore`` argument of - ``shutil.copytree``. Defaults to ``None``, which means only python - files are accepted and uploaded to S3. - - instance_count (int): The number of instances to use. Defaults to 1. - NOTE: Remote function supports instance_count > 1 for Spark jobs, torchrun and - mpirun utilities - - instance_type (str): The Amazon Elastic Compute Cloud (EC2) instance type to use to run - the SageMaker job. e.g. ml.c4.xlarge. If not provided, a ValueError is thrown. - - job_conda_env (str): The name of the conda environment to activate during job's runtime. - Defaults to ``None``. - - job_name_prefix (str): The prefix used used to create the underlying SageMaker job. - - keep_alive_period_in_seconds (int): The duration in seconds to retain and reuse - provisioned infrastructure after the completion of a training job, also known as - SageMaker managed warm pools. The use of warmpools reduces the latency time spent to - provision new resources. 
The default value for ``keep_alive_period_in_seconds`` is 0. - NOTE: Additional charges associated with warm pools may apply. Using this parameter - also activates a new pesistent cache feature, which will further reduce job start - up latency than over using SageMaker managed warm pools alone by caching the package - source downloaded in the previous runs. - - max_parallel_jobs (int): Maximum number of jobs that run in parallel. Defaults to 1. - - max_retry_attempts (int): The max number of times the job is retried on - ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. - - max_runtime_in_seconds (int): The upper limit in seconds to be used for training. After - this specified amount of time, SageMaker terminates the job regardless of its current - status. Defaults to 1 day or (86400 seconds). - - role (str): The IAM role (either name or full ARN) used to run your SageMaker training - job. Defaults to: - - * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or - SageMaker Studio Notebooks. - * if not above, a ValueError is be thrown. - - s3_kms_key (str): The key used to encrypt the input and output data. - Default to ``None``. - - s3_root_uri (str): The root S3 folder to which the code archives and data are - uploaded to. Defaults to ``s3://``. - - sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to which - SageMaker service calls are delegated to (default: None). If not provided, one is - created using a default configuration chain. - - security_group_ids (List[str): A list of security group IDs. Defaults to ``None`` and - the training job is created without VPC config. - - subnets (List[str): A list of subnet IDs. Defaults to ``None`` and the job is - created without VPC config. - - tags (List[Tuple[str, str]): A list of tags attached to the job. Defaults to ``None`` - and the training job is created without tags. 
- - volume_kms_key (str): An Amazon Key Management Service (KMS) key used to encrypt an - Amazon Elastic Block Storage (EBS) volume attached to the training instance. - Defaults to ``None``. - - volume_size (int): The size in GB of the storage volume for storing input and output - data during training. Defaults to ``30``. - - encrypt_inter_container_traffic (bool): A flag that specifies whether traffic between - training containers is encrypted for the training job. Defaults to ``False``. - - spark_config (SparkConfig): Configurations to the Spark application that runs on - Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri - will be used for training. Note that ``image_uri`` can not be specified at the - same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. - - use_spot_instances (bool): Specifies whether to use SageMaker Managed Spot instances for - training. If enabled then the ``max_wait_time_in_seconds`` arg should also be set. - Defaults to ``False``. - - max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. - After this amount of time Amazon SageMaker will stop waiting for managed spot training - job to complete. Defaults to ``None``. - - disable_output_compression (bool): Optional. When set to true, Model is uploaded to - Amazon S3 without compression after training finishes. - - use_torchrun (bool): Specifies whether to use torchrun for distributed training. - Defaults to ``False``. - - use_mpirun (bool): Specifies whether to use mpirun for distributed training. - Defaults to ``False``. - - nproc_per_node (int): Optional. Specifies the number of processes per node for - distributed training. Defaults to ``None``. - This is defined automatically configured on the instance type. 
- """ - self.max_parallel_jobs = max_parallel_jobs - - if self.max_parallel_jobs <= 0: - raise ValueError("max_parallel_jobs must be greater than 0.") - - if instance_count > 1 and not ( - (spark_config is not None and not use_torchrun and not use_mpirun) - or (spark_config is None and use_torchrun and not use_mpirun) - or (spark_config is None and not use_torchrun and use_mpirun) - ): - raise ValueError( - "Remote function do not support training on multi instances " - + "without spark_config or use_torchrun or use_mpirun. " - + "Please provide instance_count = 1" - ) - - self.job_settings = _JobSettings( - dependencies=dependencies, - pre_execution_commands=pre_execution_commands, - pre_execution_script=pre_execution_script, - environment_variables=environment_variables, - image_uri=image_uri, - include_local_workdir=include_local_workdir, - custom_file_filter=custom_file_filter, - instance_count=instance_count, - instance_type=instance_type, - job_conda_env=job_conda_env, - job_name_prefix=job_name_prefix, - keep_alive_period_in_seconds=keep_alive_period_in_seconds, - max_retry_attempts=max_retry_attempts, - max_runtime_in_seconds=max_runtime_in_seconds, - role=role, - s3_kms_key=s3_kms_key, - s3_root_uri=s3_root_uri, - sagemaker_session=sagemaker_session, - security_group_ids=security_group_ids, - subnets=subnets, - tags=tags, - volume_kms_key=volume_kms_key, - volume_size=volume_size, - encrypt_inter_container_traffic=encrypt_inter_container_traffic, - spark_config=spark_config, - use_spot_instances=use_spot_instances, - max_wait_time_in_seconds=max_wait_time_in_seconds, - disable_output_compression=disable_output_compression, - use_torchrun=use_torchrun, - use_mpirun=use_mpirun, - nproc_per_node=nproc_per_node, - ) - - self._state_condition = threading.Condition() - self._pending_request_queue = deque() - # For thread safety, see - # https://web.archive.org/web/20201108091210/http://effbot.org/pyfaq/what-kinds-of-global-value-mutation-are-thread-safe.htm - 
self._running_jobs = dict() - self._shutdown = False - - self._workers: ThreadPoolExecutor = None - - def submit(self, func, *args, **kwargs): - """Execute the input function as a SageMaker job asynchronously. - - Args: - func: Python function to run as a SageMaker job. - *args: Positional arguments to the input function. - **kwargs: keyword arguments to the input function - """ - if self._shutdown: - raise RuntimeError("Cannot schedule new remote function executions after shutdown") - - self._validate_submit_args(func, *args, **kwargs) - - with self._state_condition: - future = Future() - - run_info = None - if _RunContext.get_current_run() is not None: - run = _RunContext.get_current_run() - run_info = _RunInfo(run.experiment_name, run.run_name) - - self._pending_request_queue.append( - _SubmitRequest(future, self.job_settings, func, args, kwargs, run_info) - ) - - if self._workers is None: - self._workers = ThreadPoolExecutor(2) - self._workers.submit(_submit_worker, self) - self._workers.submit(_polling_worker, self) - - self._state_condition.notify_all() - - return future - - def map(self, func, *iterables): - """Return an iterator that applies function to every item of iterable, yielding the results. - - If additional iterables arguments are passed, function must take that many arguments and - is applied to the items from all iterables in parallel. With multiple iterables, the - iterator stops when the shortest iterable is exhausted. - - Args: - func: Python function to run as a SageMaker job. - iterables: Arguments of the input python function. 
- """ - - futures = map(self.submit, itertools.repeat(func), *iterables) - return [future.result() for future in futures] - - def shutdown(self): - """Prevent more function executions to be submitted to this executor.""" - with self._state_condition: - self._shutdown = True - - # give a signal to the submitting worker so that it doesn't block on empty queue forever - self._pending_request_queue.append(None) - - self._state_condition.notify_all() - - if self._workers is not None: - self._workers.shutdown(wait=True) - - def __enter__(self): - """Create an executor instance and return it""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Make sure the executor instance is shutdown.""" - self.shutdown() - return False - - @staticmethod - def _validate_submit_args(func, *args, **kwargs): - """Validates input args passed to submit method.""" - - full_arg_spec = inspect.getfullargspec(func) - - # args related validations - - is_accepting_variable_positional_args = full_arg_spec.varargs is not None - num_default_positional_args = len(full_arg_spec.defaults) if full_arg_spec.defaults else 0 - minimum_num_expected_positional_args = len(full_arg_spec.args) - num_default_positional_args - - if not is_accepting_variable_positional_args and len(args) > len(full_arg_spec.args): - raise TypeError( - f"{func.__name__}() takes {len(full_arg_spec.args)} positional " - + f"{'arguments' if len(full_arg_spec.args) > 1 else 'argument'} but {len(args)} " - + f"{'were' if len(args) > 1 else 'was'} given." 
- ) - - if len(args) < minimum_num_expected_positional_args: - missing_positional_args = full_arg_spec.args[ - len(args) : minimum_num_expected_positional_args - ] - missing_args = list(filter(lambda arg: arg not in kwargs, missing_positional_args)) - if missing_args: - missing_args_str = ( - ", ".join(map(lambda x: f"'{x}'", missing_args[:-1])) - + f", and '{missing_args[-1]}'" - if len(missing_args) > 1 - else f"'{missing_args[0]}'" - ) - raise TypeError( - f"{func.__name__}() missing {len(missing_args)} required positional " - + f"{'arguments' if len(missing_args) > 1 else 'argument'}: {missing_args_str}" - ) - - # kwargs related validations - - for k in kwargs: - if k in full_arg_spec.args and len(args) > full_arg_spec.args.index(k): - raise TypeError(f"{func.__name__}() got multiple values for argument '{k}'") - if k not in full_arg_spec.kwonlyargs and k not in full_arg_spec.args: - raise TypeError(f"{func.__name__}() got an unexpected keyword argument '{k}'") - - missing_kwargs = [ - k - for k in full_arg_spec.kwonlyargs - if k not in full_arg_spec.kwonlydefaults and k not in kwargs - ] - if missing_kwargs: - missing_kwargs_string = ( - ", ".join(map(lambda x: f"'{x}'", missing_kwargs[:-1])) - + f", and '{missing_kwargs[-1]}'" - if len(missing_kwargs) > 1 - else f"'{missing_kwargs[0]}'" - ) - - raise TypeError( - f"{func.__name__}() missing {len(missing_kwargs)} required keyword-only " - + f"{'arguments' if len(missing_kwargs) > 1 else 'argument'}: " - + f"{missing_kwargs_string}" - ) - - -class Future(object): - """Class representing a reference to a SageMaker job result. - - Reference to the SageMaker job created as a result of the remote function run. The job may - or may not have finished running. 
- """ - - def __init__(self): - self._condition = threading.Condition() - self._state = _PENDING - self._job = None - self._exception = None - self._return = None - - @staticmethod - def from_describe_response(describe_training_job_response, sagemaker_session): - """Construct a Future from a describe_training_job_response object.""" - future = Future() - job_exception = None - client_exception = None - job_return = None - job = _Job.from_describe_response(describe_training_job_response, sagemaker_session) - if describe_training_job_response["TrainingJobStatus"] in ["Stopping", "Stopped"]: - state = _CANCELLED - elif describe_training_job_response["TrainingJobStatus"] == "Completed": - state = _FINISHED - try: - job_return = serialization.deserialize_obj_from_s3( - sagemaker_session=sagemaker_session, - s3_uri=s3_path_join(job.s3_uri, RESULTS_FOLDER), - hmac_key=job.hmac_key, - ) - except DeserializationError as e: - client_exception = e - except ServiceError as e: - client_exception = e - elif describe_training_job_response["TrainingJobStatus"] == "Failed": - state = _FINISHED - try: - job_exception = serialization.deserialize_exception_from_s3( - sagemaker_session=sagemaker_session, - s3_uri=s3_path_join(job.s3_uri, EXCEPTION_FOLDER), - hmac_key=job.hmac_key, - ) - except ServiceError as serr: - chained_e = serr.__cause__ - if ( - isinstance(chained_e, ClientError) - and chained_e.response["Error"]["Code"] == "404" # pylint: disable=no-member - and chained_e.response["Error"]["Message"] # pylint: disable=no-member - == "Not Found" - ): - if ( - "FailureReason" in describe_training_job_response - and describe_training_job_response["FailureReason"] - and "RuntimeEnvironmentError: " - in describe_training_job_response["FailureReason"] - ): - failure_msg = describe_training_job_response["FailureReason"].replace( - "RuntimeEnvironmentError: ", "" - ) - job_exception = RuntimeEnvironmentError(failure_msg) - else: - job_exception = RemoteFunctionError( - "Failed to 
execute remote function. " - + "Check corresponding job for details." - ) - else: - job_exception = serr - except DeserializationError as e: - client_exception = e - else: - state = _RUNNING - - future._job = job - future._state = state - future._exception = job_exception or client_exception - future._return = job_return - return future - - def _start_and_notify( - self, job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None - ): - """Start and record the newly created job in the future object. - - The job is recorded if one is successfully started. Otherwise, the exception is - recorded. The state update is broadcast to other waiting threads. - """ - with self._condition: - if self._state in [_PENDING]: - - try: - self._job = _Job.start(job_settings, func, func_args, func_kwargs, run_info) - except (Exception,) as e: # pylint: disable=broad-except - self._exception = e - self._state = _FINISHED - self._condition.notify_all() - return None - - self._state = _RUNNING - self._condition.notify_all() - return self._job - return None - - def result(self, timeout: float = None) -> Any: - """Returns the SageMaker job result. - - This method waits for the SageMaker job created from the remote function execution to - complete for up to the timeout value (if specified). If timeout is ``None``, - this method will wait until the SageMaker job completes. - - Args: - timeout (float): Timeout in seconds to wait until the job is completed. ``None`` by - default. - - Returns: - The Python object returned by the remote function. 
- """ - try: - self.wait(timeout) - except UnexpectedStatusException: - pass - - with self._condition: - if self._state == _PENDING: - raise RuntimeError() - - if self._state == _RUNNING: - if self._job.describe()["TrainingJobStatus"] == "Completed": - self._return = serialization.deserialize_obj_from_s3( - sagemaker_session=self._job.sagemaker_session, - s3_uri=s3_path_join(self._job.s3_uri, RESULTS_FOLDER), - hmac_key=self._job.hmac_key, - ) - self._state = _FINISHED - return self._return - if self._job.describe()["TrainingJobStatus"] == "Failed": - try: - self._exception = serialization.deserialize_exception_from_s3( - sagemaker_session=self._job.sagemaker_session, - s3_uri=s3_path_join(self._job.s3_uri, EXCEPTION_FOLDER), - hmac_key=self._job.hmac_key, - ) - except ServiceError as serr: - chained_e = serr.__cause__ - if ( - isinstance(chained_e, ClientError) - and chained_e.response["Error"]["Code"] # pylint: disable=no-member - == "404" - and chained_e.response["Error"]["Message"] # pylint: disable=no-member - == "Not Found" - ): - if ( - "FailureReason" in self._job.describe() - and self._job.describe()["FailureReason"] - and "RuntimeEnvironmentError: " - in self._job.describe()["FailureReason"] - ): - failure_msg = self._job.describe()["FailureReason"].replace( - "RuntimeEnvironmentError: ", "" - ) - self._exception = RuntimeEnvironmentError(failure_msg) - else: - self._exception = RemoteFunctionError( - "Failed to execute remote function. " - + "Check corresponding job for details." - ) - else: - self._exception = serr - self._state = _FINISHED - elif self._job.describe()["TrainingJobStatus"] == "Stopped": - self._state = _CANCELLED - raise RemoteFunctionError("Job for remote function has been aborted.") - else: - raise TimeoutError( - "Job for remote function timed out before reaching a termination status." 
- ) - - if self._state == _FINISHED: - if self._exception: - raise self._exception - return self._return - - return None - - def wait( - self, - timeout: int = None, - ) -> None: - """Wait for the underlying SageMaker job to complete. - - This method waits for the SageMaker job created as a result of the remote function run - to complete for up to the timeout value (if specified). If timeout is ``None``, this method - will block until the job is completed. - - Args: - timeout (int): Timeout in seconds to wait until the job is completed before it is - stopped. Defaults to ``None``. - - Returns: - None - """ - - with self._condition: - if self._state == _PENDING: - self._condition.wait(timeout=timeout) - - if self._state == _RUNNING: - self._job.wait(timeout=timeout) - - def cancel(self) -> bool: - """Cancel the function execution. - - This method prevents the SageMaker job being created or stops the underlying SageMaker job - early if it is already in progress. - - Returns: - ``True`` if the underlying SageMaker job created as a result of the remote function - run is cancelled. - """ - with self._condition: - if self._state == _FINISHED: - return False - if self._state == _CANCELLED: - return True - - if self._job: - self._job.stop() - self._state = _CANCELLED - return True - - def running(self) -> bool: - """Check if the underlying SageMaker job is running. - - Returns: - ``True`` if the underlying SageMaker job is still running. ``False``, otherwise. - """ - with self._condition: - return self._state == _RUNNING - - def cancelled(self) -> bool: - """Check if the underlying SageMaker job was cancelled. - - Returns: - ``True`` if the underlying SageMaker job was cancelled. ``False``, otherwise. - """ - with self._condition: - return self._state == _CANCELLED - - def done(self) -> bool: - """Check if the underlying SageMaker job is finished. - - Returns: - ``True`` if the underlying SageMaker job finished running. ``False``, otherwise. 
- """ - with self._condition: - if self._state == _RUNNING and self._job.describe()["TrainingJobStatus"] in [ - "Completed", - "Failed", - ]: - self._state = _FINISHED - return True - - if self._state == _FINISHED: - return True - - return False - - -def get_future(job_name, sagemaker_session=None) -> Future: - """Get a future object with information about a job with the given job_name. - - Args: - job_name (str): name of the underlying SageMaker job created as a result of the remote - function run. - - sagemaker_session (sagemaker.core.helper.session.Session): A session object that manages interactions - with Amazon SageMaker APIs and any other AWS services needed. - - Returns: - A `sagemaker.remote_function.client.Future` instance. - """ - if not sagemaker_session: - sagemaker_session = Session() - describe_training_job_response = sagemaker_session.sagemaker_client.describe_training_job( - TrainingJobName=job_name - ) - return Future.from_describe_response(describe_training_job_response, sagemaker_session) - - -def list_futures(job_name_prefix, sagemaker_session=None): - """Generates Future objects with information about jobs with given job_name_prefix. - - Args: - job_name_prefix (str): A prefix used to identify the SageMaker jobs associated with remote - function run. - sagemaker_session (sagemaker.core.helper.session.Session): A session object that manages interactions - with Amazon SageMaker APIs and any other AWS services needed. - - Yields: - A `sagemaker.remote_function.client.Future` instance. - """ - if not sagemaker_session: - sagemaker_session = Session() - job_name = name_from_base(job_name_prefix) - # perform the following transformation because we might have trimmed the job_name_prefix while - # creating the job. 
- transformed_job_name_prefix = base_from_name(job_name) - next_token = None - list_training_job_kwargs = {"NameContains": transformed_job_name_prefix} - while True: - if next_token: - list_training_job_kwargs["NextToken"] = next_token - list_training_job_response = sagemaker_session.sagemaker_client.list_training_jobs( - **list_training_job_kwargs - ) - training_job_names = [ - job["TrainingJobName"] for job in list_training_job_response["TrainingJobSummaries"] - ] - for training_job_name in training_job_names: - describe_training_job_response = ( - sagemaker_session.sagemaker_client.describe_training_job( - TrainingJobName=training_job_name - ) - ) - yield Future.from_describe_response(describe_training_job_response, sagemaker_session) - if "NextToken" in list_training_job_response: - next_token = list_training_job_response["NextToken"] - else: - break diff --git a/sagemaker-core/src/sagemaker/core/remote_function/core/__init__.py b/sagemaker-core/src/sagemaker/core/remote_function/core/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sagemaker-core/src/sagemaker/core/remote_function/core/_custom_dispatch_table.py b/sagemaker-core/src/sagemaker/core/remote_function/core/_custom_dispatch_table.py deleted file mode 100644 index 3217e88672..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/core/_custom_dispatch_table.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""SageMaker remote function data serializer/deserializer.""" -from __future__ import absolute_import - -from sagemaker.core.remote_function.errors import SerializationError - -from sagemaker.core.helper.pipeline_variable import PipelineVariable -from sagemaker.core.workflow.parameters import ( - ParameterInteger, - ParameterFloat, - ParameterString, - ParameterBoolean, -) -from sagemaker.core.workflow.execution_variables import ExecutionVariable -from sagemaker.core.workflow.properties import ( - Properties, - PropertiesMap, - PropertiesList, -) - - -# Lazy import to avoid circular dependency -# DelayedReturn is in MLOps package which depends on Core -def _get_delayed_return_class(): - """Lazy import of DelayedReturn to avoid circular dependency.""" - try: - from sagemaker.mlops.workflow.function_step import DelayedReturn - - return DelayedReturn - except ImportError: - # If MLOps is not installed, return None - return None - - -def _pipeline_variable_reducer(pipeline_variable): - """Reducer for pipeline variable.""" - - raise SerializationError( - """Please pass the pipeline variable to the function decorated with @step as an argument. 
- Referencing to a pipeline variable from within the function - or passing a pipeline variable nested in a data structure are not supported.""" - ) - - -# Build dispatch table with lazy loading for DelayedReturn -dispatch_table = { - ParameterInteger: _pipeline_variable_reducer, - ParameterFloat: _pipeline_variable_reducer, - ParameterString: _pipeline_variable_reducer, - ParameterBoolean: _pipeline_variable_reducer, - ExecutionVariable: _pipeline_variable_reducer, - PipelineVariable: _pipeline_variable_reducer, - Properties: _pipeline_variable_reducer, - PropertiesMap: _pipeline_variable_reducer, - PropertiesList: _pipeline_variable_reducer, -} - -# Add DelayedReturn to dispatch table if MLOps is available -_delayed_return_class = _get_delayed_return_class() -if _delayed_return_class is not None: - dispatch_table[_delayed_return_class] = _pipeline_variable_reducer diff --git a/sagemaker-core/src/sagemaker/core/remote_function/core/pipeline_variables.py b/sagemaker-core/src/sagemaker/core/remote_function/core/pipeline_variables.py deleted file mode 100644 index 5278306063..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/core/pipeline_variables.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""SageMaker remote function data serializer/deserializer.""" -from __future__ import absolute_import - -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass, field -from typing import Any, Union, Dict, List, Tuple - -from sagemaker.core.s3 import s3_path_join -from sagemaker.core.remote_function.core.serialization import deserialize_obj_from_s3 -from sagemaker.core.workflow.step_outputs import get_step - - -@dataclass -class Context: - """Context for an execution.""" - - step_name: str = None - execution_id: str = None - property_references: Dict[str, str] = field(default_factory=dict) - serialize_output_to_json: bool = False - func_step_s3_dir: str = None - - -@dataclass -class _Parameter: - """Parameter to a function.""" - - name: str - - -class _ParameterInteger(_Parameter): - """Integer parameter to a function.""" - - ... - - -class _ParameterFloat(_Parameter): - """Float parameter to a function.""" - - ... - - -class _ParameterString(_Parameter): - """String parameter to a function.""" - - ... - - -class _ParameterBoolean(_Parameter): - """Boolean parameter to a function.""" - - ... - - -@dataclass -class _Properties: - """Properties of classic steps.""" - - path: str - - -@dataclass -class _ExecutionVariable: - """Execution variable.""" - - name: str - - -@dataclass -class _S3BaseUriIdentifier: - """Identifies that the class refers to function step s3 base uri. - - The s3_base_uri = s3_root_uri + pipeline_name. - This identifier is resolved in function step runtime by SDK. 
- """ - - NAME = "S3_BASE_URI" - - -@dataclass -class _DelayedReturn: - """Delayed return from a function.""" - - uri: Union[_Properties, List[Union[str, _Parameter, _ExecutionVariable]]] - reference_path: Tuple = field(default_factory=tuple) - - -class _ExecutionVariableResolver: - """Resolve execution variables.""" - - def __init__(self, context: Context): - """Resolve execution variables.""" - self._context = context - - def resolve(self, execution_variable: _ExecutionVariable): - """Resolve a single execution variable. - - Args: - execution_variable: execution variable to resolve. - Returns: - resolved value - """ - return self._context.property_references[f"Execution.{execution_variable.name}"] - - -class _ParameterResolver: - """Resolve parameters.""" - - def __init__(self, context: Context): - """Resolve parameters.""" - self._context = context - - def resolve(self, parameter: _Parameter): - """Resolve a single property reference. - - Args: - parameter: parameter to resolve. - Returns: - resolved value - """ - if isinstance(parameter, _ParameterInteger): - return int(self._context.property_references[f"Parameters.{parameter.name}"]) - if isinstance(parameter, _ParameterFloat): - return float(self._context.property_references[f"Parameters.{parameter.name}"]) - if isinstance(parameter, _ParameterString): - return self._context.property_references[f"Parameters.{parameter.name}"] - - return self._context.property_references[f"Parameters.{parameter.name}"] == "true" - - -class _PropertiesResolver: - """Resolve classic step properties.""" - - def __init__(self, context: Context): - """Resolve classic step properties.""" - self._context = context - - def resolve(self, properties: _Properties): - """Resolve classic step properties. - - Args: - properties: classic step properties. 
- Returns: - resolved value - """ - return self._context.property_references[properties.path] - - -class _DelayedReturnResolver: - """Resolve delayed returns.""" - - def __init__( - self, - delayed_returns: List[_DelayedReturn], - hmac_key: str, - properties_resolver: _PropertiesResolver, - parameter_resolver: _ParameterResolver, - execution_variable_resolver: _ExecutionVariableResolver, - s3_base_uri: str, - **settings, - ): - """Resolve delayed return. - - Args: - delayed_returns: list of delayed returns to resolve. - hmac_key: key used to encrypt serialized and deserialized function and arguments. - properties_resolver: resolver used to resolve step properties. - parameter_resolver: resolver used to pipeline parameters. - execution_variable_resolver: resolver used to resolve execution variables. - s3_base_uri (str): the s3 base uri of the function step that - the serialized artifacts will be uploaded to. - The s3_base_uri = s3_root_uri + pipeline_name. - **settings: settings to pass to the deserialization function. - """ - self._s3_base_uri = s3_base_uri - self._parameter_resolver = parameter_resolver - self._execution_variable_resolver = execution_variable_resolver - self._properties_resolver = properties_resolver - # different delayed returns can have the same uri, so we need to dedupe - uris = { - self._resolve_delayed_return_uri(delayed_return) for delayed_return in delayed_returns - } - - def deserialization_task(uri): - return uri, deserialize_obj_from_s3( - sagemaker_session=settings["sagemaker_session"], - s3_uri=uri, - hmac_key=hmac_key, - ) - - with ThreadPoolExecutor() as executor: - self._deserialized_objects = dict(executor.map(deserialization_task, uris)) - - def resolve(self, delayed_return: _DelayedReturn) -> Any: - """Resolve a single delayed return. - - Args: - delayed_return: delayed return to resolve. - Returns: - resolved delayed return. 
- """ - deserialized_obj = self._deserialized_objects[ - self._resolve_delayed_return_uri(delayed_return) - ] - return _retrieve_child_item(delayed_return, deserialized_obj) - - def _resolve_delayed_return_uri(self, delayed_return: _DelayedReturn): - """Resolve the s3 uri of the delayed return.""" - if isinstance(delayed_return.uri, _Properties): - return self._properties_resolver.resolve(delayed_return.uri) - - # Keep the following old resolution logics to keep backward compatible - uri = [] - for component in delayed_return.uri: - if isinstance(component, _Parameter): - uri.append(self._parameter_resolver.resolve(component)) - elif isinstance(component, _ExecutionVariable): - uri.append(self._execution_variable_resolver.resolve(component)) - elif isinstance(component, _S3BaseUriIdentifier): - uri.append(self._s3_base_uri) - else: - uri.append(component) - return s3_path_join(*uri) - - -def _retrieve_child_item(delayed_return: _DelayedReturn, deserialized_obj: Any): - """Retrieve child item from deserialized object.""" - result = deserialized_obj - for component in delayed_return.reference_path: - result = result[component[1]] - return result - - -def resolve_pipeline_variables( - context: Context, - func_args: Tuple, - func_kwargs: Dict, - hmac_key: str, - s3_base_uri: str, - **settings, -): - """Resolve pipeline variables. - - Args: - context: context for the execution. - func_args: function args. - func_kwargs: function kwargs. - hmac_key: key used to encrypt serialized and deserialized function and arguments. - s3_base_uri: the s3 base uri of the function step that the serialized artifacts - will be uploaded to. The s3_base_uri = s3_root_uri + pipeline_name. - **settings: settings to pass to the deserialization function. 
- """ - - delayed_returns = [] - - if func_args is not None: - for arg in func_args: - if isinstance(arg, _DelayedReturn): - delayed_returns.append(arg) - if func_kwargs is not None: - for arg in func_kwargs.values(): - if isinstance(arg, _DelayedReturn): - delayed_returns.append(arg) - - # build the resolvers - parameter_resolver = _ParameterResolver(context) - execution_variable_resolver = _ExecutionVariableResolver(context) - properties_resolver = _PropertiesResolver(context) - delayed_return_resolver = _DelayedReturnResolver( - delayed_returns=delayed_returns, - hmac_key=hmac_key, - properties_resolver=properties_resolver, - parameter_resolver=parameter_resolver, - execution_variable_resolver=execution_variable_resolver, - s3_base_uri=s3_base_uri, - **settings, - ) - - # resolve the pipeline variables - resolved_func_args = None - if func_args is not None: - resolved_func_args = [] - for arg in func_args: - if isinstance(arg, _Parameter): - resolved_func_args.append(parameter_resolver.resolve(arg)) - elif isinstance(arg, _ExecutionVariable): - resolved_func_args.append(execution_variable_resolver.resolve(arg)) - elif isinstance(arg, _Properties): - resolved_func_args.append(properties_resolver.resolve(arg)) - elif isinstance(arg, _DelayedReturn): - resolved_func_args.append(delayed_return_resolver.resolve(arg)) - else: - resolved_func_args.append(arg) - resolved_func_args = tuple(resolved_func_args) - - resolved_func_kwargs = None - if func_kwargs is not None: - resolved_func_kwargs = {} - for key, value in func_kwargs.items(): - if isinstance(value, _Parameter): - resolved_func_kwargs[key] = parameter_resolver.resolve(value) - elif isinstance(value, _ExecutionVariable): - resolved_func_kwargs[key] = execution_variable_resolver.resolve(value) - elif isinstance(value, _Properties): - resolved_func_kwargs[key] = properties_resolver.resolve(value) - elif isinstance(value, _DelayedReturn): - resolved_func_kwargs[key] = delayed_return_resolver.resolve(value) - else: 
- resolved_func_kwargs[key] = value - - return resolved_func_args, resolved_func_kwargs - - -def convert_pipeline_variables_to_pickleable(func_args: Tuple, func_kwargs: Dict): - """Convert pipeline variables to pickleable. - - Args: - func_args: function args. - func_kwargs: function kwargs. - """ - - from sagemaker.core.helper.pipeline_variable import PipelineVariable - - from sagemaker.mlops.workflow.function_step import DelayedReturn - - def convert(arg): - if isinstance(arg, DelayedReturn): - return _DelayedReturn( - uri=get_step(arg)._properties.OutputDataConfig.S3OutputPath._pickleable, - reference_path=arg._reference_path, - ) - - if isinstance(arg, PipelineVariable): - return arg._pickleable - - return arg - - converted_func_args = tuple(convert(arg) for arg in func_args) - converted_func_kwargs = {key: convert(arg) for key, arg in func_kwargs.items()} - - return converted_func_args, converted_func_kwargs diff --git a/sagemaker-core/src/sagemaker/core/remote_function/core/serialization.py b/sagemaker-core/src/sagemaker/core/remote_function/core/serialization.py deleted file mode 100644 index 39517bdc6b..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/core/serialization.py +++ /dev/null @@ -1,422 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""SageMaker remote function data serializer/deserializer.""" -from __future__ import absolute_import - -import dataclasses -import json - -import io - -import sys -import hmac -import hashlib -import pickle - -from typing import Any, Callable, Union - -import cloudpickle -from tblib import pickling_support - -from sagemaker.core.remote_function.errors import ( - ServiceError, - SerializationError, - DeserializationError, -) -from sagemaker.core.s3 import S3Downloader, S3Uploader -from sagemaker.core.helper.session_helper import Session -from ._custom_dispatch_table import dispatch_table - -# Note: do not use os.path.join for s3 uris, fails on windows - - -def _get_python_version(): - """Returns the current python version.""" - return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" - - -@dataclasses.dataclass -class _MetaData: - """Metadata about the serialized data or functions.""" - - sha256_hash: str - version: str = "2023-04-24" - python_version: str = _get_python_version() - serialization_module: str = "cloudpickle" - - def to_json(self): - """Converts metadata to json string.""" - return json.dumps(dataclasses.asdict(self)).encode() - - @staticmethod - def from_json(s): - """Converts json string to metadata object.""" - try: - obj = json.loads(s) - except json.decoder.JSONDecodeError: - raise DeserializationError("Corrupt metadata file. It is not a valid json file.") - - sha256_hash = obj.get("sha256_hash") - metadata = _MetaData(sha256_hash=sha256_hash) - metadata.version = obj.get("version") - metadata.python_version = obj.get("python_version") - metadata.serialization_module = obj.get("serialization_module") - - if not sha256_hash: - raise DeserializationError( - "Corrupt metadata file. SHA256 hash for the serialized data does not exist. " - "Please make sure to install SageMaker SDK version >= 2.156.0 on the client side " - "and try again." 
- ) - - if not ( - metadata.version == "2023-04-24" and metadata.serialization_module == "cloudpickle" - ): - raise DeserializationError( - f"Corrupt metadata file. Serialization approach {s} is not supported." - ) - - return metadata - - -class CloudpickleSerializer: - """Serializer using cloudpickle.""" - - @staticmethod - def serialize(obj: Any) -> bytes: - """Serializes data object and uploads it to S3. - - Args: - obj: object to be serialized and persisted - Raises: - SerializationError: when fail to serialize object to bytes. - """ - try: - io_buffer = io.BytesIO() - custom_pickler = cloudpickle.CloudPickler(io_buffer) - dt = pickle.Pickler.dispatch_table.__get__(custom_pickler) # pylint: disable=no-member - new_dt = dt.new_child(dispatch_table) - pickle.Pickler.dispatch_table.__set__( # pylint: disable=no-member - custom_pickler, new_dt - ) - custom_pickler.dump(obj) - return io_buffer.getvalue() - except Exception as e: - if isinstance( - e, NotImplementedError - ) and "Instance of Run type is not allowed to be pickled." in str(e): - raise SerializationError( - """You are trying to pass a sagemaker.experiments.run.Run object to - a remote function - or are trying to access a global sagemaker.experiments.run.Run object - from within the function. This is not supported. - You must use `load_run` to load an existing Run in the remote function - or instantiate a new Run in the function.""" - ) - - raise SerializationError( - "Error when serializing object of type [{}]: {}".format(type(obj).__name__, repr(e)) - ) from e - - @staticmethod - def deserialize(s3_uri: str, bytes_to_deserialize: bytes) -> Any: - """Downloads from S3 and then deserializes data objects. - - Args: - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - bytes_to_deserialize: bytes to be deserialized. - Returns : - List of deserialized python objects. - Raises: - DeserializationError: when fail to serialize object to bytes. 
- """ - - try: - return cloudpickle.loads(bytes_to_deserialize) - except Exception as e: - raise DeserializationError( - "Error when deserializing bytes downloaded from {}: {}. " - "NOTE: this may be caused by inconsistent sagemaker python sdk versions " - "where remote function runs versus the one used on client side. " - "If the sagemaker versions do not match, a warning message would " - "be logged starting with 'Inconsistent sagemaker versions found'. " - "Please check it to validate.".format(s3_uri, repr(e)) - ) from e - - -# TODO: use dask serializer in case dask distributed is installed in users' environment. -def serialize_func_to_s3( - func: Callable, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None -): - """Serializes function and uploads it to S3. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying Boto3 session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func. - s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - func: function to be serialized and persisted - Raises: - SerializationError: when fail to serialize function to bytes. - """ - - _upload_payload_and_metadata_to_s3( - bytes_to_upload=CloudpickleSerializer.serialize(func), - hmac_key=hmac_key, - s3_uri=s3_uri, - sagemaker_session=sagemaker_session, - s3_kms_key=s3_kms_key, - ) - - -def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Callable: - """Downloads from S3 and then deserializes data objects. - - This method downloads the serialized training job outputs to a temporary directory and - then deserializes them using dask. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying sagemaker session which AWS service calls are delegated to. 
- s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func. - Returns : - The deserialized function. - Raises: - DeserializationError: when fail to serialize function to bytes. - """ - metadata = _MetaData.from_json( - _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) - ) - - bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) - - _perform_integrity_check( - expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize - ) - - return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) - - -def serialize_obj_to_s3( - obj: Any, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None -): - """Serializes data object and uploads it to S3. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying Boto3 session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. - obj: object to be serialized and persisted - Raises: - SerializationError: when fail to serialize object to bytes. - """ - - _upload_payload_and_metadata_to_s3( - bytes_to_upload=CloudpickleSerializer.serialize(obj), - hmac_key=hmac_key, - s3_uri=s3_uri, - sagemaker_session=sagemaker_session, - s3_kms_key=s3_kms_key, - ) - - -def json_serialize_obj_to_s3( - obj: Any, - json_key: str, - sagemaker_session: Session, - s3_uri: str, - s3_kms_key: str = None, -): - """Json serializes data object and uploads it to S3. - - If a function step's output is data referenced by other steps via JsonGet, - its output should be json serialized and uploaded to S3. - - Args: - obj: (Any) object to be serialized and persisted. 
- json_key: (str) the json key pointing to function step output. - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying Boto3 session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - """ - json_serialized_result = {} - try: - to_dump = {json_key: obj, "Exception": None} - json_serialized_result = json.dumps(to_dump) - except TypeError as e: - if "is not JSON serializable" in str(e): - to_dump = { - json_key: None, - "Exception": f"The function return ({obj}) is not JSON serializable.", - } - json_serialized_result = json.dumps(to_dump) - - S3Uploader.upload_string_as_file_body( - body=json_serialized_result, - desired_s3_uri=s3_uri, - sagemaker_session=sagemaker_session, - kms_key=s3_kms_key, - ) - - -def deserialize_obj_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any: - """Downloads from S3 and then deserializes data objects. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying sagemaker session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. - Returns : - Deserialized python objects. - Raises: - DeserializationError: when fail to serialize object to bytes. 
- """ - - metadata = _MetaData.from_json( - _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) - ) - - bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) - - _perform_integrity_check( - expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize - ) - - return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) - - -def serialize_exception_to_s3( - exc: Exception, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None -): - """Serializes exception with traceback and uploads it to S3. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying Boto3 session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception. - exc: Exception to be serialized and persisted - Raises: - SerializationError: when fail to serialize object to bytes. - """ - pickling_support.install() - - _upload_payload_and_metadata_to_s3( - bytes_to_upload=CloudpickleSerializer.serialize(exc), - hmac_key=hmac_key, - s3_uri=s3_uri, - sagemaker_session=sagemaker_session, - s3_kms_key=s3_kms_key, - ) - - -def _upload_payload_and_metadata_to_s3( - bytes_to_upload: Union[bytes, io.BytesIO], - hmac_key: str, - s3_uri: str, - sagemaker_session: Session, - s3_kms_key, -): - """Uploads serialized payload and metadata to s3. - - Args: - bytes_to_upload (bytes): Serialized bytes to upload. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying Boto3 session which AWS service calls are delegated to. 
- s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - """ - _upload_bytes_to_s3(bytes_to_upload, f"{s3_uri}/payload.pkl", s3_kms_key, sagemaker_session) - - sha256_hash = _compute_hash(bytes_to_upload, secret_key=hmac_key) - - _upload_bytes_to_s3( - _MetaData(sha256_hash).to_json(), - f"{s3_uri}/metadata.json", - s3_kms_key, - sagemaker_session, - ) - - -def deserialize_exception_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any: - """Downloads from S3 and then deserializes exception. - - Args: - sagemaker_session (sagemaker.core.helper.session.Session): - The underlying sagemaker session which AWS service calls are delegated to. - s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. - hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception. - Returns : - Deserialized exception with traceback. - Raises: - DeserializationError: when fail to serialize object to bytes. - """ - - metadata = _MetaData.from_json( - _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) - ) - - bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) - - _perform_integrity_check( - expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize - ) - - return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) - - -def _upload_bytes_to_s3(b: Union[bytes, io.BytesIO], s3_uri, s3_kms_key, sagemaker_session): - """Wrapping s3 uploading with exception translation for remote function.""" - try: - S3Uploader.upload_bytes(b, s3_uri, kms_key=s3_kms_key, sagemaker_session=sagemaker_session) - except Exception as e: - raise ServiceError( - "Failed to upload serialized bytes to {}: {}".format(s3_uri, repr(e)) - ) from e - - -def _read_bytes_from_s3(s3_uri, sagemaker_session): - """Wrapping s3 downloading with exception translation for remote function.""" - try: - return S3Downloader.read_bytes(s3_uri, 
sagemaker_session=sagemaker_session) - except Exception as e: - raise ServiceError( - "Failed to read serialized bytes from {}: {}".format(s3_uri, repr(e)) - ) from e - - -def _compute_hash(buffer: bytes, secret_key: str) -> str: - """Compute the hmac-sha256 hash""" - return hmac.new(secret_key.encode(), msg=buffer, digestmod=hashlib.sha256).hexdigest() - - -def _perform_integrity_check(expected_hash_value: str, secret_key: str, buffer: bytes): - """Performs integrity checks for serialized code/arguments uploaded to s3. - - Verifies whether the hash read from s3 matches the hash calculated - during remote function execution. - """ - actual_hash_value = _compute_hash(buffer=buffer, secret_key=secret_key) - if not hmac.compare_digest(expected_hash_value, actual_hash_value): - raise DeserializationError( - "Integrity check for the serialized function or data failed. " - "Please restrict access to your S3 bucket" - ) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/core/stored_function.py b/sagemaker-core/src/sagemaker/core/remote_function/core/stored_function.py deleted file mode 100644 index 48724d8e36..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/core/stored_function.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""SageMaker job function serializer/deserializer.""" -from __future__ import absolute_import - -import os -from dataclasses import dataclass -from typing import Any - - -from sagemaker.core.s3 import s3_path_join -from sagemaker.core.remote_function import logging_config -from sagemaker.core.remote_function.core.pipeline_variables import ( - Context, - resolve_pipeline_variables, -) - -import sagemaker.core.remote_function.core.serialization as serialization -from sagemaker.core.helper.session_helper import Session - - -logger = logging_config.get_logger() - - -FUNCTION_FOLDER = "function" -ARGUMENTS_FOLDER = "arguments" -RESULTS_FOLDER = "results" -EXCEPTION_FOLDER = "exception" -JSON_SERIALIZED_RESULT_KEY = "Result" -JSON_RESULTS_FILE = "results.json" - - -@dataclass -class _SerializedData: - """Data class to store serialized function and arguments""" - - func: bytes - args: bytes - - -class StoredFunction: - """Class representing a remote function stored in S3.""" - - def __init__( - self, - sagemaker_session: Session, - s3_base_uri: str, - hmac_key: str, - s3_kms_key: str = None, - context: Context = Context(), - ): - """Construct a StoredFunction object. - - Args: - sagemaker_session: (sagemaker.session.Session): The underlying sagemaker session which - AWS service calls are delegated to. - s3_base_uri: the base uri to which serialized artifacts will be uploaded. - s3_kms_key: KMS key used to encrypt artifacts uploaded to S3. - hmac_key: Key used to encrypt serialized and deserialized function and arguments. - context: Build or run context of a pipeline step. 
- """ - self.sagemaker_session = sagemaker_session - self.s3_base_uri = s3_base_uri - self.s3_kms_key = s3_kms_key - self.hmac_key = hmac_key - self.context = context - - # For pipeline steps, function code is at: base/step_name/build_timestamp/ - # For results, path is: base/step_name/build_timestamp/execution_id/ - # This ensures uniqueness: build_timestamp per build, execution_id per run - if context.step_name and context.func_step_s3_dir: - # Pipeline step: include build timestamp in both paths - self.func_upload_path = s3_path_join( - s3_base_uri, context.step_name, context.func_step_s3_dir - ) - self.results_upload_path = s3_path_join( - s3_base_uri, context.step_name, context.func_step_s3_dir, context.execution_id - ) - else: - # Regular remote function: original behavior - self.func_upload_path = s3_path_join( - s3_base_uri, context.step_name, context.func_step_s3_dir - ) - self.results_upload_path = s3_path_join( - s3_base_uri, context.execution_id, context.step_name - ) - - def save(self, func, *args, **kwargs): - """Serialize and persist the function and arguments. - - Args: - func: the python function. - args: the positional arguments to func. - kwargs: the keyword arguments to func. 
- Returns: - None - """ - - logger.info( - "Serializing function code to %s", s3_path_join(self.func_upload_path, FUNCTION_FOLDER) - ) - serialization.serialize_func_to_s3( - func=func, - sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), - s3_kms_key=self.s3_kms_key, - hmac_key=self.hmac_key, - ) - - logger.info( - "Serializing function arguments to %s", - s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - ) - - serialization.serialize_obj_to_s3( - obj=(args, kwargs), - sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - hmac_key=self.hmac_key, - s3_kms_key=self.s3_kms_key, - ) - - def save_pipeline_step_function(self, serialized_data): - """Upload serialized function and arguments to s3. - - Args: - serialized_data (_SerializedData): The serialized function - and function arguments of a function step. - """ - - logger.info( - "Uploading serialized function code to %s", - s3_path_join(self.func_upload_path, FUNCTION_FOLDER), - ) - serialization._upload_payload_and_metadata_to_s3( - bytes_to_upload=serialized_data.func, - hmac_key=self.hmac_key, - s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), - sagemaker_session=self.sagemaker_session, - s3_kms_key=self.s3_kms_key, - ) - - logger.info( - "Uploading serialized function arguments to %s", - s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - ) - serialization._upload_payload_and_metadata_to_s3( - bytes_to_upload=serialized_data.args, - hmac_key=self.hmac_key, - s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - sagemaker_session=self.sagemaker_session, - s3_kms_key=self.s3_kms_key, - ) - - def load_and_invoke(self) -> Any: - """Load and deserialize the function and the arguments and then execute it.""" - - logger.info( - "Deserializing function code from %s", - s3_path_join(self.func_upload_path, FUNCTION_FOLDER), - ) - func = serialization.deserialize_func_from_s3( - 
sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), - hmac_key=self.hmac_key, - ) - - logger.info( - "Deserializing function arguments from %s", - s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - ) - args, kwargs = serialization.deserialize_obj_from_s3( - sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), - hmac_key=self.hmac_key, - ) - - logger.info("Resolving pipeline variables") - resolved_args, resolved_kwargs = resolve_pipeline_variables( - self.context, - args, - kwargs, - hmac_key=self.hmac_key, - s3_base_uri=self.s3_base_uri, - sagemaker_session=self.sagemaker_session, - ) - - logger.info("Invoking the function") - result = func(*resolved_args, **resolved_kwargs) - - logger.info( - "Serializing the function return and uploading to %s", - s3_path_join(self.results_upload_path, RESULTS_FOLDER), - ) - serialization.serialize_obj_to_s3( - obj=result, - sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join(self.results_upload_path, RESULTS_FOLDER), - hmac_key=self.hmac_key, - s3_kms_key=self.s3_kms_key, - ) - - if self.context and self.context.serialize_output_to_json: - logger.info( - "JSON Serializing the function return and uploading to %s", - s3_path_join(self.results_upload_path, RESULTS_FOLDER), - ) - serialization.json_serialize_obj_to_s3( - obj=result, - json_key=JSON_SERIALIZED_RESULT_KEY, - sagemaker_session=self.sagemaker_session, - s3_uri=s3_path_join( - os.path.join(self.results_upload_path, RESULTS_FOLDER, JSON_RESULTS_FILE) - ), - s3_kms_key=self.s3_kms_key, - ) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/custom_file_filter.py b/sagemaker-core/src/sagemaker/core/remote_function/custom_file_filter.py deleted file mode 100644 index c82cc7eee7..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/custom_file_filter.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""SageMaker remote function client.""" -from __future__ import absolute_import - -import fnmatch -import os -import shutil -from typing import List, Optional, Callable, Union - -from sagemaker.core.common_utils import resolve_value_from_config -from sagemaker.core.config.config_schema import REMOTE_FUNCTION_PATH, CUSTOM_FILE_FILTER - - -class CustomFileFilter: - """Configuration that specifies how the local working directory should be packaged.""" - - def __init__(self, *, ignore_name_patterns: List[str] = None): - """Initialize a CustomFileFilter. - - Args: - ignore_name_patterns (List[str]): ignore files or directories with names - that match one of the glob-style patterns. Defaults to None. - """ - - if ignore_name_patterns is None: - ignore_name_patterns = [] - - self._workdir = os.getcwd() - self._ignore_name_patterns = ignore_name_patterns - - @property - def ignore_name_patterns(self): - """Get the ignore name patterns.""" - return self._ignore_name_patterns - - @property - def workdir(self): - """Get the working directory.""" - return self._workdir - - -def resolve_custom_file_filter_from_config_file( - direct_input: Union[Callable[[str, List], List], CustomFileFilter] = None, - sagemaker_session=None, -) -> Union[Callable[[str, List], List], CustomFileFilter, None]: - """Resolve the CustomFileFilter configuration from the config file. 
- - Args: - direct_input (Callable[[str, List], List], CustomFileFilter): direct input from the user. - sagemaker_session (sagemaker.core.helper.session.Session): sagemaker session. - Returns: - CustomFileFilter: configuration that specifies how the local - working directory should be packaged. - """ - if direct_input is not None: - return direct_input - ignore_name_patterns = resolve_value_from_config( - direct_input=None, - config_path=".".join([REMOTE_FUNCTION_PATH, CUSTOM_FILE_FILTER, "IgnoreNamePatterns"]), - default_value=None, - sagemaker_session=sagemaker_session, - ) - if ignore_name_patterns is not None: - return CustomFileFilter(ignore_name_patterns=ignore_name_patterns) - return None - - -def copy_workdir( - dst: str, - custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, -): - """Copy the local working directory to the destination. - - Args: - dst (str): destination path. - custom_file_filter (Union[Callable[[str, List], List], CustomFileFilter): configuration that - specifies how the local working directory should be packaged. 
- """ - - def _ignore_patterns(path: str, names: List): # pylint: disable=unused-argument - ignored_names = set() - if custom_file_filter.ignore_name_patterns is not None: - for pattern in custom_file_filter.ignore_name_patterns: - ignored_names.update(fnmatch.filter(names, pattern)) - return ignored_names - - def _filter_non_python_files(path: str, names: List) -> List: - """Ignore function for filtering out non python files.""" - to_ignore = [] - for name in names: - full_path = os.path.join(path, name) - if os.path.isfile(full_path): - if not name.endswith(".py"): - to_ignore.append(name) - elif os.path.isdir(full_path): - if name == "__pycache__": - to_ignore.append(name) - else: - to_ignore.append(name) - - return to_ignore - - _ignore = None - _src = os.getcwd() - if not custom_file_filter: - _ignore = _filter_non_python_files - elif callable(custom_file_filter): - _ignore = custom_file_filter - elif isinstance(custom_file_filter, CustomFileFilter): - _ignore = _ignore_patterns - _src = custom_file_filter.workdir - - shutil.copytree( - _src, - dst, - ignore=_ignore, - ) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/errors.py b/sagemaker-core/src/sagemaker/core/remote_function/errors.py deleted file mode 100644 index d12fde52d6..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/errors.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""Definitions for reomote job errors and error handling""" -from __future__ import absolute_import - -import os - -from tblib import pickling_support -from sagemaker.core.s3 import s3_path_join -import sagemaker.core.remote_function.core.serialization as serialization - - -DEFAULT_FAILURE_CODE = 1 -FAILURE_REASON_PATH = "/opt/ml/output/failure" - - -@pickling_support.install -class RemoteFunctionError(Exception): - """The base exception class for remote function exceptions""" - - def __init__(self, message): - self.message = message - super().__init__(self.message) - - -@pickling_support.install -class ServiceError(RemoteFunctionError): - """Raised when errors encountered during interaction with SageMaker, S3 service APIs""" - - -@pickling_support.install -class SerializationError(RemoteFunctionError): - """Raised when errors encountered during serialization of remote function objects""" - - -@pickling_support.install -class DeserializationError(RemoteFunctionError): - """Raised when errors encountered during deserialization of remote function objects""" - - -def _get_valid_failure_exit_code(exit_code) -> int: - """Normalize exit code for terminating the process""" - try: - valid_exit_code = int(exit_code) - except (TypeError, ValueError): - valid_exit_code = DEFAULT_FAILURE_CODE - - return valid_exit_code - - -def _write_failure_reason_file(failure_msg): - """Create a file 'failure' with failure reason written if remote function execution failed. - - See: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html - Args: - failure_msg: The content of file to be written. - """ - if not os.path.exists(FAILURE_REASON_PATH): - with open(FAILURE_REASON_PATH, "w") as f: - f.write(failure_msg) - - -def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key, hmac_key) -> int: - """Handle all exceptions raised during remote function execution. - - Args: - error (Exception): The error to be handled. 
- sagemaker_session (sagemaker.core.helper.session.Session): The underlying Boto3 session which - AWS service calls are delegated to. - s3_base_uri (str): S3 root uri to which resulting serialized exception will be uploaded. - s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. - hmac_key (str): Key used to calculate hmac hash of the serialized exception. - Returns : - exit_code (int): Exit code to terminate current job. - """ - - failure_reason = repr(error) - if isinstance(error, RemoteFunctionError): - exit_code = DEFAULT_FAILURE_CODE - else: - error_number = getattr(error, "errno", DEFAULT_FAILURE_CODE) - exit_code = _get_valid_failure_exit_code(error_number) - - _write_failure_reason_file(failure_reason) - - serialization.serialize_exception_to_s3( - exc=error, - sagemaker_session=sagemaker_session, - s3_uri=s3_path_join(s3_base_uri, "exception"), - hmac_key=hmac_key, - s3_kms_key=s3_kms_key, - ) - - return exit_code diff --git a/sagemaker-core/src/sagemaker/core/remote_function/invoke_function.py b/sagemaker-core/src/sagemaker/core/remote_function/invoke_function.py deleted file mode 100644 index d353232b57..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/invoke_function.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""An entry point for invoking remote function inside a job.""" - -from __future__ import absolute_import - -import argparse -import sys -import json -import os -from typing import TYPE_CHECKING - -import boto3 -from sagemaker.core.remote_function.job import ( - KEY_EXPERIMENT_NAME, - KEY_RUN_NAME, -) - -from sagemaker.core.helper.session_helper import Session -from sagemaker.core.s3 import s3_path_join -from sagemaker.core.remote_function.errors import handle_error -from sagemaker.core.remote_function import logging_config -from sagemaker.core.remote_function.core.pipeline_variables import Context - -if TYPE_CHECKING: - from sagemaker.core.experiments.run import Run - - -SUCCESS_EXIT_CODE = 0 - - -def _parse_args(args): - """Parses CLI arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument("--region", type=str, required=True) - parser.add_argument("--s3_base_uri", type=str, required=True) - parser.add_argument("--s3_kms_key", type=str) - parser.add_argument("--run_in_context", type=str) - parser.add_argument("--pipeline_step_name", type=str) - parser.add_argument("--pipeline_execution_id", type=str) - parser.add_argument("--property_references", nargs="+", type=str, default=[]) - parser.add_argument( - "--serialize_output_to_json", default=False, type=lambda x: (str(x).lower() == "true") - ) - parser.add_argument("--func_step_s3_dir", type=str) - - args, _ = parser.parse_known_args(args) - return args - - -def _get_sagemaker_session(region): - """Get sagemaker session for interacting with AWS or Sagemaker services""" - boto_session = boto3.session.Session(region_name=region) - return Session(boto_session=boto_session) - - -def _load_run_object(run_in_context: str, sagemaker_session: Session) -> "Run": - """Load current run in json string into run object""" - from sagemaker.core.experiments.run import Run - - run_dict = json.loads(run_in_context) - return Run( - experiment_name=run_dict.get(KEY_EXPERIMENT_NAME), - 
run_name=run_dict.get(KEY_RUN_NAME), - sagemaker_session=sagemaker_session, - ) - - -def _load_pipeline_context(args) -> Context: - """Load pipeline build or run context into context object""" - - pipeline_step_name = args.pipeline_step_name - pipeline_execution_id = args.pipeline_execution_id - property_references = args.property_references - serialize_output_to_json = args.serialize_output_to_json - func_step_s3_dir = args.func_step_s3_dir - - property_references_dict = {} - for i in range(0, len(property_references), 2): - property_references_dict[property_references[i]] = property_references[i + 1] - return Context( - step_name=pipeline_step_name, - execution_id=pipeline_execution_id, - property_references=property_references_dict, - serialize_output_to_json=serialize_output_to_json, - func_step_s3_dir=func_step_s3_dir, - ) - - -def _execute_remote_function( - sagemaker_session, s3_base_uri, s3_kms_key, run_in_context, hmac_key, context -): - """Execute stored remote function""" - from sagemaker.core.remote_function.core.stored_function import StoredFunction - - stored_function = StoredFunction( - sagemaker_session=sagemaker_session, - s3_base_uri=s3_base_uri, - s3_kms_key=s3_kms_key, - hmac_key=hmac_key, - context=context, - ) - - if run_in_context: - run_obj = _load_run_object(run_in_context, sagemaker_session) - with run_obj: - stored_function.load_and_invoke() - else: - stored_function.load_and_invoke() - - -def main(sys_args=None): - """Entry point for invoke function script - - Args: - sys_args (list): List of arguments to parse. If not specified, sys.argv is used. 
- """ - - logger = logging_config.get_logger() - - exit_code = SUCCESS_EXIT_CODE - - try: - args = _parse_args(sys_args) - region = args.region - s3_base_uri = args.s3_base_uri - s3_kms_key = args.s3_kms_key - run_in_context = args.run_in_context - pipeline_context = _load_pipeline_context(args) - - hmac_key = os.getenv("REMOTE_FUNCTION_SECRET_KEY") - - sagemaker_session = _get_sagemaker_session(region) - _execute_remote_function( - sagemaker_session=sagemaker_session, - s3_base_uri=s3_base_uri, - s3_kms_key=s3_kms_key, - run_in_context=run_in_context, - hmac_key=hmac_key, - context=pipeline_context, - ) - - except Exception as e: # pylint: disable=broad-except - logger.exception("Error encountered while invoking the remote function.") - s3_uri = ( - s3_path_join(s3_base_uri, pipeline_context.execution_id, pipeline_context.step_name) - if pipeline_context.step_name - else s3_base_uri - ) - exit_code = handle_error( - error=e, - sagemaker_session=sagemaker_session, - s3_base_uri=s3_uri, - s3_kms_key=s3_kms_key, - hmac_key=hmac_key, - ) - finally: - sys.exit(exit_code) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/job.py b/sagemaker-core/src/sagemaker/core/remote_function/job.py deleted file mode 100644 index bed00e148f..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/job.py +++ /dev/null @@ -1,2140 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""Helper classes that interact with SageMaker Training service.""" -from __future__ import absolute_import - -import dataclasses -import json -import os -import re -import secrets -import shutil -import sys -import time -from io import BytesIO -from typing import Callable, Dict, List, Optional, Tuple, Union, TYPE_CHECKING -from urllib.parse import urlparse - -import botocore -from botocore.exceptions import ClientError - -from sagemaker.core.config.config_schema import ( - REMOTE_FUNCTION_ENVIRONMENT_VARIABLES, - REMOTE_FUNCTION_IMAGE_URI, - REMOTE_FUNCTION_DEPENDENCIES, - REMOTE_FUNCTION_PRE_EXECUTION_COMMANDS, - REMOTE_FUNCTION_PRE_EXECUTION_SCRIPT, - REMOTE_FUNCTION_INCLUDE_LOCAL_WORKDIR, - REMOTE_FUNCTION_INSTANCE_TYPE, - REMOTE_FUNCTION_JOB_CONDA_ENV, - REMOTE_FUNCTION_ROLE_ARN, - REMOTE_FUNCTION_S3_ROOT_URI, - REMOTE_FUNCTION_S3_KMS_KEY_ID, - REMOTE_FUNCTION_VOLUME_KMS_KEY_ID, - REMOTE_FUNCTION_TAGS, - REMOTE_FUNCTION_VPC_CONFIG_SUBNETS, - REMOTE_FUNCTION_VPC_CONFIG_SECURITY_GROUP_IDS, - REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION, -) -from sagemaker.core.experiments._run_context import _RunContext -from sagemaker.core.experiments.run import Run -from sagemaker.core.image_uris import get_base_python_image_uri -from sagemaker.core import image_uris -from sagemaker.core.remote_function.checkpoint_location import CheckpointLocation -from sagemaker.core.helper.session_helper import get_execution_role, expand_role, Session -from sagemaker.core.common_utils import ( - name_from_base, - _tmpdir, - resolve_value_from_config, - format_tags, - Tags, -) -from sagemaker.core.s3 import s3_path_join, S3Uploader - -from sagemaker.core.remote_function.core.stored_function import StoredFunction, _SerializedData -from sagemaker.core.remote_function.core.pipeline_variables import Context - -from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - RuntimeEnvironmentManager, - _DependencySettings, -) -from 
sagemaker.core.remote_function import logging_config -from sagemaker.core.remote_function.spark_config import SparkConfig -from sagemaker.core.remote_function.custom_file_filter import ( - CustomFileFilter, - copy_workdir, - resolve_custom_file_filter_from_config_file, -) - -# Lazy import to avoid circular dependency - DelayedReturn is in MLOps which depends on Core -# from sagemaker.mlops.workflow.function_step import DelayedReturn -from sagemaker.core.workflow.step_outputs import get_step -from sagemaker.core import exceptions -from sagemaker.core import network as vpc_utils - -from sagemaker.core import logs as sagemaker_logs - -from sagemaker.core.common_utils import ( - _wait_until, - secondary_training_status_changed, - secondary_training_status_message, -) -from sagemaker.core.config.config_utils import _append_sagemaker_config_tags - -if TYPE_CHECKING: - from sagemaker.core.helper.pipeline_variable import PipelineVariable - -# runtime script names -BOOTSTRAP_SCRIPT_NAME = "bootstrap_runtime_environment.py" -MPI_UTILS_SCRIPT_NAME = "mpi_utils_remote.py" -ENTRYPOINT_SCRIPT_NAME = "job_driver.sh" -PRE_EXECUTION_SCRIPT_NAME = "pre_exec.sh" -RUNTIME_MANAGER_SCRIPT_NAME = "runtime_environment_manager.py" -SPARK_APP_SCRIPT_NAME = "spark_app.py" - -# training channel names -RUNTIME_SCRIPTS_CHANNEL_NAME = "sagemaker_remote_function_bootstrap" -REMOTE_FUNCTION_WORKSPACE = "sm_rf_user_ws" -JOB_REMOTE_FUNCTION_WORKSPACE = "sagemaker_remote_function_workspace" -SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME = "pre_exec_script_and_dependencies" - -# Spark config channel and file name -SPARK_CONF_CHANNEL_NAME = "conf" -SPARK_CONF_FILE_NAME = "configuration.json" - -# Spark submitted files workspace names on S3 -SPARK_SUBMIT_JARS_WORKSPACE = "sm_rf_spark_jars" -SPARK_SUBMIT_PY_FILES_WORKSPACE = "sm_rf_spark_py_files" -SPARK_SUBMIT_FILES_WORKSPACE = "sm_rf_spark_data_files" -SPARK_CONF_WORKSPACE = "sm_rf_spark_conf" - -# default spark version -DEFAULT_SPARK_VERSION = "3.3" 
-DEFAULT_SPARK_CONTAINER_VERSION = "v1" - -SPARK_NAME = "spark" - -# run context dictionary keys -KEY_EXPERIMENT_NAME = "experiment_name" -KEY_RUN_NAME = "run_name" - -JOBS_CONTAINER_ENTRYPOINT = [ - "/bin/bash", - f"/opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{ENTRYPOINT_SCRIPT_NAME}", -] - -SPARK_APP_SCRIPT_PATH = f"/opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{SPARK_APP_SCRIPT_NAME}" - -ENTRYPOINT_SCRIPT = f""" -#!/bin/bash - -# Entry point for bootstrapping runtime environment and invoking remote function - -set -eu - -PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} -export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs -printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" -export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip -printf "INFO: PIP_CACHE_DIR is set to '$PIP_CACHE_DIR'\\n" - -printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" -cat /opt/ml/input/config/resourceconfig.json - -printf "INFO: Bootstraping runtime environment.\\n" -python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" -source /opt/ml/input/sm_training.env - -if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] -then - if [ -f "remote_function_conda_env.txt" ] - then - cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt - fi - printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" - cd {JOB_REMOTE_FUNCTION_WORKSPACE} -fi - -if [ -f "remote_function_conda_env.txt" ] -then - conda_env=$(cat remote_function_conda_env.txt) - - if which mamba >/dev/null; then - conda_exe="mamba" - else - conda_exe="conda" - fi - - printf "INFO: Invoking remote function inside conda environment: $conda_env.\\n" - printf "INFO: $conda_exe run -n $conda_env python -m sagemaker.train.remote_function.invoke_function \\n" - $conda_exe run -n $conda_env python -m 
sagemaker.train.remote_function.invoke_function "$@" -else - printf "INFO: No conda env provided. Invoking remote function\\n" - printf "INFO: python -m sagemaker.train.remote_function.invoke_function \\n" - python -m sagemaker.train.remote_function.invoke_function "$@" -fi -""" - -ENTRYPOINT_MPIRUN_SCRIPT = f""" -#!/bin/bash - -# Entry point for bootstrapping runtime environment and invoking remote function with mpirun - -set -eu - -PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} -export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs -printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" -export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip -printf "INFO: PIP_CACHE_DIR is set to '$PIP_CACHE_DIR'\\n" - -printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" -cat /opt/ml/input/config/resourceconfig.json - -printf "INFO: Bootstraping runtime environment.\\n" -python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" -source /opt/ml/input/sm_training.env - -if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] -then - if [ -f "remote_function_conda_env.txt" ] - then - cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt - fi - printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" - cd {JOB_REMOTE_FUNCTION_WORKSPACE} -fi - -if [ -f "remote_function_conda_env.txt" ] -then - conda_env=$(cat remote_function_conda_env.txt) - - if which mamba >/dev/null; then - conda_exe="mamba" - else - conda_exe="conda" - fi - - if [ "$SM_CURRENT_HOST" = "$SM_MASTER_ADDR" ]; then - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} - - printf "INFO: Invoking remote function with mpirun inside conda environment: $conda_env.\\n" - printf "INFO: $conda_exe run -n $conda_env mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ - --allow-run-as-root 
--display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ - -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ - -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ - -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ - - python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n" - $conda_exe run -n $conda_env mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ - --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ - -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ - -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ - -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ - $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ - python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@" - - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1 - else - printf "INFO: This is the instance $SM_CURRENT_HOST. mpirun command terminated\\n" - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} - fi -else - if [ "$SM_CURRENT_HOST" = "$SM_MASTER_ADDR" ]; then - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} - - printf "INFO: No conda env provided. 
Invoking remote function with mpirun\\n" - printf "INFO: mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ - --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ - -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ - -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ - -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ - $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ - python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n" - - mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ - --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ - -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ - -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ - -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ - $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ - python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@" - - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1 - else - printf "INFO: This is the instance $SM_CURRENT_HOST.\\n" - python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} - fi -fi -""" - -ENTRYPOINT_TORCHRUN_SCRIPT = f""" -#!/bin/bash - -# Entry point for bootstrapping runtime environment and invoking remote function with torchrun - -set -eu - -PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} -export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs -printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" -export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip -printf "INFO: PIP_CACHE_DIR is set to 
'$PIP_CACHE_DIR'\\n" - -printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" -cat /opt/ml/input/config/resourceconfig.json - -printf "INFO: Bootstraping runtime environment.\\n" -python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" -source /opt/ml/input/sm_training.env - -if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] -then - if [ -f "remote_function_conda_env.txt" ] - then - cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt - fi - printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" - cd {JOB_REMOTE_FUNCTION_WORKSPACE} -fi - -if [ -f "remote_function_conda_env.txt" ] -then - conda_env=$(cat remote_function_conda_env.txt) - - if which mamba >/dev/null; then - conda_exe="mamba" - else - conda_exe="conda" - fi - - printf "INFO: Invoking remote function with torchrun inside conda environment: $conda_env.\\n" - printf "INFO: $conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \ - --master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \ - -m sagemaker.train.remote_function.invoke_function \\n" - - $conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \ - --master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \ - -m sagemaker.train.remote_function.invoke_function "$@" -else - printf "INFO: No conda env provided. 
Invoking remote function with torchrun\\n" - printf "INFO: torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \ - --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function \\n" - - torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \ - --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function "$@" -fi -""" - -SPARK_ENTRYPOINT_SCRIPT = f""" -#!/bin/bash - -# Entry point for bootstrapping runtime environment and invoking remote function for Spark - -set -eu - -printf "INFO: Bootstraping Spark runtime environment.\\n" - -python3 /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" - -# Spark Container entry point script to initiate the spark application -smspark-submit "$@" -""" - -_STATUS_CODE_TABLE = { - "COMPLETED": "Completed", - "INPROGRESS": "InProgress", - "IN_PROGRESS": "InProgress", - "FAILED": "Failed", - "STOPPED": "Stopped", - "STOPPING": "Stopping", - "STARTING": "Starting", - "PENDING": "Pending", -} - -logger = logging_config.get_logger() - - -class LogState(object): - """Placeholder docstring""" - - STARTING = 1 - WAIT_IN_PROGRESS = 2 - TAILING = 3 - JOB_COMPLETE = 4 - COMPLETE = 5 - - -class _JobSettings: - """Helper class that processes the job settings. - - It validates the job settings and provides default values if necessary. 
- """ - - def __init__( - self, - *, - dependencies: str = None, - pre_execution_commands: List[str] = None, - pre_execution_script: str = None, - environment_variables: Dict[str, Union[str, "PipelineVariable"]] = None, - image_uri: Union[str, "PipelineVariable"] = None, - include_local_workdir: bool = None, - custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, - instance_count: Union[int, "PipelineVariable"] = 1, - instance_type: Union[str, "PipelineVariable"] = None, - job_conda_env: Union[str, "PipelineVariable"] = None, - job_name_prefix: str = None, - keep_alive_period_in_seconds: Union[int, "PipelineVariable"] = 0, - max_retry_attempts: Union[int, "PipelineVariable"] = 1, - max_runtime_in_seconds: Union[int, "PipelineVariable"] = 24 * 60 * 60, - role: str = None, - s3_kms_key: Union[str, "PipelineVariable"] = None, - s3_root_uri: str = None, - sagemaker_session: Session = None, - security_group_ids: List[Union[str, "PipelineVariable"]] = None, - subnets: List[Union[str, "PipelineVariable"]] = None, - tags: Optional[Tags] = None, - volume_kms_key: Union[str, "PipelineVariable"] = None, - volume_size: Union[int, "PipelineVariable"] = 30, - encrypt_inter_container_traffic: Union[bool, "PipelineVariable"] = None, - spark_config: SparkConfig = None, - use_spot_instances=False, - max_wait_time_in_seconds=None, - disable_output_compression: bool = False, - use_torchrun: bool = False, - use_mpirun: bool = False, - nproc_per_node: Optional[int] = None, - ): - """Initialize a _JobSettings instance which configures the remote job. - - Args: - dependencies (str): Either the path to a dependencies file or the reserved keyword - ``auto_capture``. Defaults to ``None``. - If ``dependencies`` is provided, the value must be one of the following: - - * A path to a conda environment.yml file. The following conditions apply. 
- - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - then the conda environment is updated by installing dependencies from the - yaml file and the function is invoked within that conda environment. For - this to succeed, the conda environment name must already be set in - ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already - exist in the image. - * If none of the previous conditions are met, a new conda environment named - ``sagemaker-runtime-env`` is created and the function annotated with the remote - decorator is invoked in that conda environment. - - * A path to a requirements.txt file. The following conditions apply. - - * If ``job_conda_env`` is set in the remote decorator, dependencies are installed - within that conda environment and the function annotated with the remote decorator - is invoked in the same conda environment. For this to succeed, the specified - conda environment must already exist in the image. - * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - dependencies are installed within that conda environment and the function - annotated with the remote decorator is invoked in the same. For this to succeed, - the conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and - ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. - * If none of the above conditions are met, conda is not used. Dependencies are - installed at the system level, without any virtual environment, and the function - annotated with the remote decorator is invoked using the Python runtime available - in the system path. - - * The parameter dependencies is set to ``auto_capture``. 
SageMaker will automatically - generate an env_snapshot.yml corresponding to the current active conda environment’s - snapshot. You do not need to provide a dependencies file. The following conditions - apply: - - * You must run the remote function within an active conda environment. - * When installing the dependencies on the training job, the same conditions - as when dependencies is set to a path to a conda environment file apply. - These conditions are as follows: - - * If job_conda_env is set, then the conda environment is updated by installing - dependencies from the yaml file and the function is invoked within that - conda environment. For this to succeed, the specified conda environment must - already exist in the image. - * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, - then the conda environment is updated by installing dependencies from the yaml - file and the function is invoked within that conda environment. For this to - succeed, the conda environment name must already be set in - ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist - in the image. - * If none of the previous conditions are met, a new conda environment with name - ``sagemaker-runtime-env`` is created and the function annotated with the - remote decorator is invoked in that conda environment. - - * ``None``. SageMaker will assume that there are no dependencies to install while - executing the remote annotated function in the training job. - - pre_execution_commands (List[str]): List of commands to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. - - pre_execution_script (str): Path to script file to be executed prior to executing - remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` - can be specified at the same time. Defaults to None. 
- - environment_variables (dict[str, str] or dict[str, PipelineVariable]): The environment - variables used inside the decorator function. Defaults to ``None``. - - image_uri (str, PipelineVariable): The universal resource identifier (URI) location of - a Docker image on Amazon Elastic Container Registry (ECR). Defaults to the following - based on where the SDK is running: - - * For users who specify ``spark_config`` and want to run the function in a Spark - application, the ``image_uri`` should be ``None``. A SageMaker Spark image will - be used for training, otherwise a ``ValueError`` is thrown. - * For users on SageMaker Studio notebooks, the image used as the kernel image for - the notebook is used. - * For other users, it is resolved to base python image with the same python version - as the environment running the local code. - - If no compatible image is found, a ValueError is thrown. - - include_local_workdir (bool): A flag to indicate that the remote function should include - local directories. Set to ``True`` if the remote function code imports local modules - and methods that are not available via PyPI or conda. Default value is ``False``. - - custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function - that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object - that specifies the local directories and files to be included in the remote function. - If a callable is passed in, that function is passed to the ``ignore`` argument of - ``shutil.copytree``. Defaults to ``None``, which means only python - files are accepted and uploaded to S3. - - instance_count (int, PipelineVariable): The number of instances to use. Defaults to 1. - - instance_type (str, PipelineVariable): The Amazon Elastic Compute Cloud (EC2) instance - type to use to run the SageMaker job. e.g. ml.c4.xlarge. If not provided, - a ValueError is thrown. 
- - job_conda_env (str, PipelineVariable): The name of the conda environment to activate - during job's runtime. Defaults to ``None``. - - job_name_prefix (str, PipelineVariable): The prefix used to create the underlying - SageMaker job. - - keep_alive_period_in_seconds (int, PipelineVariable): The duration in seconds to retain - and reuse provisioned infrastructure after the completion of a training job, also - known as SageMaker managed warm pools. The use of warm pools reduces the latency time - spent to provision new resources. The default value for - ``keep_alive_period_in_seconds`` is 0. - NOTE: Additional charges associated with warm pools may apply. Using this parameter - also activates a new persistent cache feature, which will further reduce job start up - latency than over using SageMaker managed warm pools alone by caching the package - source downloaded in the previous runs. - - max_retry_attempts (int, PipelineVariable): The max number of times the job is retried - on ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. - - max_runtime_in_seconds (int, PipelineVariable): The upper limit in seconds to be used - for training. After this specified amount of time, SageMaker terminates the job - regardless of its current status. Defaults to 1 day or (86400 seconds). - - role (str): The IAM role (either name or full ARN) used to run your SageMaker training - job. Defaults to: - - * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or - SageMaker Studio Notebooks. - * if not above, a ValueError is thrown. - - s3_kms_key (str): The key used to encrypt the input and output data. - Default to ``None``. - - s3_root_uri (str): The root S3 folder to which the code archives and data are - uploaded to. Defaults to ``s3://``. - - sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to - which SageMaker service calls are delegated to (default: None). 
If not provided, - one is created using a default configuration chain. - - security_group_ids (List[str, PipelineVariable]): A list of security group IDs. - Defaults to ``None`` and the training job is created without VPC config. - - subnets (List[str, PipelineVariable]): A list of subnet IDs. Defaults to ``None`` - and the job is created without VPC config. - - tags (Optional[Tags]): Tags attached to the job. Defaults to ``None`` - and the training job is created without tags. - - volume_kms_key (str, PipelineVariable): An Amazon Key Management Service (KMS) key - used to encrypt an Amazon Elastic Block Storage (EBS) volume attached to the - training instance. Defaults to ``None``. - - volume_size (int, PipelineVariable): The size in GB of the storage volume for storing - input and output data during training. Defaults to ``30``. - - encrypt_inter_container_traffic (bool, PipelineVariable): A flag that specifies - whether traffic between training containers is encrypted for the training job. - Defaults to ``False``. - - spark_config (SparkConfig): Configurations to the Spark application that runs on - Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri - will be used for training. Note that ``image_uri`` can not be specified at the - same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. - - use_spot_instances (bool, PipelineVariable): Specifies whether to use SageMaker - Managed Spot instances for training. If enabled then the ``max_wait`` arg should - also be set. Defaults to ``False``. - - max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. - After this amount of time Amazon SageMaker will stop waiting for managed spot - training job to complete. Defaults to ``None``. - - disable_output_compression (bool): Optional. When set to true, Model is uploaded to - Amazon S3 without compression after training finishes. - - use_torchrun (bool): Specifies whether to use torchrun for distributed training. 
- Defaults to ``False``. - - use_mpirun (bool): Specifies whether to use mpirun for distributed training. - Defaults to ``False``. - - nproc_per_node (int): Optional. Specifies the number of processes per node for - distributed training. Defaults to ``None``. - This is defined automatically configured on the instance type. - """ - self.sagemaker_session = sagemaker_session or Session() - self.environment_variables = resolve_value_from_config( - direct_input=environment_variables, - config_path=REMOTE_FUNCTION_ENVIRONMENT_VARIABLES, - default_value={}, - sagemaker_session=self.sagemaker_session, - ) - self.environment_variables.update( - {"AWS_DEFAULT_REGION": self.sagemaker_session.boto_region_name} - ) - - # The following will be overridden by the _Job.compile method. - # However, it needs to be kept here for feature store SDK. - # TODO: update the feature store SDK to set the HMAC key there. - self.environment_variables.update({"REMOTE_FUNCTION_SECRET_KEY": secrets.token_hex(32)}) - - if spark_config and image_uri: - raise ValueError("spark_config and image_uri cannot be specified at the same time!") - - if spark_config and job_conda_env: - raise ValueError("Remote Spark jobs do not support job_conda_env.") - - if spark_config and dependencies == "auto_capture": - raise ValueError( - "Remote Spark jobs do not support automatically capturing dependencies." 
- ) - - _image_uri = resolve_value_from_config( - direct_input=image_uri, - config_path=REMOTE_FUNCTION_IMAGE_URI, - sagemaker_session=self.sagemaker_session, - ) - - if spark_config: - self.image_uri = self._get_default_spark_image(self.sagemaker_session) - logger.info( - "Set the image uri as %s because value of spark_config is " - "indicating this is a remote spark job.", - self.image_uri, - ) - elif _image_uri: - self.image_uri = _image_uri - else: - self.image_uri = self._get_default_image(self.sagemaker_session) - - self.dependencies = resolve_value_from_config( - direct_input=dependencies, - config_path=REMOTE_FUNCTION_DEPENDENCIES, - sagemaker_session=self.sagemaker_session, - ) - - self.pre_execution_commands = resolve_value_from_config( - direct_input=pre_execution_commands, - config_path=REMOTE_FUNCTION_PRE_EXECUTION_COMMANDS, - sagemaker_session=self.sagemaker_session, - ) - - self.pre_execution_script = resolve_value_from_config( - direct_input=pre_execution_script, - config_path=REMOTE_FUNCTION_PRE_EXECUTION_SCRIPT, - sagemaker_session=self.sagemaker_session, - ) - - if self.pre_execution_commands is not None and self.pre_execution_script is not None: - raise ValueError( - "Only one of pre_execution_commands or pre_execution_script can be specified!" 
- ) - - self.include_local_workdir = resolve_value_from_config( - direct_input=include_local_workdir, - config_path=REMOTE_FUNCTION_INCLUDE_LOCAL_WORKDIR, - default_value=False, - sagemaker_session=self.sagemaker_session, - ) - - self.custom_file_filter = resolve_custom_file_filter_from_config_file( - custom_file_filter, self.sagemaker_session - ) - - self.instance_type = resolve_value_from_config( - direct_input=instance_type, - config_path=REMOTE_FUNCTION_INSTANCE_TYPE, - sagemaker_session=self.sagemaker_session, - ) - if not self.instance_type: - raise ValueError("instance_type is a required parameter!") - - self.instance_count = instance_count - self.volume_size = volume_size - self.max_runtime_in_seconds = max_runtime_in_seconds - self.max_retry_attempts = max_retry_attempts - self.keep_alive_period_in_seconds = keep_alive_period_in_seconds - self.spark_config = spark_config - self.use_spot_instances = use_spot_instances - self.max_wait_time_in_seconds = max_wait_time_in_seconds - self.job_conda_env = resolve_value_from_config( - direct_input=job_conda_env, - config_path=REMOTE_FUNCTION_JOB_CONDA_ENV, - sagemaker_session=self.sagemaker_session, - ) - self.job_name_prefix = job_name_prefix - self.encrypt_inter_container_traffic = resolve_value_from_config( - direct_input=encrypt_inter_container_traffic, - config_path=REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION, - default_value=False, - sagemaker_session=self.sagemaker_session, - ) - self.enable_network_isolation = False - - _role = resolve_value_from_config( - direct_input=role, - config_path=REMOTE_FUNCTION_ROLE_ARN, - sagemaker_session=self.sagemaker_session, - ) - if _role: - self.role = expand_role(self.sagemaker_session.boto_session, _role) - else: - self.role = get_execution_role(self.sagemaker_session) - - self.s3_root_uri = resolve_value_from_config( - direct_input=s3_root_uri, - config_path=REMOTE_FUNCTION_S3_ROOT_URI, - default_value=s3_path_join( - "s3://", - 
self.sagemaker_session.default_bucket(), - self.sagemaker_session.default_bucket_prefix, - ), - sagemaker_session=self.sagemaker_session, - ) - - self.s3_kms_key = resolve_value_from_config( - direct_input=s3_kms_key, - config_path=REMOTE_FUNCTION_S3_KMS_KEY_ID, - sagemaker_session=self.sagemaker_session, - ) - self.volume_kms_key = resolve_value_from_config( - direct_input=volume_kms_key, - config_path=REMOTE_FUNCTION_VOLUME_KMS_KEY_ID, - sagemaker_session=self.sagemaker_session, - ) - - _subnets = resolve_value_from_config( - direct_input=subnets, - config_path=REMOTE_FUNCTION_VPC_CONFIG_SUBNETS, - sagemaker_session=self.sagemaker_session, - ) - _security_group_ids = resolve_value_from_config( - direct_input=security_group_ids, - config_path=REMOTE_FUNCTION_VPC_CONFIG_SECURITY_GROUP_IDS, - sagemaker_session=self.sagemaker_session, - ) - vpc_config = vpc_utils.to_dict(subnets=_subnets, security_group_ids=_security_group_ids) - self.vpc_config = vpc_utils.sanitize(vpc_config) - - tags = format_tags(tags) - self.tags = _append_sagemaker_config_tags( - self.sagemaker_session, tags, REMOTE_FUNCTION_TAGS - ) - - self.disable_output_compression = disable_output_compression - self.use_torchrun = use_torchrun - self.use_mpirun = use_mpirun - self.nproc_per_node = nproc_per_node - - @staticmethod - def _get_default_image(session): - """Return Studio notebook image, if in Studio env. Else, base python. - - Args: - session (Session): Boto session. - - Returns: - Default SageMaker base python image. - """ - - if ( - "SAGEMAKER_INTERNAL_IMAGE_URI" in os.environ - and os.environ["SAGEMAKER_INTERNAL_IMAGE_URI"] - ): - return os.environ["SAGEMAKER_INTERNAL_IMAGE_URI"] - - py_version = str(sys.version_info[0]) + str(sys.version_info[1]) - - if py_version not in ["310", "38"]: - raise ValueError( - "Default image is supported only for Python versions 3.8 and 3.10. If you " - "are using any other python version, you must provide a compatible image_uri." 
- ) - - region = session.boto_region_name - image_uri = get_base_python_image_uri(region=region, py_version=py_version) - - return image_uri - - @staticmethod - def _get_default_spark_image(session): - """Return the Spark image. - - Args: - session (Session): Boto session. - - Returns: - SageMaker Spark container image uri. - """ - - region = session.boto_region_name - - py_version = str(sys.version_info[0]) + str(sys.version_info[1]) - - if py_version not in ["39"]: - raise ValueError( - "The SageMaker Spark image for remote job only supports Python version 3.9. " - ) - - image_uri = image_uris.retrieve( - framework=SPARK_NAME, - region=region, - version=DEFAULT_SPARK_VERSION, - instance_type=None, - py_version=f"py{py_version}", - container_version=DEFAULT_SPARK_CONTAINER_VERSION, - ) - - return image_uri - - -class _Job: - """Helper class that interacts with the SageMaker training service.""" - - def __init__(self, job_name: str, s3_uri: str, sagemaker_session: Session, hmac_key: str): - """Initialize a _Job object. - - Args: - job_name (str): The training job name. - s3_uri (str): The training job output S3 uri. - sagemaker_session (Session): SageMaker boto session. - hmac_key (str): Remote function secret key. - """ - self.job_name = job_name - self.s3_uri = s3_uri - self.sagemaker_session = sagemaker_session - self.hmac_key = hmac_key - self._last_describe_response = None - - @staticmethod - def from_describe_response(describe_training_job_response, sagemaker_session): - """Construct a _Job from a describe_training_job_response object. - - Args: - describe_training_job_response (Dict): Describe training job response. - sagemaker_session (Session): SageMaker boto session. - - Returns: - the _Job object. 
- """ - job_name = describe_training_job_response["TrainingJobName"] - s3_uri = describe_training_job_response["OutputDataConfig"]["S3OutputPath"] - hmac_key = describe_training_job_response["Environment"]["REMOTE_FUNCTION_SECRET_KEY"] - - job = _Job(job_name, s3_uri, sagemaker_session, hmac_key) - job._last_describe_response = describe_training_job_response - return job - - @staticmethod - def start(job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None): - """Start a training job. - - Args: - job_settings (_JobSettings): the job settings. - func: the function to be executed. - func_args: the positional arguments to the function. - func_kwargs: the keyword arguments to the function - - Returns: - the _Job object. - """ - job_name = _Job._get_job_name(job_settings, func) - s3_base_uri = s3_path_join(job_settings.s3_root_uri, job_name) - - training_job_request = _Job.compile( - job_settings=job_settings, - job_name=job_name, - s3_base_uri=s3_base_uri, - func=func, - func_args=func_args, - func_kwargs=func_kwargs, - run_info=run_info, - ) - - logger.info("Creating job: %s", job_name) - - job_settings.sagemaker_session.sagemaker_client.create_training_job(**training_job_request) - - return _Job( - job_name, - s3_base_uri, - job_settings.sagemaker_session, - training_job_request["Environment"]["REMOTE_FUNCTION_SECRET_KEY"], - ) - - @staticmethod - def compile( - job_settings: _JobSettings, - job_name: str, - s3_base_uri: str, - func: Callable, - func_args: tuple, - func_kwargs: dict, - run_info=None, - serialized_data: _SerializedData = None, - ) -> dict: - """Build the artifacts and generate the training job request.""" - from sagemaker.core.workflow.properties import Properties - from sagemaker.core.workflow.parameters import Parameter - from sagemaker.core.workflow.functions import Join - from sagemaker.core.workflow.execution_variables import ( - ExecutionVariables, - ExecutionVariable, - ) - from sagemaker.core.workflow.utilities import 
load_step_compilation_context - - step_compilation_context = load_step_compilation_context() - - jobs_container_entrypoint = JOBS_CONTAINER_ENTRYPOINT[:] - - # generate hmac key for integrity check - if step_compilation_context is None: - hmac_key = secrets.token_hex(32) - else: - hmac_key = step_compilation_context.function_step_secret_token - - # serialize function and arguments - if step_compilation_context is None: - stored_function = StoredFunction( - sagemaker_session=job_settings.sagemaker_session, - s3_base_uri=s3_base_uri, - hmac_key=hmac_key, - s3_kms_key=job_settings.s3_kms_key, - ) - stored_function.save(func, *func_args, **func_kwargs) - else: - stored_function = StoredFunction( - sagemaker_session=job_settings.sagemaker_session, - s3_base_uri=s3_base_uri, - hmac_key=hmac_key, - s3_kms_key=job_settings.s3_kms_key, - context=Context( - step_name=step_compilation_context.step_name, - func_step_s3_dir=step_compilation_context.pipeline_build_time, - ), - ) - - stored_function.save_pipeline_step_function(serialized_data) - - stopping_condition = { - "MaxRuntimeInSeconds": job_settings.max_runtime_in_seconds, - } - if job_settings.max_wait_time_in_seconds is not None: - stopping_condition["MaxWaitTimeInSeconds"] = job_settings.max_wait_time_in_seconds - - request_dict = dict( - TrainingJobName=job_name, - RoleArn=job_settings.role, - StoppingCondition=stopping_condition, - RetryStrategy={"MaximumRetryAttempts": job_settings.max_retry_attempts}, - ) - - _update_job_request_with_checkpoint_config(func_args, func_kwargs, request_dict) - - if job_settings.tags: - request_dict["Tags"] = job_settings.tags - - # generate other build artifacts including workspace, requirements.txt - request_dict["InputDataConfig"] = _generate_input_data_config( - job_settings=job_settings, s3_base_uri=s3_base_uri - ) - - if step_compilation_context: - # Path format: base/step_name/build_timestamp/execution_id/results - # This matches the path construction in stored_function.py - 
s3_output_path = Join( - on="/", - values=[ - s3_base_uri, - step_compilation_context.step_name, - step_compilation_context.pipeline_build_time, - ExecutionVariables.PIPELINE_EXECUTION_ID, - "results", - ], - ) - output_config = {"S3OutputPath": s3_output_path} - else: - output_config = {"S3OutputPath": s3_base_uri} - if job_settings.s3_kms_key is not None: - output_config["KmsKeyId"] = job_settings.s3_kms_key - if job_settings.disable_output_compression: - output_config["CompressionType"] = "NONE" - request_dict["OutputDataConfig"] = output_config - - container_args = ["--s3_base_uri", s3_base_uri] - container_args.extend(["--region", job_settings.sagemaker_session.boto_region_name]) - container_args.extend( - ["--client_python_version", RuntimeEnvironmentManager()._current_python_version()] - ) - container_args.extend( - [ - "--client_sagemaker_pysdk_version", - RuntimeEnvironmentManager()._current_sagemaker_pysdk_version(), - ] - ) - container_args.extend( - [ - "--dependency_settings", - _DependencySettings.from_dependency_file_path( - job_settings.dependencies - ).to_string(), - ] - ) - if job_settings.use_torchrun: - container_args.extend(["--distribution", "torchrun"]) - elif job_settings.use_mpirun: - container_args.extend(["--distribution", "mpirun"]) - if job_settings.nproc_per_node is not None and int(job_settings.nproc_per_node) > 0: - container_args.extend(["--user_nproc_per_node", str(job_settings.nproc_per_node)]) - if job_settings.s3_kms_key: - container_args.extend(["--s3_kms_key", job_settings.s3_kms_key]) - - if job_settings.job_conda_env: - container_args.extend(["--job_conda_env", job_settings.job_conda_env]) - - if step_compilation_context: - # TODO: remove the duplicates in the list - container_args.extend(["--pipeline_step_name", step_compilation_context.step_name]) - container_args.extend( - ["--pipeline_execution_id", ExecutionVariables.PIPELINE_EXECUTION_ID] - ) - container_args.extend( - ["--func_step_s3_dir", 
step_compilation_context.pipeline_build_time] - ) - container_args.extend(["--property_references"]) - container_args.extend( - [ - ExecutionVariables.PIPELINE_EXECUTION_ID.expr["Get"], - ExecutionVariables.PIPELINE_EXECUTION_ID.to_string(), - ] - ) - for arg in func_args + tuple(func_kwargs.values()): - if isinstance(arg, (Parameter, ExecutionVariable, Properties)): - container_args.extend([arg.expr["Get"], arg.to_string()]) - - # Lazy import to avoid circular dependency - try: - from sagemaker.mlops.workflow.function_step import DelayedReturn - - if isinstance(arg, DelayedReturn): - # The uri is a Properties object - uri = get_step(arg)._properties.OutputDataConfig.S3OutputPath - container_args.extend([uri.expr["Get"], uri.to_string()]) - except ImportError: - # MLOps not installed, skip DelayedReturn handling - pass - - if run_info is not None: - container_args.extend(["--run_in_context", json.dumps(dataclasses.asdict(run_info))]) - elif _RunContext.get_current_run() is not None: - container_args.extend( - ["--run_in_context", _convert_run_to_json(_RunContext.get_current_run())] - ) - - algorithm_spec = dict( - TrainingImage=job_settings.image_uri, - TrainingInputMode="File", - ContainerEntrypoint=jobs_container_entrypoint, - ContainerArguments=container_args, - ) - - request_dict["AlgorithmSpecification"] = algorithm_spec - - resource_config = dict( - VolumeSizeInGB=job_settings.volume_size, - InstanceCount=job_settings.instance_count, - InstanceType=job_settings.instance_type, - ) - if job_settings.volume_kms_key is not None: - resource_config["VolumeKmsKeyId"] = job_settings.volume_kms_key - if job_settings.keep_alive_period_in_seconds is not None: - resource_config["KeepAlivePeriodInSeconds"] = job_settings.keep_alive_period_in_seconds - - request_dict["ResourceConfig"] = resource_config - - if job_settings.enable_network_isolation is not None: - request_dict["EnableNetworkIsolation"] = job_settings.enable_network_isolation - - if 
job_settings.encrypt_inter_container_traffic is not None: - request_dict["EnableInterContainerTrafficEncryption"] = ( - job_settings.encrypt_inter_container_traffic - ) - - if job_settings.vpc_config: - request_dict["VpcConfig"] = job_settings.vpc_config - - request_dict["EnableManagedSpotTraining"] = job_settings.use_spot_instances - - request_dict["Environment"] = job_settings.environment_variables - request_dict["Environment"].update({"REMOTE_FUNCTION_SECRET_KEY": hmac_key}) - - extended_request = _extend_spark_config_to_request(request_dict, job_settings, s3_base_uri) - extended_request = _extend_mpirun_to_request(extended_request, job_settings) - extended_request = _extend_torchrun_to_request(extended_request, job_settings) - - return extended_request - - def describe(self): - """Describe the underlying sagemaker training job. - - Returns: - Dict: Describe training job response. - """ - if self._last_describe_response is not None and self._last_describe_response[ - "TrainingJobStatus" - ] in ["Completed", "Failed", "Stopped"]: - return self._last_describe_response - - self._last_describe_response = ( - self.sagemaker_session.sagemaker_client.describe_training_job( - TrainingJobName=self.job_name - ) - ) - - return self._last_describe_response - - def stop(self): - """Stop the underlying sagemaker training job.""" - self.sagemaker_session.sagemaker_client.stop_training_job(TrainingJobName=self.job_name) - - def wait(self, timeout: int = None): - """Wait for the underlying sagemaker job to finish and displays its logs . - - This method blocks on the sagemaker job completing for up to the timeout value (if - specified). If timeout is ``None``, this method will block until the job is completed. - - Args: - timeout (int): Timeout in seconds to wait until the job is completed. ``None`` by - default. 
- - Returns: None - """ - - self._last_describe_response = _logs_for_job( - sagemaker_session=self.sagemaker_session, - job_name=self.job_name, - wait=True, - timeout=timeout, - ) - - @staticmethod - def _get_job_name(job_settings, func): - """Get the underlying SageMaker job name from job_name_prefix or func. - - Args: - job_settings (_JobSettings): the job settings. - func: the function to be executed. - - Returns: - str : the training job name. - """ - from sagemaker.core.workflow.utilities import load_step_compilation_context - - step_complication_context = load_step_compilation_context() - - job_name_prefix = job_settings.job_name_prefix - if not job_name_prefix: - job_name_prefix = func.__name__ - # remove all special characters in the beginning of function name - job_name_prefix = re.sub(r"^[^a-zA-Z0-9]+", "", job_name_prefix) - # convert all remaining special characters to '-' - job_name_prefix = re.sub(r"[^a-zA-Z0-9-]", "-", job_name_prefix) - - if step_complication_context: - return job_name_prefix - return name_from_base(job_name_prefix) - - -def _prepare_and_upload_runtime_scripts( - spark_config: SparkConfig, - s3_base_uri: str, - s3_kms_key: str, - sagemaker_session: Session, - use_torchrun: bool = False, - use_mpirun: bool = False, -): - """Copy runtime scripts to a folder and upload to S3. - - In case of remote function, s3_base_uri is s3_root_uri + function_name. - In case of pipeline, s3_base_uri is s3_root_uri + pipeline_name. The runtime scripts are - uploaded only once per pipeline. - - Args: - spark_config (SparkConfig): remote Spark job configurations. - - s3_base_uri (str): S3 location that the runtime scripts will be uploaded to. - - s3_kms_key (str): kms key used to encrypt the files uploaded to S3. - - sagemaker_session (str): SageMaker boto client session. - - use_torchrun (bool): Whether to use torchrun or not. - - use_mpirun (bool): Whether to use mpirun or not. 
- - nproc_per_node (Optional[int]): Number of processes per node - """ - - from sagemaker.core.workflow.utilities import load_step_compilation_context - - step_compilation_context = load_step_compilation_context() - - if step_compilation_context and not step_compilation_context.upload_runtime_scripts: - return s3_path_join(s3_base_uri, RUNTIME_SCRIPTS_CHANNEL_NAME) - - with _tmpdir() as bootstrap_scripts: - - # write entrypoint script to tmpdir - entrypoint_script_path = os.path.join(bootstrap_scripts, ENTRYPOINT_SCRIPT_NAME) - entry_point_script = ENTRYPOINT_SCRIPT - if spark_config: - entry_point_script = SPARK_ENTRYPOINT_SCRIPT - spark_script_path = os.path.join( - os.path.dirname(__file__), "runtime_environment", SPARK_APP_SCRIPT_NAME - ) - shutil.copy2(spark_script_path, bootstrap_scripts) - - if use_torchrun: - entry_point_script = ENTRYPOINT_TORCHRUN_SCRIPT - - if use_mpirun: - entry_point_script = ENTRYPOINT_MPIRUN_SCRIPT - - with open(entrypoint_script_path, "w", newline="\n") as file: - file.writelines(entry_point_script) - - bootstrap_script_path = os.path.join( - os.path.dirname(__file__), "runtime_environment", BOOTSTRAP_SCRIPT_NAME - ) - mpi_utils_path = os.path.join( - os.path.dirname(__file__), "runtime_environment", MPI_UTILS_SCRIPT_NAME - ) - runtime_manager_script_path = os.path.join( - os.path.dirname(__file__), "runtime_environment", RUNTIME_MANAGER_SCRIPT_NAME - ) - - # copy runtime scripts to tmpdir - shutil.copy2(bootstrap_script_path, bootstrap_scripts) - shutil.copy2(mpi_utils_path, bootstrap_scripts) - shutil.copy2(runtime_manager_script_path, bootstrap_scripts) - - upload_path = S3Uploader.upload( - bootstrap_scripts, - s3_path_join(s3_base_uri, RUNTIME_SCRIPTS_CHANNEL_NAME), - s3_kms_key, - sagemaker_session, - ) - - if step_compilation_context: - step_compilation_context.upload_runtime_scripts = False - return upload_path - - -def _generate_input_data_config(job_settings: _JobSettings, s3_base_uri: str): - """Generates input data 
config""" - from sagemaker.core.workflow.utilities import load_step_compilation_context - - step_compilation_context = load_step_compilation_context() - - bootstrap_scripts_s3uri = _prepare_and_upload_runtime_scripts( - spark_config=job_settings.spark_config, - s3_base_uri=s3_base_uri, - s3_kms_key=job_settings.s3_kms_key, - sagemaker_session=job_settings.sagemaker_session, - use_torchrun=job_settings.use_torchrun, - use_mpirun=job_settings.use_mpirun, - ) - - input_data_config = [ - dict( - ChannelName=RUNTIME_SCRIPTS_CHANNEL_NAME, - DataSource={ - "S3DataSource": { - "S3Uri": bootstrap_scripts_s3uri, - "S3DataType": "S3Prefix", - } - }, - ) - ] - - local_dependencies_path = RuntimeEnvironmentManager().snapshot(job_settings.dependencies) - - if step_compilation_context: - with _tmpdir() as tmp_dir: - script_and_dependencies_s3uri = _prepare_dependencies_and_pre_execution_scripts( - local_dependencies_path=local_dependencies_path, - pre_execution_commands=job_settings.pre_execution_commands, - pre_execution_script_local_path=job_settings.pre_execution_script, - s3_base_uri=s3_base_uri, - s3_kms_key=job_settings.s3_kms_key, - sagemaker_session=job_settings.sagemaker_session, - tmp_dir=tmp_dir, - ) - - if script_and_dependencies_s3uri: - input_data_config.append( - dict( - ChannelName=SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME, - DataSource={ - "S3DataSource": { - "S3Uri": script_and_dependencies_s3uri, - "S3DataType": "S3Prefix", - } - }, - ) - ) - - user_workspace_s3uri = _prepare_and_upload_workspace( - local_dependencies_path=local_dependencies_path, - include_local_workdir=job_settings.include_local_workdir, - pre_execution_commands=job_settings.pre_execution_commands, - pre_execution_script_local_path=job_settings.pre_execution_script, - s3_base_uri=s3_base_uri, - s3_kms_key=job_settings.s3_kms_key, - sagemaker_session=job_settings.sagemaker_session, - custom_file_filter=job_settings.custom_file_filter, - ) - - if user_workspace_s3uri: - input_data_config.append( - 
dict( - ChannelName=( - REMOTE_FUNCTION_WORKSPACE - if not step_compilation_context - else step_compilation_context.pipeline_build_time - ), - DataSource={ - "S3DataSource": { - "S3Uri": user_workspace_s3uri, - "S3DataType": "S3Prefix", - } - }, - ) - ) - - return input_data_config - - -def _prepare_dependencies_and_pre_execution_scripts( - local_dependencies_path: str, - pre_execution_commands: List[str], - pre_execution_script_local_path: str, - s3_base_uri: str, - s3_kms_key: str, - sagemaker_session: Session, - tmp_dir: str, -): - """Prepare pre-execution scripts and dependencies and upload them to s3. - - If pre execution commands are provided, a new bash file will be created - with those commands in tmp directory. - If pre execution script is provided, it copies that file from local file path - to tmp directory. - If local dependencies file is provided, it copies that file from local file path - to tmp directory. - If under pipeline context, tmp directory with copied dependencies and scripts is - uploaded to S3. 
- """ - from sagemaker.core.workflow.utilities import load_step_compilation_context - - if not (local_dependencies_path or pre_execution_commands or pre_execution_script_local_path): - return None - - if local_dependencies_path: - dst_path = shutil.copy2(local_dependencies_path, tmp_dir) - logger.info("Copied dependencies file at '%s' to '%s'", local_dependencies_path, dst_path) - - if pre_execution_commands or pre_execution_script_local_path: - pre_execution_script = os.path.join(tmp_dir, PRE_EXECUTION_SCRIPT_NAME) - if pre_execution_commands: - with open(pre_execution_script, "w") as target_script: - commands = [cmd + "\n" for cmd in pre_execution_commands] - target_script.writelines(commands) - logger.info( - "Generated pre-execution script from commands to '%s'", pre_execution_script - ) - else: - shutil.copy2(pre_execution_script_local_path, pre_execution_script) - logger.info( - "Copied pre-execution commands from script at '%s' to '%s'", - pre_execution_script_local_path, - pre_execution_script, - ) - - step_compilation_context = load_step_compilation_context() - if step_compilation_context: - upload_path = S3Uploader.upload( - tmp_dir, - s3_path_join( - s3_base_uri, - step_compilation_context.step_name, - step_compilation_context.pipeline_build_time, - SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME, - ), - s3_kms_key, - sagemaker_session, - ) - logger.info( - "Successfully uploaded dependencies and pre execution scripts to '%s'", upload_path - ) - return upload_path - return None - - -def _prepare_and_upload_workspace( - local_dependencies_path: str, - include_local_workdir: bool, - pre_execution_commands: List[str], - pre_execution_script_local_path: str, - s3_base_uri: str, - s3_kms_key: str, - sagemaker_session: Session, - custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, -) -> str: - """Prepare and upload the workspace to S3. - - Under pipeline context, only workdir is packaged in the workspace folder and uploaded to s3. 
- Under remote function context, workdir along with pre execution scripts and dependencies - are packaged together into the workspace folder and uploaded to S3. - """ - from sagemaker.core.workflow.utilities import load_step_compilation_context - - step_compilation_context = load_step_compilation_context() - - if not ( - local_dependencies_path - or include_local_workdir - or pre_execution_commands - or pre_execution_script_local_path - ): - return None - - func_step_s3_dir = None - if step_compilation_context: - func_step_s3_dir = step_compilation_context.pipeline_build_time - if not include_local_workdir: - return None - if not step_compilation_context.upload_workspace: - return s3_path_join(s3_base_uri, REMOTE_FUNCTION_WORKSPACE, func_step_s3_dir) - - with _tmpdir() as tmp_dir: - tmp_workspace_dir = os.path.join(tmp_dir, "temp_workspace/") - os.mkdir(tmp_workspace_dir) - # TODO Remove the following hack to avoid dir_exists error in the copy_tree call below. - tmp_workspace = os.path.join(tmp_workspace_dir, JOB_REMOTE_FUNCTION_WORKSPACE) - - if include_local_workdir: - copy_workdir(tmp_workspace, custom_file_filter) - logger.info("Copied user workspace to '%s'", tmp_workspace) - - if not os.path.isdir(tmp_workspace): - # create the directory if no workdir_path was provided in the input. 
- os.mkdir(tmp_workspace) - - if not step_compilation_context: - _prepare_dependencies_and_pre_execution_scripts( - local_dependencies_path=local_dependencies_path, - pre_execution_commands=pre_execution_commands, - pre_execution_script_local_path=pre_execution_script_local_path, - s3_base_uri=s3_base_uri, - s3_kms_key=s3_kms_key, - sagemaker_session=sagemaker_session, - tmp_dir=tmp_workspace, - ) - - workspace_archive_path = os.path.join(tmp_dir, "workspace") - workspace_archive_path = shutil.make_archive( - workspace_archive_path, "zip", tmp_workspace_dir - ) - logger.info("Successfully created workdir archive at '%s'", workspace_archive_path) - - upload_path = S3Uploader.upload( - workspace_archive_path, - s3_path_join(s3_base_uri, REMOTE_FUNCTION_WORKSPACE, func_step_s3_dir), - s3_kms_key, - sagemaker_session, - ) - logger.info("Successfully uploaded workdir to '%s'", upload_path) - if step_compilation_context: - step_compilation_context.upload_workspace = False - return upload_path - - -def _convert_run_to_json(run: Run) -> str: - """Convert current run into json string""" - run_info = _RunInfo(run.experiment_name, run.run_name) - return json.dumps(dataclasses.asdict(run_info)) - - -def _prepare_and_upload_spark_dependent_files( - spark_config: SparkConfig, - s3_base_uri: str, - s3_kms_key: str, - sagemaker_session: Session, -) -> Tuple: - """Upload the Spark dependencies to S3 if present. - - Args: - spark_config (SparkConfig): The remote Spark job configurations. - s3_base_uri (str): The S3 location that the Spark dependencies will be uploaded to. - s3_kms_key (str): The kms key used to encrypt the files uploaded to S3. - sagemaker_session (str): SageMaker boto client session. 
- """ - if not spark_config: - return None, None, None, None - - submit_jars_s3_paths = _upload_spark_submit_deps( - spark_config.submit_jars, - SPARK_SUBMIT_JARS_WORKSPACE, - s3_base_uri, - s3_kms_key, - sagemaker_session, - ) - submit_py_files_s3_paths = _upload_spark_submit_deps( - spark_config.submit_py_files, - SPARK_SUBMIT_PY_FILES_WORKSPACE, - s3_base_uri, - s3_kms_key, - sagemaker_session, - ) - submit_files_s3_path = _upload_spark_submit_deps( - spark_config.submit_files, - SPARK_SUBMIT_FILES_WORKSPACE, - s3_base_uri, - s3_kms_key, - sagemaker_session, - ) - config_file_s3_uri = _upload_serialized_spark_configuration( - s3_base_uri, s3_kms_key, spark_config.configuration, sagemaker_session - ) - - return submit_jars_s3_paths, submit_py_files_s3_paths, submit_files_s3_path, config_file_s3_uri - - -def _upload_spark_submit_deps( - submit_deps: List[str], - workspace_name: str, - s3_base_uri: str, - s3_kms_key: str, - sagemaker_session: Session, -) -> str: - """Upload the Spark submit dependencies to S3. - - Args: - submit_deps (List[str]): A list of path which points to the Spark dependency files. - The path can be either a local path or S3 uri. For example ``/local/deps.jar`` or - ``s3:///deps.jar``. - - workspace_name (str): workspace name for Spark dependency. - s3_base_uri (str): S3 location that the Spark dependencies will be uploaded to. - s3_kms_key (str): kms key used to encrypt the files uploaded to S3. - sagemaker_session (str): SageMaker boto client session. - - Returns: - str : The concatenated path of all dependencies which will be passed to Spark. 
- """ - spark_opt_s3_uris = [] - if not submit_deps: - return None - - if not workspace_name or not s3_base_uri: - raise ValueError("workspace_name or s3_base_uri may not be empty.") - - for dep_path in submit_deps: - dep_url = urlparse(dep_path) - - if dep_url.scheme in ["s3", "s3a"]: - spark_opt_s3_uris.append(dep_path) - elif not dep_url.scheme or dep_url.scheme == "file": - if not os.path.isfile(dep_path): - raise ValueError(f"submit_deps path {dep_path} is not a valid local file.") - - upload_path = S3Uploader.upload( - local_path=dep_path, - desired_s3_uri=s3_path_join(s3_base_uri, workspace_name), - kms_key=s3_kms_key, - sagemaker_session=sagemaker_session, - ) - - spark_opt_s3_uris.append(upload_path) - logger.info("Uploaded the local file %s to %s", dep_path, upload_path) - return str.join(",", spark_opt_s3_uris) - - -def _upload_serialized_spark_configuration( - s3_base_uri: str, s3_kms_key: str, configuration: Dict, sagemaker_session: Session -) -> str: - """Upload the Spark configuration json to S3""" - if not configuration: - return None - - serialized_configuration = BytesIO(json.dumps(configuration).encode("utf-8")) - config_file_s3_uri = s3_path_join(s3_base_uri, SPARK_CONF_WORKSPACE, SPARK_CONF_FILE_NAME) - - S3Uploader.upload_string_as_file_body( - body=serialized_configuration, - desired_s3_uri=config_file_s3_uri, - kms_key=s3_kms_key, - sagemaker_session=sagemaker_session, - ) - - logger.info("Uploaded spark configuration json %s to %s", configuration, config_file_s3_uri) - - return config_file_s3_uri - - -def _extend_mpirun_to_request( - request_dict: Dict, - job_settings: _JobSettings, -) -> Dict: - """Extend the create training job request with mpirun configuration. - - Args: - request_dict (Dict): create training job request dict. - job_settings (_JobSettings): the job settings. 
- """ - use_mpirun = job_settings.use_mpirun - instance_count = job_settings.instance_count - - if not use_mpirun: - return request_dict - - if instance_count == 1: - return request_dict - - extended_request = request_dict.copy() - - for input_channel in extended_request["InputDataConfig"]: - s3_data_source = input_channel["DataSource"].get("S3DataSource", None) - if s3_data_source: - s3_data_source["S3DataDistributionType"] = "FullyReplicated" - - return extended_request - - -def _extend_torchrun_to_request( - request_dict: Dict, - job_settings: _JobSettings, -) -> Dict: - """Extend the create training job request with torchrun configuration. - - Args: - request_dict (Dict): create training job request dict. - job_settings (_JobSettings): the job settings. - """ - use_torchrun = job_settings.use_torchrun - instance_count = job_settings.instance_count - - if not use_torchrun: - return request_dict - - if instance_count == 1: - return request_dict - - extended_request = request_dict.copy() - - for input_channel in extended_request["InputDataConfig"]: - s3_data_source = input_channel["DataSource"].get("S3DataSource", None) - if s3_data_source: - s3_data_source["S3DataDistributionType"] = "FullyReplicated" - - return extended_request - - -def _extend_spark_config_to_request( - request_dict: Dict, - job_settings: _JobSettings, - s3_base_uri: str, -) -> Dict: - """Extend the create training job request with spark configurations. - - Args: - request_dict (Dict): create training job request dict. - job_settings (_JobSettings): the job settings. - s3_base_uri (str): S3 location that the Spark dependencies will be uploaded to. 
- """ - spark_config = job_settings.spark_config - - if not spark_config: - return request_dict - - extended_request = request_dict.copy() - container_entrypoint = extended_request["AlgorithmSpecification"]["ContainerEntrypoint"] - - ( - submit_jars_s3_paths, - submit_py_files_s3_paths, - submit_files_s3_path, - config_file_s3_uri, - ) = _prepare_and_upload_spark_dependent_files( - spark_config=spark_config, - s3_base_uri=s3_base_uri, - s3_kms_key=job_settings.s3_kms_key, - sagemaker_session=job_settings.sagemaker_session, - ) - - input_data_config = extended_request["InputDataConfig"] - - if config_file_s3_uri: - input_data_config.append( - dict( - ChannelName=SPARK_CONF_CHANNEL_NAME, - DataSource={ - "S3DataSource": { - "S3Uri": config_file_s3_uri, - "S3DataType": "S3Prefix", - } - }, - ) - ) - - for input_channel in extended_request["InputDataConfig"]: - s3_data_source = input_channel["DataSource"].get("S3DataSource", None) - if s3_data_source: - s3_data_source["S3DataDistributionType"] = "FullyReplicated" - - if spark_config.spark_event_logs_uri: - container_entrypoint.extend( - ["--spark-event-logs-s3-uri", spark_config.spark_event_logs_uri] - ) - - if submit_jars_s3_paths: - container_entrypoint.extend(["--jars", submit_jars_s3_paths]) - - if submit_py_files_s3_paths: - container_entrypoint.extend(["--py-files", submit_py_files_s3_paths]) - - if submit_files_s3_path: - container_entrypoint.extend(["--files", submit_files_s3_path]) - - if spark_config: - container_entrypoint.extend([SPARK_APP_SCRIPT_PATH]) - - return extended_request - - -def _update_job_request_with_checkpoint_config(args, kwargs, request_dict): - """Extend job request with checkpoint config based on CheckpointLocation in function args. - - Args: - args (tuple): The positional arguments of the remote function. - kwargs (Dict): The keyword arguments of the remote function. - request_dict (Dict): create training job request dict. 
- """ - checkpoint_location_index_in_args = None - checkpoint_location_key_in_kwargs = None - checkpoint_location_count = 0 - - for index, arg in enumerate(args): - if isinstance(arg, CheckpointLocation): - checkpoint_location_index_in_args = index - checkpoint_location_count += 1 - - for key, value in kwargs.items(): - if isinstance(value, CheckpointLocation): - checkpoint_location_key_in_kwargs = key - checkpoint_location_count += 1 - - if checkpoint_location_count < 1: - return - - if checkpoint_location_count > 1: - raise ValueError( - "Remote function cannot have more than one argument of type CheckpointLocation." - ) - - if checkpoint_location_index_in_args is not None: - checkpoint_location_arg = args[checkpoint_location_index_in_args] - else: - checkpoint_location_arg = kwargs[checkpoint_location_key_in_kwargs] - - checkpoint_s3_uri = checkpoint_location_arg._s3_uri - checkpoint_local_path = checkpoint_location_arg._local_path - - request_dict["CheckpointConfig"] = { - "LocalPath": checkpoint_local_path, - "S3Uri": checkpoint_s3_uri, - } - - -@dataclasses.dataclass -class _RunInfo: - """Data class to hold information of the run object from context.""" - - experiment_name: str - run_name: str - - -def _get_initial_job_state(description, status_key, wait): - """Placeholder docstring""" - status = description[status_key] - job_already_completed = status in ("Completed", "Failed", "Stopped") - return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE - - -def _logs_for_job( # noqa: C901 - suppress complexity warning for this method - sagemaker_session, job_name, wait=False, poll=10, log_type="All", timeout=None -): - """Display logs for a given training job, optionally tailing them until job is complete. - - If the output is a tty or a Jupyter cell, it will be color-coded - based on which instance the log entry is from. 
- - Args: - sagemaker_session (sagemaker.core.helper.session.Session): A SageMaker Session - object, used for SageMaker interactions. - job_name (str): Name of the training job to display the logs for. - wait (bool): Whether to keep looking for new log entries until the job completes - (default: False). - poll (int): The interval in seconds between polling for new log entries and job - completion (default: 5). - log_type ([str]): A list of strings specifying which logs to print. Acceptable - strings are "All", "None", "Training", or "Rules". To maintain backwards - compatibility, boolean values are also accepted and converted to strings. - timeout (int): Timeout in seconds to wait until the job is completed. ``None`` by - default. - Returns: - Last call to sagemaker DescribeTrainingJob - Raises: - exceptions.CapacityError: If the training job fails with CapacityError. - exceptions.UnexpectedStatusException: If waiting and the training job fails. - """ - sagemaker_client = sagemaker_session.sagemaker_client - request_end_time = time.time() + timeout if timeout else None - description = _wait_until( - lambda: sagemaker_client.describe_training_job(TrainingJobName=job_name) - ) - print(secondary_training_status_message(description, None), end="") - - instance_count, stream_names, positions, client, log_group, dot, color_wrap = _logs_init( - sagemaker_session.boto_session, description, job="Training" - ) - - state = _get_initial_job_state(description, "TrainingJobStatus", wait) - - # The loop below implements a state machine that alternates between checking the job status - # and reading whatever is available in the logs at this point. Note, that if we were - # called with wait == False, we never check the job status. - # - # If wait == TRUE and job is not completed, the initial state is TAILING - # If wait == FALSE, the initial state is COMPLETE (doesn't matter if the job really is - # complete). 
- # - # The state table: - # - # STATE ACTIONS CONDITION NEW STATE - # ---------------- ---------------- ----------------- ---------------- - # TAILING Read logs, Pause, Get status Job complete JOB_COMPLETE - # Else TAILING - # JOB_COMPLETE Read logs, Pause Any COMPLETE - # COMPLETE Read logs, Exit N/A - # - # Notes: - # - The JOB_COMPLETE state forces us to do an extra pause and read any items that got to - # Cloudwatch after the job was marked complete. - last_describe_job_call = time.time() - last_description = description - last_debug_rule_statuses = None - last_profiler_rule_statuses = None - - while True: - _flush_log_streams( - stream_names, - instance_count, - client, - log_group, - job_name, - positions, - dot, - color_wrap, - ) - if timeout and time.time() > request_end_time: - print("Timeout Exceeded. {} seconds elapsed.".format(timeout)) - break - - if state == LogState.COMPLETE: - break - - time.sleep(poll) - - if state == LogState.JOB_COMPLETE: - state = LogState.COMPLETE - elif time.time() - last_describe_job_call >= 30: - description = sagemaker_client.describe_training_job(TrainingJobName=job_name) - last_describe_job_call = time.time() - - if secondary_training_status_changed(description, last_description): - print() - print(secondary_training_status_message(description, last_description), end="") - last_description = description - - status = description["TrainingJobStatus"] - - if status in ("Completed", "Failed", "Stopped"): - print() - state = LogState.JOB_COMPLETE - - # Print prettified logs related to the status of SageMaker Debugger rules. 
- debug_rule_statuses = description.get("DebugRuleEvaluationStatuses", {}) - if ( - debug_rule_statuses - and _rule_statuses_changed(debug_rule_statuses, last_debug_rule_statuses) - and (log_type in {"All", "Rules"}) - ): - for status in debug_rule_statuses: - rule_log = ( - f"{status['RuleConfigurationName']}: {status['RuleEvaluationStatus']}" - ) - print(rule_log) - - last_debug_rule_statuses = debug_rule_statuses - - # Print prettified logs related to the status of SageMaker Profiler rules. - profiler_rule_statuses = description.get("ProfilerRuleEvaluationStatuses", {}) - if ( - profiler_rule_statuses - and _rule_statuses_changed(profiler_rule_statuses, last_profiler_rule_statuses) - and (log_type in {"All", "Rules"}) - ): - for status in profiler_rule_statuses: - rule_log = ( - f"{status['RuleConfigurationName']}: {status['RuleEvaluationStatus']}" - ) - print(rule_log) - - last_profiler_rule_statuses = profiler_rule_statuses - - if wait: - _check_job_status(job_name, description, "TrainingJobStatus") - if dot: - print() - # Customers are not billed for hardware provisioning, so billable time is less than - # total time - training_time = description.get("TrainingTimeInSeconds") - billable_time = description.get("BillableTimeInSeconds") - if training_time is not None: - print("Training seconds:", training_time * instance_count) - if billable_time is not None: - print("Billable seconds:", billable_time * instance_count) - if description.get("EnableManagedSpotTraining"): - saving = (1 - float(billable_time) / training_time) * 100 - print("Managed Spot Training savings: {:.1f}%".format(saving)) - return last_description - - -def _check_job_status(job, desc, status_key_name): - """Check to see if the job completed successfully. - - If not, construct and raise a exceptions. (UnexpectedStatusException). - - Args: - job (str): The name of the job to check. - desc (dict[str, str]): The result of ``describe_training_job()``. 
- status_key_name (str): Status key name to check for. - - Raises: - exceptions.CapacityError: If the training job fails with CapacityError. - exceptions.UnexpectedStatusException: If the training job fails. - """ - status = desc[status_key_name] - # If the status is capital case, then convert it to Camel case - status = _STATUS_CODE_TABLE.get(status, status) - - if status == "Stopped": - logger.warning( - "Job ended with status 'Stopped' rather than 'Completed'. " - "This could mean the job timed out or stopped early for some other reason: " - "Consider checking whether it completed as you expect." - ) - elif status != "Completed": - reason = desc.get("FailureReason", "(No reason provided)") - job_type = status_key_name.replace("JobStatus", " job") - troubleshooting = ( - "https://docs.aws.amazon.com/sagemaker/latest/dg/" - "sagemaker-python-sdk-troubleshooting.html" - ) - message = ( - "Error for {job_type} {job_name}: {status}. Reason: {reason}. " - "Check troubleshooting guide for common errors: {troubleshooting}" - ).format( - job_type=job_type, - job_name=job, - status=status, - reason=reason, - troubleshooting=troubleshooting, - ) - if "CapacityError" in str(reason): - raise exceptions.CapacityError( - message=message, - allowed_statuses=["Completed", "Stopped"], - actual_status=status, - ) - raise exceptions.UnexpectedStatusException( - message=message, - allowed_statuses=["Completed", "Stopped"], - actual_status=status, - ) - - -def _flush_log_streams( - stream_names, instance_count, client, log_group, job_name, positions, dot, color_wrap -): - """Placeholder docstring""" - if len(stream_names) < instance_count: - # Log streams are created whenever a container starts writing to stdout/err, so this list - # may be dynamic until we have a stream for every instance. 
- try: - streams = client.describe_log_streams( - logGroupName=log_group, - logStreamNamePrefix=job_name + "/", - orderBy="LogStreamName", - limit=min(instance_count, 50), - ) - stream_names = [s["logStreamName"] for s in streams["logStreams"]] - - while "nextToken" in streams: - streams = client.describe_log_streams( - logGroupName=log_group, - logStreamNamePrefix=job_name + "/", - orderBy="LogStreamName", - limit=50, - ) - - stream_names.extend([s["logStreamName"] for s in streams["logStreams"]]) - - positions.update( - [ - (s, sagemaker_logs.Position(timestamp=0, skip=0)) - for s in stream_names - if s not in positions - ] - ) - except ClientError as e: - # On the very first training job run on an account, there's no log group until - # the container starts logging, so ignore any errors thrown about that - err = e.response.get("Error", {}) - if err.get("Code", None) != "ResourceNotFoundException": - raise - - if len(stream_names) > 0: - if dot: - print("") - dot = False - for idx, event in sagemaker_logs.multi_stream_iter( - client, log_group, stream_names, positions - ): - color_wrap(idx, event["message"]) - ts, count = positions[stream_names[idx]] - if event["timestamp"] == ts: - positions[stream_names[idx]] = sagemaker_logs.Position(timestamp=ts, skip=count + 1) - else: - positions[stream_names[idx]] = sagemaker_logs.Position( - timestamp=event["timestamp"], skip=1 - ) - else: - dot = True - print(".", end="") - sys.stdout.flush() - - -def _rule_statuses_changed(current_statuses, last_statuses): - """Checks the rule evaluation statuses for SageMaker Debugger and Profiler rules.""" - if not last_statuses: - return True - - for current, last in zip(current_statuses, last_statuses): - if (current["RuleConfigurationName"] == last["RuleConfigurationName"]) and ( - current["RuleEvaluationStatus"] != last["RuleEvaluationStatus"] - ): - return True - - return False - - -def _get_initial_job_state(description, status_key, wait): - """Placeholder docstring""" - status 
= description[status_key] - job_already_completed = status in ("Completed", "Failed", "Stopped") - return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE - - -def _logs_init(boto_session, description, job): - """Placeholder docstring""" - if job == "Training": - if "InstanceGroups" in description["ResourceConfig"]: - instance_count = 0 - for instanceGroup in description["ResourceConfig"]["InstanceGroups"]: - instance_count += instanceGroup["InstanceCount"] - else: - instance_count = description["ResourceConfig"]["InstanceCount"] - elif job == "Transform": - instance_count = description["TransformResources"]["InstanceCount"] - elif job == "Processing": - instance_count = description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] - elif job == "AutoML": - instance_count = 0 - - stream_names = [] # The list of log streams - positions = {} # The current position in each stream, map of stream name -> position - - # Increase retries allowed (from default of 4), as we don't want waiting for a training job - # to be interrupted by a transient exception. - config = botocore.config.Config(retries={"max_attempts": 15}) - client = boto_session.client("logs", config=config) - log_group = "/aws/sagemaker/" + job + "Jobs" - - dot = False - - from sagemaker.core.logs import ColorWrap - - color_wrap = ColorWrap() - - return instance_count, stream_names, positions, client, log_group, dot, color_wrap diff --git a/sagemaker-core/src/sagemaker/core/remote_function/logging_config.py b/sagemaker-core/src/sagemaker/core/remote_function/logging_config.py deleted file mode 100644 index 875fabf6e0..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/logging_config.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. 
A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""Utilities related to logging.""" -from __future__ import absolute_import - -import logging -import time - - -class _UTCFormatter(logging.Formatter): - """Class that overrides the default local time provider in log formatter.""" - - converter = time.gmtime - - -def get_logger(): - """Return a logger with the name 'sagemaker'""" - sagemaker_logger = logging.getLogger("sagemaker.remote_function") - if len(sagemaker_logger.handlers) == 0: - sagemaker_logger.setLevel(logging.INFO) - handler = logging.StreamHandler() - formatter = _UTCFormatter("%(asctime)s %(name)s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - sagemaker_logger.addHandler(handler) - # don't stream logs with the root logger handler - sagemaker_logger.propagate = 0 - - return sagemaker_logger diff --git a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/__init__.py b/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/__init__.py deleted file mode 100644 index 18557a2eb5..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. 
See the License for the specific -# language governing permissions and limitations under the License. -"""Sagemaker modules container_drivers directory.""" -from __future__ import absolute_import diff --git a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/bootstrap_runtime_environment.py b/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/bootstrap_runtime_environment.py deleted file mode 100644 index 2c20151ed1..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/bootstrap_runtime_environment.py +++ /dev/null @@ -1,605 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""An entry point for runtime environment. 
This must be kept independent of SageMaker PySDK""" -from __future__ import absolute_import - -import argparse -import getpass -import json -import multiprocessing -import os -import pathlib -import shutil -import subprocess -import sys -from typing import Any, Dict - -if __package__ is None or __package__ == "": - from runtime_environment_manager import ( - RuntimeEnvironmentManager, - _DependencySettings, - get_logger, - ) -else: - from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - RuntimeEnvironmentManager, - _DependencySettings, - get_logger, - ) - -SUCCESS_EXIT_CODE = 0 -DEFAULT_FAILURE_CODE = 1 - -REMOTE_FUNCTION_WORKSPACE = "sm_rf_user_ws" -BASE_CHANNEL_PATH = "/opt/ml/input/data" -FAILURE_REASON_PATH = "/opt/ml/output/failure" -JOB_OUTPUT_DIRS = ["/opt/ml/input", "/opt/ml/output", "/opt/ml/model", "/tmp"] -PRE_EXECUTION_SCRIPT_NAME = "pre_exec.sh" -JOB_REMOTE_FUNCTION_WORKSPACE = "sagemaker_remote_function_workspace" -SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME = "pre_exec_script_and_dependencies" - -SM_MODEL_DIR = "/opt/ml/model" - -SM_INPUT_DIR = "/opt/ml/input" -SM_INPUT_DATA_DIR = "/opt/ml/input/data" -SM_INPUT_CONFIG_DIR = "/opt/ml/input/config" - -SM_OUTPUT_DIR = "/opt/ml/output" -SM_OUTPUT_FAILURE = "/opt/ml/output/failure" -SM_OUTPUT_DATA_DIR = "/opt/ml/output/data" - -SM_MASTER_ADDR = "algo-1" -SM_MASTER_PORT = 7777 - -RESOURCE_CONFIG = f"{SM_INPUT_CONFIG_DIR}/resourceconfig.json" -ENV_OUTPUT_FILE = "/opt/ml/input/sm_training.env" - -SENSITIVE_KEYWORDS = ["SECRET", "PASSWORD", "KEY", "TOKEN", "PRIVATE", "CREDS", "CREDENTIALS"] -HIDDEN_VALUE = "******" - -SM_EFA_NCCL_INSTANCES = [ - "ml.g4dn.8xlarge", - "ml.g4dn.12xlarge", - "ml.g5.48xlarge", - "ml.p3dn.24xlarge", - "ml.p4d.24xlarge", - "ml.p4de.24xlarge", - "ml.p5.48xlarge", - "ml.trn1.32xlarge", -] - -SM_EFA_RDMA_INSTANCES = [ - "ml.p4d.24xlarge", - "ml.p4de.24xlarge", - "ml.trn1.32xlarge", -] - -logger = get_logger() - - -def 
_bootstrap_runtime_env_for_remote_function( - client_python_version: str, - conda_env: str = None, - dependency_settings: _DependencySettings = None, -): - """Bootstrap runtime environment for remote function invocation. - - Args: - client_python_version (str): Python version at the client side. - conda_env (str): conda environment to be activated. Default is None. - dependency_settings (dict): Settings for installing dependencies. - """ - - workspace_unpack_dir = _unpack_user_workspace() - if not workspace_unpack_dir: - logger.info("No workspace to unpack and setup.") - return - - _handle_pre_exec_scripts(workspace_unpack_dir) - - _install_dependencies( - workspace_unpack_dir, - conda_env, - client_python_version, - REMOTE_FUNCTION_WORKSPACE, - dependency_settings, - ) - - -def _bootstrap_runtime_env_for_pipeline_step( - client_python_version: str, - func_step_workspace: str, - conda_env: str = None, - dependency_settings: _DependencySettings = None, -): - """Bootstrap runtime environment for pipeline step invocation. - - Args: - client_python_version (str): Python version at the client side. - func_step_workspace (str): s3 folder where workspace for FunctionStep is stored - conda_env (str): conda environment to be activated. Default is None. - dependency_settings (dict): Name of the dependency file. Default is None. 
- """ - - workspace_dir = _unpack_user_workspace(func_step_workspace) - if not workspace_dir: - os.mkdir(JOB_REMOTE_FUNCTION_WORKSPACE) - workspace_dir = pathlib.Path(os.getcwd(), JOB_REMOTE_FUNCTION_WORKSPACE).absolute() - - pre_exec_script_and_dependencies_dir = os.path.join( - BASE_CHANNEL_PATH, SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME - ) - - if not os.path.exists(pre_exec_script_and_dependencies_dir): - logger.info("No dependencies to bootstrap") - return - for file in os.listdir(pre_exec_script_and_dependencies_dir): - src_path = os.path.join(pre_exec_script_and_dependencies_dir, file) - dest_path = os.path.join(workspace_dir, file) - shutil.copy(src_path, dest_path) - - _handle_pre_exec_scripts(workspace_dir) - - _install_dependencies( - workspace_dir, - conda_env, - client_python_version, - SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME, - dependency_settings, - ) - - -def _handle_pre_exec_scripts(script_file_dir: str): - """Run the pre execution scripts. - - Args: - script_file_dir (str): Directory in the container where pre-execution scripts exists. - """ - - path_to_pre_exec_script = os.path.join(script_file_dir, PRE_EXECUTION_SCRIPT_NAME) - if os.path.isfile(path_to_pre_exec_script): - RuntimeEnvironmentManager().run_pre_exec_script( - pre_exec_script_path=path_to_pre_exec_script - ) - - -def _install_dependencies( - dependency_file_dir: str, - conda_env: str, - client_python_version: str, - channel_name: str, - dependency_settings: _DependencySettings = None, -): - """Install dependencies in the job container - - Args: - dependency_file_dir (str): Directory in the container where dependency file exists. - conda_env (str): conda environment to be activated. - client_python_version (str): Python version at the client side. - channel_name (str): Channel where dependency file was uploaded. - dependency_settings (dict): Settings for installing dependencies. 
- """ - - if dependency_settings is not None and dependency_settings.dependency_file is None: - # an empty dict is passed when no dependencies are specified - logger.info("No dependencies to install.") - elif dependency_settings is not None: - dependencies_file = os.path.join(dependency_file_dir, dependency_settings.dependency_file) - RuntimeEnvironmentManager().bootstrap( - local_dependencies_file=dependencies_file, - conda_env=conda_env, - client_python_version=client_python_version, - ) - else: - # no dependency file name is passed when an legacy version of the SDK is used - # we look for a file with .txt, .yml or .yaml extension in the workspace directory - dependencies_file = None - for file in os.listdir(dependency_file_dir): - if file.endswith(".txt") or file.endswith(".yml") or file.endswith(".yaml"): - dependencies_file = os.path.join(dependency_file_dir, file) - break - - if dependencies_file: - RuntimeEnvironmentManager().bootstrap( - local_dependencies_file=dependencies_file, - conda_env=conda_env, - client_python_version=client_python_version, - ) - else: - logger.info( - "Did not find any dependency file in the directory at '%s'." 
- " Assuming no additional dependencies to install.", - os.path.join(BASE_CHANNEL_PATH, channel_name), - ) - - -def _unpack_user_workspace(func_step_workspace: str = None): - """Unzip the user workspace""" - - workspace_archive_dir_path = ( - os.path.join(BASE_CHANNEL_PATH, REMOTE_FUNCTION_WORKSPACE) - if not func_step_workspace - else os.path.join(BASE_CHANNEL_PATH, func_step_workspace) - ) - if not os.path.exists(workspace_archive_dir_path): - logger.info( - "Directory '%s' does not exist.", - workspace_archive_dir_path, - ) - return None - - workspace_archive_path = os.path.join(workspace_archive_dir_path, "workspace.zip") - if not os.path.isfile(workspace_archive_path): - logger.info( - "Workspace archive '%s' does not exist.", - workspace_archive_dir_path, - ) - return None - - workspace_unpack_dir = pathlib.Path(os.getcwd()).absolute() - shutil.unpack_archive(filename=workspace_archive_path, extract_dir=workspace_unpack_dir) - logger.info("Successfully unpacked workspace archive at '%s'.", workspace_unpack_dir) - workspace_unpack_dir = pathlib.Path(workspace_unpack_dir, JOB_REMOTE_FUNCTION_WORKSPACE) - return workspace_unpack_dir - - -def _write_failure_reason_file(failure_msg): - """Create a file 'failure' with failure reason written if bootstrap runtime env failed. - - See: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html - Args: - failure_msg: The content of file to be written. 
- """ - if not os.path.exists(FAILURE_REASON_PATH): - with open(FAILURE_REASON_PATH, "w") as f: - f.write("RuntimeEnvironmentError: " + failure_msg) - - -def _parse_args(sys_args): - """Parses CLI arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument("--job_conda_env", type=str) - parser.add_argument("--client_python_version", type=str) - parser.add_argument("--client_sagemaker_pysdk_version", type=str, default=None) - parser.add_argument("--pipeline_execution_id", type=str) - parser.add_argument("--dependency_settings", type=str) - parser.add_argument("--func_step_s3_dir", type=str) - parser.add_argument("--distribution", type=str, default=None) - parser.add_argument("--user_nproc_per_node", type=str, default=None) - args, _ = parser.parse_known_args(sys_args) - return args - - -def log_key_value(key: str, value: str): - """Log a key-value pair, masking sensitive values if necessary.""" - if any(keyword.lower() in key.lower() for keyword in SENSITIVE_KEYWORDS): - logger.info("%s=%s", key, HIDDEN_VALUE) - elif isinstance(value, dict): - masked_value = mask_sensitive_info(value) - logger.info("%s=%s", key, json.dumps(masked_value)) - else: - try: - decoded_value = json.loads(value) - if isinstance(decoded_value, dict): - masked_value = mask_sensitive_info(decoded_value) - logger.info("%s=%s", key, json.dumps(masked_value)) - else: - logger.info("%s=%s", key, decoded_value) - except (json.JSONDecodeError, TypeError): - logger.info("%s=%s", key, value) - - -def log_env_variables(env_vars_dict: Dict[str, Any]): - """Log Environment Variables from the environment and an env_vars_dict.""" - for key, value in os.environ.items(): - log_key_value(key, value) - - for key, value in env_vars_dict.items(): - log_key_value(key, value) - - -def mask_sensitive_info(data): - """Recursively mask sensitive information in a dictionary.""" - if isinstance(data, dict): - for k, v in data.items(): - if isinstance(v, dict): - data[k] = mask_sensitive_info(v) - elif 
isinstance(v, str) and any( - keyword.lower() in k.lower() for keyword in SENSITIVE_KEYWORDS - ): - data[k] = HIDDEN_VALUE - return data - - -def num_cpus() -> int: - """Return the number of CPUs available in the current container. - - Returns: - int: Number of CPUs available in the current container. - """ - return multiprocessing.cpu_count() - - -def num_gpus() -> int: - """Return the number of GPUs available in the current container. - - Returns: - int: Number of GPUs available in the current container. - """ - try: - cmd = ["nvidia-smi", "--list-gpus"] - output = subprocess.check_output(cmd).decode("utf-8") - return sum(1 for line in output.splitlines() if line.startswith("GPU ")) - except (OSError, subprocess.CalledProcessError): - logger.info("No GPUs detected (normal if no gpus installed)") - return 0 - - -def num_neurons() -> int: - """Return the number of neuron cores available in the current container. - - Returns: - int: Number of Neuron Cores available in the current container. - """ - try: - cmd = ["neuron-ls", "-j"] - output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode("utf-8") - j = json.loads(output) - neuron_cores = 0 - for item in j: - neuron_cores += item.get("nc_count", 0) - logger.info("Found %s neurons on this instance", neuron_cores) - return neuron_cores - except OSError: - logger.info("No Neurons detected (normal if no neurons installed)") - return 0 - except subprocess.CalledProcessError as e: - if e.output is not None: - try: - msg = e.output.decode("utf-8").partition("error=")[2] - logger.info( - "No Neurons detected (normal if no neurons installed). \ - If neuron installed then %s", - msg, - ) - except AttributeError: - logger.info("No Neurons detected (normal if no neurons installed)") - else: - logger.info("No Neurons detected (normal if no neurons installed)") - - return 0 - - -def safe_serialize(data): - """Serialize the data without wrapping strings in quotes. - - This function handles the following cases: - 1. 
If `data` is a string, it returns the string as-is without wrapping in quotes. - 2. If `data` is serializable (e.g., a dictionary, list, int, float), it returns - the JSON-encoded string using `json.dumps()`. - 3. If `data` cannot be serialized (e.g., a custom object), it returns the string - representation of the data using `str(data)`. - - Args: - data (Any): The data to serialize. - - Returns: - str: The serialized JSON-compatible string or the string representation of the input. - """ - if isinstance(data, str): - return data - try: - return json.dumps(data) - except TypeError: - return str(data) - - -def set_env( - resource_config: Dict[str, Any], - distribution: str = None, - user_nproc_per_node: bool = None, - output_file: str = ENV_OUTPUT_FILE, -): - """Set environment variables for the training job container. - - Args: - resource_config (Dict[str, Any]): Resource configuration for the training job. - output_file (str): Output file to write the environment variables. - """ - # Constants - env_vars = { - "SM_MODEL_DIR": SM_MODEL_DIR, - "SM_INPUT_DIR": SM_INPUT_DIR, - "SM_INPUT_DATA_DIR": SM_INPUT_DATA_DIR, - "SM_INPUT_CONFIG_DIR": SM_INPUT_CONFIG_DIR, - "SM_OUTPUT_DIR": SM_OUTPUT_DIR, - "SM_OUTPUT_FAILURE": SM_OUTPUT_FAILURE, - "SM_OUTPUT_DATA_DIR": SM_OUTPUT_DATA_DIR, - "SM_MASTER_ADDR": SM_MASTER_ADDR, - "SM_MASTER_PORT": SM_MASTER_PORT, - } - - # Host Variables - current_host = resource_config["current_host"] - current_instance_type = resource_config["current_instance_type"] - hosts = resource_config["hosts"] - sorted_hosts = sorted(hosts) - - env_vars["SM_CURRENT_HOST"] = current_host - env_vars["SM_CURRENT_INSTANCE_TYPE"] = current_instance_type - env_vars["SM_HOSTS"] = sorted_hosts - env_vars["SM_NETWORK_INTERFACE_NAME"] = resource_config["network_interface_name"] - env_vars["SM_HOST_COUNT"] = len(sorted_hosts) - env_vars["SM_CURRENT_HOST_RANK"] = sorted_hosts.index(current_host) - - env_vars["SM_NUM_CPUS"] = num_cpus() - env_vars["SM_NUM_GPUS"] = 
num_gpus() - env_vars["SM_NUM_NEURONS"] = num_neurons() - - # Misc. - env_vars["SM_RESOURCE_CONFIG"] = resource_config - - if user_nproc_per_node is not None and int(user_nproc_per_node) > 0: - env_vars["SM_NPROC_PER_NODE"] = int(user_nproc_per_node) - else: - if int(env_vars["SM_NUM_GPUS"]) > 0: - env_vars["SM_NPROC_PER_NODE"] = int(env_vars["SM_NUM_GPUS"]) - elif int(env_vars["SM_NUM_NEURONS"]) > 0: - env_vars["SM_NPROC_PER_NODE"] = int(env_vars["SM_NUM_NEURONS"]) - else: - env_vars["SM_NPROC_PER_NODE"] = int(env_vars["SM_NUM_CPUS"]) - - # All Training Environment Variables - env_vars["SM_TRAINING_ENV"] = { - "current_host": env_vars["SM_CURRENT_HOST"], - "current_instance_type": env_vars["SM_CURRENT_INSTANCE_TYPE"], - "hosts": env_vars["SM_HOSTS"], - "host_count": env_vars["SM_HOST_COUNT"], - "nproc_per_node": env_vars["SM_NPROC_PER_NODE"], - "master_addr": env_vars["SM_MASTER_ADDR"], - "master_port": env_vars["SM_MASTER_PORT"], - "input_config_dir": env_vars["SM_INPUT_CONFIG_DIR"], - "input_data_dir": env_vars["SM_INPUT_DATA_DIR"], - "input_dir": env_vars["SM_INPUT_DIR"], - "job_name": os.environ["TRAINING_JOB_NAME"], - "model_dir": env_vars["SM_MODEL_DIR"], - "network_interface_name": env_vars["SM_NETWORK_INTERFACE_NAME"], - "num_cpus": env_vars["SM_NUM_CPUS"], - "num_gpus": env_vars["SM_NUM_GPUS"], - "num_neurons": env_vars["SM_NUM_NEURONS"], - "output_data_dir": env_vars["SM_OUTPUT_DATA_DIR"], - "resource_config": env_vars["SM_RESOURCE_CONFIG"], - } - - if distribution and distribution == "torchrun": - logger.info("Distribution: torchrun") - - instance_type = env_vars["SM_CURRENT_INSTANCE_TYPE"] - network_interface_name = env_vars.get("SM_NETWORK_INTERFACE_NAME", "eth0") - - if instance_type in SM_EFA_NCCL_INSTANCES: - # Enable EFA use - env_vars["FI_PROVIDER"] = "efa" - if instance_type in SM_EFA_RDMA_INSTANCES: - # Use EFA's RDMA functionality for one-sided and two-sided transfer - env_vars["FI_EFA_USE_DEVICE_RDMA"] = "1" - env_vars["RDMAV_FORK_SAFE"] = 
"1" - env_vars["NCCL_SOCKET_IFNAME"] = str(network_interface_name) - env_vars["NCCL_PROTO"] = "simple" - elif distribution and distribution == "mpirun": - logger.info("Distribution: mpirun") - - env_vars["MASTER_ADDR"] = env_vars["SM_MASTER_ADDR"] - env_vars["MASTER_PORT"] = str(env_vars["SM_MASTER_PORT"]) - - host_list = [ - "{}:{}".format(host, int(env_vars["SM_NPROC_PER_NODE"])) for host in sorted_hosts - ] - env_vars["SM_HOSTS_LIST"] = ",".join(host_list) - - instance_type = env_vars["SM_CURRENT_INSTANCE_TYPE"] - - if instance_type in SM_EFA_NCCL_INSTANCES: - env_vars["SM_FI_PROVIDER"] = "-x FI_PROVIDER=efa" - env_vars["SM_NCCL_PROTO"] = "-x NCCL_PROTO=simple" - else: - env_vars["SM_FI_PROVIDER"] = "" - env_vars["SM_NCCL_PROTO"] = "" - - if instance_type in SM_EFA_RDMA_INSTANCES: - env_vars["SM_FI_EFA_USE_DEVICE_RDMA"] = "-x FI_EFA_USE_DEVICE_RDMA=1" - else: - env_vars["SM_FI_EFA_USE_DEVICE_RDMA"] = "" - - with open(output_file, "w") as f: - for key, value in env_vars.items(): - f.write(f"export {key}='{safe_serialize(value)}'\n") - - logger.info("Environment Variables:") - log_env_variables(env_vars_dict=env_vars) - - -def main(sys_args=None): - """Entry point for bootstrap script""" - - exit_code = DEFAULT_FAILURE_CODE - - try: - args = _parse_args(sys_args) - - logger.info("Arguments:") - for arg in vars(args): - logger.info("%s=%s", arg, getattr(args, arg)) - - client_python_version = args.client_python_version - client_sagemaker_pysdk_version = args.client_sagemaker_pysdk_version - job_conda_env = args.job_conda_env - pipeline_execution_id = args.pipeline_execution_id - dependency_settings = _DependencySettings.from_string(args.dependency_settings) - func_step_workspace = args.func_step_s3_dir - distribution = args.distribution - user_nproc_per_node = args.user_nproc_per_node - - conda_env = job_conda_env or os.getenv("SAGEMAKER_JOB_CONDA_ENV") - - RuntimeEnvironmentManager()._validate_python_version(client_python_version, conda_env) - - user = 
getpass.getuser() - if user != "root": - log_message = ( - "The job is running on non-root user: %s. Adding write permissions to the " - "following job output directories: %s." - ) - logger.info(log_message, user, JOB_OUTPUT_DIRS) - RuntimeEnvironmentManager().change_dir_permission( - dirs=JOB_OUTPUT_DIRS, new_permission="777" - ) - - if pipeline_execution_id: - _bootstrap_runtime_env_for_pipeline_step( - client_python_version, func_step_workspace, conda_env, dependency_settings - ) - else: - _bootstrap_runtime_env_for_remote_function( - client_python_version, conda_env, dependency_settings - ) - - RuntimeEnvironmentManager()._validate_sagemaker_pysdk_version( - client_sagemaker_pysdk_version - ) - - if os.path.exists(RESOURCE_CONFIG): - try: - logger.info("Found %s", RESOURCE_CONFIG) - with open(RESOURCE_CONFIG, "r") as f: - resource_config = json.load(f) - set_env( - resource_config=resource_config, - distribution=distribution, - user_nproc_per_node=user_nproc_per_node, - ) - except (json.JSONDecodeError, FileNotFoundError) as e: - # Optionally, you might want to log this error - logger.info("ERROR: Error processing %s: %s", RESOURCE_CONFIG, str(e)) - - exit_code = SUCCESS_EXIT_CODE - except Exception as e: # pylint: disable=broad-except - logger.exception("Error encountered while bootstrapping runtime environment: %s", e) - - _write_failure_reason_file(str(e)) - finally: - sys.exit(exit_code) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/mpi_utils_remote.py b/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/mpi_utils_remote.py deleted file mode 100644 index f36e17a04c..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/mpi_utils_remote.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). 
You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""An utils function for runtime environment. This must be kept independent of SageMaker PySDK""" -from __future__ import absolute_import - -import argparse -import json -import os -import subprocess -import sys -import time -from typing import List - -import paramiko - -if __package__ is None or __package__ == "": - from runtime_environment_manager import ( - get_logger, - ) -else: - from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - get_logger, - ) - -SUCCESS_EXIT_CODE = 0 -DEFAULT_FAILURE_CODE = 1 - -FINISHED_STATUS_FILE = "/tmp/done.algo-1" -READY_FILE = "/tmp/ready.%s" -DEFAULT_SSH_PORT = 22 - -FAILURE_REASON_PATH = "/opt/ml/output/failure" -FINISHED_STATUS_FILE = "/tmp/done.algo-1" - -logger = get_logger() - - -class CustomHostKeyPolicy(paramiko.client.MissingHostKeyPolicy): - """Class to handle host key policy for SageMaker distributed training SSH connections. - - Example: - >>> client = paramiko.SSHClient() - >>> client.set_missing_host_key_policy(CustomHostKeyPolicy()) - >>> # Will succeed for SageMaker algorithm containers - >>> client.connect('algo-1234.internal') - >>> # Will raise SSHException for other unknown hosts - >>> client.connect('unknown-host') # raises SSHException - """ - - def missing_host_key(self, client, hostname, key): - """Accept host keys for algo-* hostnames, reject others. 
- - Args: - client: The SSHClient instance - hostname: The hostname attempting to connect - key: The host key - Raises: - paramiko.SSHException: If hostname doesn't match algo-* pattern - """ - if hostname.startswith("algo-"): - client.get_host_keys().add(hostname, key.get_name(), key) - return - raise paramiko.SSHException(f"Unknown host key for {hostname}") - - -def _parse_args(sys_args): - """Parses CLI arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument("--job_ended", type=str, default="0") - args, _ = parser.parse_known_args(sys_args) - return args - - -def _can_connect(host: str, port: int = DEFAULT_SSH_PORT) -> bool: - """Check if the connection to the provided host and port is possible.""" - try: - with paramiko.SSHClient() as client: - client.load_system_host_keys() - client.set_missing_host_key_policy(CustomHostKeyPolicy()) - client.connect(host, port=port) - logger.info("Can connect to host %s", host) - return True - except Exception as e: # pylint: disable=W0703 - logger.info("Cannot connect to host %s", host) - logger.debug("Connection failed with exception: %s", e) - return False - - -def _write_file_to_host(host: str, status_file: str) -> bool: - """Write the a file to the provided host.""" - try: - logger.info("Writing %s to %s", status_file, host) - subprocess.run( - ["ssh", host, "touch", f"{status_file}"], - capture_output=True, - text=True, - check=True, - ) - logger.info("Finished writing status file") - return True - except subprocess.CalledProcessError: - logger.info("Cannot connect to %s", host) - return False - - -def _write_failure_reason_file(failure_msg): - """Create a file 'failure' with failure reason written if bootstrap runtime env failed. - - See: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html - Args: - failure_msg: The content of file to be written. 
- """ - if not os.path.exists(FAILURE_REASON_PATH): - with open(FAILURE_REASON_PATH, "w") as f: - f.write("RuntimeEnvironmentError: " + failure_msg) - - -def _wait_for_master(master_host: str, port: int = DEFAULT_SSH_PORT, timeout: int = 300): - """Worker nodes wait until they can connect to the master node.""" - start_time = time.time() - while True: - logger.info("Worker is attempting to connect to the master node %s...", master_host) - if _can_connect(master_host, port): - logger.info("Worker can connect to master node %s.", master_host) - break - if time.time() - start_time > timeout: - raise TimeoutError("Timed out waiting for master %s to be reachable." % master_host) - - time.sleep(5) # Wait for 5 seconds before trying again - - -def _wait_for_status_file(status_file: str): - """Wait for the status file to be created.""" - logger.info("Waiting for status file %s", status_file) - while not os.path.exists(status_file): - time.sleep(30) - logger.info("Found status file %s", status_file) - - -def _wait_for_workers(worker_hosts: List[str], port: int = DEFAULT_SSH_PORT, timeout: int = 300): - """Master node waits until it can connect to all worker nodes.""" - start_time = time.time() - if not worker_hosts: - logger.info("No worker nodes to connect to.") - return - - while True: - logger.info("Master is attempting to connect to all workers...") - all_workers_connected = all( - _can_connect(worker, port) and os.path.exists(READY_FILE % worker) - for worker in worker_hosts - ) - - if all_workers_connected: - logger.info("Master can connect to all worker nodes.") - break - if time.time() - start_time > timeout: - raise TimeoutError("Timed out waiting for workers to be reachable.") - - time.sleep(5) # Wait for 5 seconds before trying again - - -def bootstrap_master_node(worker_hosts: List[str]): - """Bootstrap the master node.""" - logger.info("Bootstrapping master node...") - _wait_for_workers(worker_hosts) - - -def bootstrap_worker_node( - master_host: str, 
current_host: str, status_file: str = FINISHED_STATUS_FILE -): - """Bootstrap the worker nodes.""" - logger.info("Bootstrapping worker node...") - _wait_for_master(master_host) - _write_file_to_host(master_host, READY_FILE % current_host) - _wait_for_status_file(status_file) - - -def start_sshd_daemon(): - """Start the SSH daemon on the current node.""" - sshd_executable = "/usr/sbin/sshd" - - if not os.path.exists(sshd_executable): - raise RuntimeError("SSH daemon not found.") - - # Start the sshd in daemon mode (-D) - subprocess.Popen([sshd_executable, "-D"]) - logger.info("Started SSH daemon.") - - -def write_status_file_to_workers(worker_hosts: List[str], status_file: str = FINISHED_STATUS_FILE): - """Write the status file to all worker nodes.""" - for worker in worker_hosts: - retry = 0 - while not _write_file_to_host(worker, status_file): - time.sleep(5) - retry += 1 - if retry > 5: - raise TimeoutError("Timed out waiting for %s to be reachable." % worker) - logger.info("Retrying to write status file to %s", worker) - - -def main(sys_args=None): - """Entry point for bootstrap script""" - try: - args = _parse_args(sys_args) - - job_ended = args.job_ended - - main_host = os.environ["SM_MASTER_ADDR"] - current_host = os.environ["SM_CURRENT_HOST"] - - if job_ended == "0": - logger.info("Job is running, bootstrapping nodes") - - start_sshd_daemon() - - if current_host != main_host: - bootstrap_worker_node(main_host, current_host) - else: - sorted_hosts = json.loads(os.environ["SM_HOSTS"]) - worker_hosts = [host for host in sorted_hosts if host != main_host] - - bootstrap_master_node(worker_hosts) - else: - logger.info("Job ended, writing status file to workers") - - if current_host == main_host: - sorted_hosts = json.loads(os.environ["SM_HOSTS"]) - worker_hosts = [host for host in sorted_hosts if host != main_host] - - write_status_file_to_workers(worker_hosts) - except Exception as e: # pylint: disable=broad-except - logger.exception("Error encountered while 
bootstrapping runtime environment: %s", e) - - _write_failure_reason_file(str(e)) - - sys.exit(DEFAULT_FAILURE_CODE) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/runtime_environment_manager.py b/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/runtime_environment_manager.py deleted file mode 100644 index 5f00317c23..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/runtime_environment_manager.py +++ /dev/null @@ -1,554 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""SageMaker runtime environment module. 
This must be kept independent of SageMaker PySDK""" - -from __future__ import absolute_import - - -import logging -import sys -import shlex -import os -import subprocess -import time -import dataclasses -import json - - -class _UTCFormatter(logging.Formatter): - """Class that overrides the default local time provider in log formatter.""" - - converter = time.gmtime - - -def get_logger(): - """Return a logger with the name 'sagemaker'""" - sagemaker_logger = logging.getLogger("sagemaker.remote_function") - if len(sagemaker_logger.handlers) == 0: - sagemaker_logger.setLevel(logging.INFO) - handler = logging.StreamHandler() - formatter = _UTCFormatter("%(asctime)s %(name)s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - sagemaker_logger.addHandler(handler) - # don't stream logs with the root logger handler - sagemaker_logger.propagate = 0 - - return sagemaker_logger - - -logger = get_logger() - - -@dataclasses.dataclass -class _DependencySettings: - """Dependency settings for the remote function. - - Instructs the runtime environment script on how to handle dependencies. - If ``dependency_file`` is set, the runtime environment script will attempt - to install the dependencies. If ``dependency_file`` is not set, the runtime - environment script will assume no dependencies are required. - """ - - dependency_file: str = None - - def to_string(self): - """Converts the dependency settings to a string.""" - return json.dumps(dataclasses.asdict(self)) - - @staticmethod - def from_string(dependency_settings_string): - """Converts a json string to dependency settings. - - Args: - dependency_settings_string (str): The json string to convert. 
- """ - if dependency_settings_string is None: - return None - dependency_settings_dict = json.loads(dependency_settings_string) - return _DependencySettings(dependency_settings_dict.get("dependency_file")) - - @staticmethod - def from_dependency_file_path(dependency_file_path): - """Converts a dependency file path to dependency settings. - - Args: - dependency_file_path (str): The path to the dependency file. - """ - if dependency_file_path is None: - return _DependencySettings() - if dependency_file_path == "auto_capture": - return _DependencySettings("env_snapshot.yml") - return _DependencySettings(os.path.basename(dependency_file_path)) - - -class RuntimeEnvironmentManager: - """Runtime Environment Manager class to manage runtime environment.""" - - def _validate_path(self, path: str) -> str: - """Validate and sanitize file path to prevent path traversal attacks. - - Args: - path (str): The file path to validate - - Returns: - str: The validated absolute path - - Raises: - ValueError: If the path is invalid or contains suspicious patterns - """ - if not path: - raise ValueError("Path cannot be empty") - - # Get absolute path to prevent path traversal - abs_path = os.path.abspath(path) - - # Check for null bytes (common in path traversal attacks) - if '\x00' in path: - raise ValueError(f"Invalid path contains null byte: {path}") - - return abs_path - - def _validate_env_name(self, env_name: str) -> None: - """Validate conda environment name to prevent command injection. - - Args: - env_name (str): The environment name to validate - - Raises: - ValueError: If the environment name contains invalid characters - """ - if not env_name: - raise ValueError("Environment name cannot be empty") - - # Allow only alphanumeric, underscore, and hyphen - import re - if not re.match(r'^[a-zA-Z0-9_-]+$', env_name): - raise ValueError( - f"Invalid environment name '{env_name}'. " - "Only alphanumeric characters, underscores, and hyphens are allowed." 
- ) - - def snapshot(self, dependencies: str = None) -> str: - """Creates snapshot of the user's environment - - If a req.txt or conda.yml file is provided, it verifies their existence and - returns the local file path - If ``auto_capture`` is set, this method will take the snapshot of - user's dependencies installed in the local runtime. - Current support for ``auto_capture``: - * conda env, generate a yml file and return it's local path - - Args: - dependencies (str): Local path where dependencies file exists. - - Returns: - file path of the existing or generated dependencies file - """ - - # No additional dependencies specified - if dependencies is None: - return None - - if dependencies == "auto_capture": - return self._capture_from_local_runtime() - - # Dependencies specified as either req.txt or conda_env.yml - if ( - dependencies.endswith(".txt") - or dependencies.endswith(".yml") - or dependencies.endswith(".yaml") - ): - self._is_file_exists(dependencies) - return dependencies - - raise ValueError(f'Invalid dependencies provided: "{dependencies}"') - - def _capture_from_local_runtime(self) -> str: - """Generates dependencies list from the user's local runtime. - - Raises RuntimeEnvironmentError if not able to. - - Currently supports: conda environments - """ - - # Try to capture dependencies from the conda environment, if any. 
- conda_env_name = self._get_active_conda_env_name() - conda_env_prefix = self._get_active_conda_env_prefix() - if conda_env_name: - logger.info("Found conda_env_name: '%s'", conda_env_name) - elif conda_env_prefix: - logger.info("Found conda_env_prefix: '%s'", conda_env_prefix) - else: - raise ValueError("No conda environment seems to be active.") - - if conda_env_name == "base": - logger.warning( - "We recommend using an environment other than base to " - "isolate your project dependencies from conda dependencies" - ) - - local_dependencies_path = os.path.join(os.getcwd(), "env_snapshot.yml") - self._export_conda_env_from_prefix(conda_env_prefix, local_dependencies_path) - - return local_dependencies_path - - def _get_active_conda_env_prefix(self) -> str: - """Returns the conda prefix from the set environment variable. None otherwise.""" - return os.getenv("CONDA_PREFIX") - - def _get_active_conda_env_name(self) -> str: - """Returns the conda environment name from the set environment variable. None otherwise.""" - return os.getenv("CONDA_DEFAULT_ENV") - - def bootstrap( - self, local_dependencies_file: str, client_python_version: str, conda_env: str = None - ): - """Bootstraps the runtime environment by installing the additional dependencies if any. - - Args: - local_dependencies_file (str): path where dependencies file exists. - conda_env (str): conda environment to be activated. Default is None. 
- - Returns: None - """ - - if local_dependencies_file.endswith(".txt"): - if conda_env: - self._install_req_txt_in_conda_env(conda_env, local_dependencies_file) - self._write_conda_env_to_file(conda_env) - - else: - self._install_requirements_txt(local_dependencies_file, _python_executable()) - - elif local_dependencies_file.endswith(".yml") or local_dependencies_file.endswith(".yaml"): - if conda_env: - self._update_conda_env(conda_env, local_dependencies_file) - else: - conda_env = "sagemaker-runtime-env" - self._create_conda_env(conda_env, local_dependencies_file) - self._validate_python_version(client_python_version, conda_env) - self._write_conda_env_to_file(conda_env) - - def run_pre_exec_script(self, pre_exec_script_path: str): - """Runs script of pre-execution commands if existing. - - Args: - pre_exec_script_path (str): Path to pre-execution command script file. - """ - if os.path.isfile(pre_exec_script_path): - logger.info("Running pre-execution commands in '%s'", pre_exec_script_path) - return_code, error_logs = _run_pre_execution_command_script(pre_exec_script_path) - - if return_code: - error_message = ( - f"Encountered error while running pre-execution commands. Reason: {error_logs}" - ) - raise RuntimeEnvironmentError(error_message) - else: - logger.info( - "'%s' does not exist. Assuming no pre-execution commands to run", - pre_exec_script_path, - ) - - def change_dir_permission(self, dirs: list, new_permission: str): - """Change the permission of given directories - - Args: - dirs (list[str]): A list of directories for permission update. - new_permission (str): The new permission for the given directories. 
- """ - - _ERROR_MSG_PREFIX = "Failed to change directory permissions due to: " - command = ["sudo", "chmod", "-R", new_permission] + dirs - logger.info("Executing '%s'.", " ".join(command)) - - try: - subprocess.run(command, check=True, stderr=subprocess.PIPE) - except subprocess.CalledProcessError as called_process_err: - err_msg = called_process_err.stderr.decode("utf-8") - raise RuntimeEnvironmentError(f"{_ERROR_MSG_PREFIX} {err_msg}") - except FileNotFoundError as file_not_found_err: - if "[Errno 2] No such file or directory: 'sudo'" in str(file_not_found_err): - raise RuntimeEnvironmentError( - f"{_ERROR_MSG_PREFIX} {file_not_found_err}. " - "Please contact the image owner to install 'sudo' in the job container " - "and provide sudo privilege to the container user." - ) - raise RuntimeEnvironmentError(file_not_found_err) - - def _is_file_exists(self, dependencies): - """Check whether the dependencies file exists at the given location. - - Raises error if not - """ - if not os.path.isfile(dependencies): - raise ValueError(f'No dependencies file named "{dependencies}" was found.') - - def _install_requirements_txt(self, local_path, python_executable): - """Install requirements.txt file""" - # Validate path to prevent command injection - validated_path = self._validate_path(local_path) - cmd = [python_executable, "-m", "pip", "install", "-r", validated_path, "-U"] - logger.info("Running command: '%s' in the dir: '%s' ", " ".join(cmd), os.getcwd()) - _run_shell_cmd(cmd) - logger.info("Command %s ran successfully", " ".join(cmd)) - - def _create_conda_env(self, env_name, local_path): - """Create conda env using conda yml file""" - # Validate inputs to prevent command injection - self._validate_env_name(env_name) - validated_path = self._validate_path(local_path) - - cmd = [self._get_conda_exe(), "env", "create", "-n", env_name, "--file", validated_path] - logger.info("Creating conda environment %s using: %s.", env_name, " ".join(cmd)) - _run_shell_cmd(cmd) - 
logger.info("Conda environment %s created successfully.", env_name) - - def _install_req_txt_in_conda_env(self, env_name, local_path): - """Install requirements.txt in the given conda environment""" - # Validate inputs to prevent command injection - self._validate_env_name(env_name) - validated_path = self._validate_path(local_path) - - cmd = [self._get_conda_exe(), "run", "-n", env_name, "pip", "install", "-r", validated_path, "-U"] - logger.info("Activating conda env and installing requirements: %s", " ".join(cmd)) - _run_shell_cmd(cmd) - logger.info("Requirements installed successfully in conda env %s", env_name) - - def _update_conda_env(self, env_name, local_path): - """Update conda env using conda yml file""" - # Validate inputs to prevent command injection - self._validate_env_name(env_name) - validated_path = self._validate_path(local_path) - - cmd = [self._get_conda_exe(), "env", "update", "-n", env_name, "--file", validated_path] - logger.info("Updating conda env: %s", " ".join(cmd)) - _run_shell_cmd(cmd) - logger.info("Conda env %s updated succesfully", env_name) - - def _export_conda_env_from_prefix(self, prefix, local_path): - """Export the conda env to a conda yml file""" - # Validate inputs to prevent command injection - validated_prefix = self._validate_path(prefix) - validated_path = self._validate_path(local_path) - - cmd = [self._get_conda_exe(), "env", "export", "-p", validated_prefix, "--no-builds"] - logger.info("Exporting conda environment: %s", " ".join(cmd)) - - # Capture output and write to file instead of using shell redirection - try: - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=False - ) - output, error_output = process.communicate() - return_code = process.wait() - - if return_code: - error_message = f"Encountered error while running command '{' '.join(cmd)}'. 
Reason: {error_output.decode('utf-8')}" - raise RuntimeEnvironmentError(error_message) - - # Write the captured output to the file - with open(validated_path, 'w') as f: - f.write(output.decode('utf-8')) - - logger.info("Conda environment %s exported successfully", validated_prefix) - except Exception as e: - raise RuntimeEnvironmentError(f"Failed to export conda environment: {str(e)}") - - def _write_conda_env_to_file(self, env_name): - """Writes conda env to the text file""" - - file_name = "remote_function_conda_env.txt" - file_path = os.path.join(os.getcwd(), file_name) - with open(file_path, "w") as output_file: - output_file.write(env_name) - - def _get_conda_exe(self): - """Checks whether conda or mamba is available to use""" - - if not subprocess.Popen(["which", "mamba"]).wait(): - return "mamba" - if not subprocess.Popen(["which", "conda"]).wait(): - return "conda" - raise ValueError("Neither conda nor mamba is installed on the image") - - def _python_version_in_conda_env(self, env_name): - """Returns python version inside a conda environment""" - cmd = f"{self._get_conda_exe()} run -n {env_name} python --version" - try: - output = ( - subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT) - .decode("utf-8") - .strip() - ) - # convert 'Python 3.7.16' to [3, 7, 16] - version = output.split("Python ")[1].split(".") - return version[0] + "." 
+ version[1] - except subprocess.CalledProcessError as e: - raise RuntimeEnvironmentError(e.output) - - def _current_python_version(self): - """Returns the current python version where program is running""" - - return f"{sys.version_info.major}.{sys.version_info.minor}".strip() - - def _current_sagemaker_pysdk_version(self): - """Returns the current sagemaker python sdk version where program is running""" - try: - from importlib import metadata - - return metadata.version("sagemaker") - except Exception: - return "3.0.0.dev0" # Development version fallback - - def _validate_python_version(self, client_python_version: str, conda_env: str = None): - """Validate the python version - - Validates if the python version where remote function runs - matches the one used on client side. - """ - if conda_env: - job_python_version = self._python_version_in_conda_env(conda_env) - else: - job_python_version = self._current_python_version() - if client_python_version.strip() != job_python_version.strip(): - raise RuntimeEnvironmentError( - f"Python version found in the container is '{job_python_version}' which " - f"does not match python version '{client_python_version}' on the local client. " - f"Please make sure that the python version used in the training container " - f"is same as the local python version." - ) - - def _validate_sagemaker_pysdk_version(self, client_sagemaker_pysdk_version): - """Validate the sagemaker python sdk version - - Validates if the sagemaker python sdk version where remote function runs - matches the one used on client side. - Otherwise, log a warning to call out that unexpected behaviors - may occur in this case. 
- """ - job_sagemaker_pysdk_version = self._current_sagemaker_pysdk_version() - if ( - client_sagemaker_pysdk_version - and client_sagemaker_pysdk_version != job_sagemaker_pysdk_version - ): - logger.warning( - "Inconsistent sagemaker versions found: " - "sagemaker python sdk version found in the container is " - "'%s' which does not match the '%s' on the local client. " - "Please make sure that the sagemaker version used in the training container " - "is the same as the local sagemaker version in case of unexpected behaviors.", - job_sagemaker_pysdk_version, - client_sagemaker_pysdk_version, - ) - - -def _run_and_get_output_shell_cmd(cmd: str) -> str: - """Run and return the output of the given shell command""" - return subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT).decode("utf-8") - - -def _run_pre_execution_command_script(script_path: str): - """This method runs a given shell script using subprocess - - Raises RuntimeEnvironmentError if the shell script fails - """ - current_dir = os.path.dirname(script_path) - - process = subprocess.Popen( - ["/bin/bash", "-eu", script_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=current_dir, - ) - - _log_output(process) - error_logs = _log_error(process) - return_code = process.wait() - - return return_code, error_logs - - -def _run_shell_cmd(cmd: list): - """This method runs a given shell command using subprocess - - Args: - cmd (list): Command and arguments as a list (e.g., ['pip', 'install', '-r', 'requirements.txt']) - - Raises: - RuntimeEnvironmentError: If the command fails - ValueError: If cmd is not a list - """ - if not isinstance(cmd, list): - raise ValueError("Command must be a list of arguments for security reasons") - - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) - - _log_output(process) - error_logs = _log_error(process) - return_code = process.wait() - if return_code: - error_message = f"Encountered error while running 
command '{' '.join(cmd)}'. Reason: {error_logs}" - raise RuntimeEnvironmentError(error_message) - - -def _log_output(process): - """This method takes in Popen process and logs the output of that process""" - with process.stdout as pipe: - for line in iter(pipe.readline, b""): - logger.info(str(line, "UTF-8")) - - -def _log_error(process): - """This method takes in Popen process and logs the error of that process. - - Returns those logs as a string - """ - - error_logs = "" - with process.stderr as pipe: - for line in iter(pipe.readline, b""): - error_str = str(line, "UTF-8") - if "ERROR:" in error_str: - logger.error(error_str) - else: - logger.warning(error_str) - error_logs = error_logs + error_str - - return error_logs - - -def _python_executable(): - """Return the real path for the Python executable, if it exists. - - Return RuntimeEnvironmentError otherwise. - - Returns: - (str): The real path of the current Python executable. - """ - if not sys.executable: - raise RuntimeEnvironmentError( - "Failed to retrieve the path for the Python executable binary" - ) - return sys.executable - - -class RuntimeEnvironmentError(Exception): - """The base exception class for bootstrap env excepitons""" - - def __init__(self, message): - self.message = message - super().__init__(self.message) diff --git a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/spark_app.py b/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/spark_app.py deleted file mode 100644 index 21eef068b9..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/runtime_environment/spark_app.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. 
This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""This is a simple scrip of spark which invokes the pickled remote function""" -from __future__ import absolute_import - -from sagemaker.core.remote_function import invoke_function - -invoke_function.main() diff --git a/sagemaker-core/src/sagemaker/core/remote_function/spark_config.py b/sagemaker-core/src/sagemaker/core/remote_function/spark_config.py deleted file mode 100644 index 6b25d5da8b..0000000000 --- a/sagemaker-core/src/sagemaker/core/remote_function/spark_config.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""This module is used to define the Spark job config to remote function.""" -from __future__ import absolute_import - -from typing import Optional, List, Dict, Union -import attr -from urllib.parse import urlparse -from sagemaker.core.workflow import is_pipeline_variable - - -def _validate_configuration(instance, attribute, configuration): - # pylint: disable=unused-argument - """This is the helper method to validate the spark configuration""" - if configuration: - SparkConfigUtils.validate_configuration(configuration=configuration) - - -def _validate_s3_uri(instance, attribute, s3_uri): - # pylint: disable=unused-argument - """This is the helper method to validate the s3 uri""" - if s3_uri: - SparkConfigUtils.validate_s3_uri(s3_uri) - - -@attr.s(frozen=True) -class SparkConfig: - """This is the class to initialize the spark configurations for remote function - - Attributes: - submit_jars (Optional[List[str]]): A list which contains paths to the jars which - are going to be submitted to Spark job. The location can be a valid s3 uri or - local path to the jar. Defaults to ``None``. - submit_py_files (Optional[List[str]]): A list which contains paths to the python - files which are going to be submitted to Spark job. The location can be a - valid s3 uri or local path to the python file. Defaults to ``None``. - submit_files (Optional[List[str]]): A list which contains paths to the files which - are going to be submitted to Spark job. The location can be a valid s3 uri or - local path to the python file. Defaults to ``None``. - configuration (list[dict] or dict): Configuration for Hadoop, Spark, or Hive. - List or dictionary of EMR-style classifications. - https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html - spark_event_logs_s3_uri (str): S3 path where Spark application events will - be published to. 
- """ - - submit_jars: Optional[List[str]] = attr.ib(default=None) - submit_py_files: Optional[List[str]] = attr.ib(default=None) - submit_files: Optional[List[str]] = attr.ib(default=None) - configuration: Optional[Union[List[Dict], Dict]] = attr.ib( - default=None, validator=_validate_configuration - ) - spark_event_logs_uri: Optional[str] = attr.ib(default=None, validator=_validate_s3_uri) - - -class SparkConfigUtils: - """Util class for spark configurations""" - - _valid_configuration_keys = ["Classification", "Properties", "Configurations"] - _valid_configuration_classifications = [ - "core-site", - "hadoop-env", - "hadoop-log4j", - "hive-env", - "hive-log4j", - "hive-exec-log4j", - "hive-site", - "spark-defaults", - "spark-env", - "spark-log4j", - "spark-hive-site", - "spark-metrics", - "yarn-env", - "yarn-site", - "export", - ] - - @staticmethod - def validate_configuration(configuration: Dict): - """Validates the user-provided Hadoop/Spark/Hive configuration. - - This ensures that the list or dictionary the user provides will serialize to - JSON matching the schema of EMR's application configuration - - Args: - configuration (Dict): A dict that contains the configuration overrides to - the default values. For more information, please visit: - https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html - """ - emr_configure_apps_url = ( - "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html" - ) - if isinstance(configuration, dict): - keys = configuration.keys() - if "Classification" not in keys or "Properties" not in keys: - raise ValueError( - f"Missing one or more required keys in configuration dictionary " - f"{configuration} Please see {emr_configure_apps_url} for more information" - ) - - for key in keys: - if key not in SparkConfigUtils._valid_configuration_keys: - raise ValueError( - f"Invalid key: {key}. " - f"Must be one of {SparkConfigUtils._valid_configuration_keys}. 
" - f"Please see {emr_configure_apps_url} for more information." - ) - if key == "Classification": - if ( - configuration[key] - not in SparkConfigUtils._valid_configuration_classifications - ): - raise ValueError( - f"Invalid classification: {key}. Must be one of " - f"{SparkConfigUtils._valid_configuration_classifications}" - ) - - if isinstance(configuration, list): - for item in configuration: - SparkConfigUtils.validate_configuration(item) - - # TODO (guoqioa@): method only checks urlparse scheme, need to perform deep s3 validation - @staticmethod - def validate_s3_uri(spark_output_s3_path): - """Validate whether the URI uses an S3 scheme. - - In the future, this validation will perform deeper S3 validation. - - Args: - spark_output_s3_path (str): The URI of the Spark output S3 Path. - """ - if is_pipeline_variable(spark_output_s3_path): - return - - if urlparse(spark_output_s3_path).scheme != "s3": - raise ValueError( - f"Invalid s3 path: {spark_output_s3_path}. Please enter something like " - "s3://bucket-name/folder-name" - ) diff --git a/sagemaker-core/src/sagemaker/core/workflow/execution_variables.py b/sagemaker-core/src/sagemaker/core/workflow/execution_variables.py index efb0b8b6ef..380ad0c280 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/execution_variables.py +++ b/sagemaker-core/src/sagemaker/core/workflow/execution_variables.py @@ -56,7 +56,7 @@ def expr(self) -> RequestType: def _pickleable(self): """The pickleable object that can be passed to a remote function invocation.""" - from sagemaker.core.remote_function.core.pipeline_variables import _ExecutionVariable + from sagemaker.train.remote_function.core.pipeline_variables import _ExecutionVariable return _ExecutionVariable(name=self.name) diff --git a/sagemaker-core/src/sagemaker/core/workflow/parameters.py b/sagemaker-core/src/sagemaker/core/workflow/parameters.py index 90505c99cc..81d6d59d94 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/parameters.py +++ 
b/sagemaker-core/src/sagemaker/core/workflow/parameters.py @@ -96,7 +96,7 @@ def expr(self) -> Dict[str, str]: def _pickleable(self): """The pickleable object that can be passed to a remote function invocation.""" - from sagemaker.core.remote_function.core.pipeline_variables import ( + from sagemaker.train.remote_function.core.pipeline_variables import ( _ParameterString, _ParameterInteger, _ParameterBoolean, diff --git a/sagemaker-core/src/sagemaker/core/workflow/properties.py b/sagemaker-core/src/sagemaker/core/workflow/properties.py index c9e897e178..366cecfd3f 100644 --- a/sagemaker-core/src/sagemaker/core/workflow/properties.py +++ b/sagemaker-core/src/sagemaker/core/workflow/properties.py @@ -137,7 +137,7 @@ def __reduce__(self): def _pickleable(self): """The pickleable object that can be passed to a remote function invocation.""" - from sagemaker.core.remote_function.core.pipeline_variables import _Properties + from sagemaker.train.remote_function.core.pipeline_variables import _Properties prefix = f"Steps.{self.step_name}" full_path = prefix if self.path is None else f"{prefix}.{self.path}" diff --git a/sagemaker-core/tests/unit/remote_function/__init__.py b/sagemaker-core/tests/unit/remote_function/__init__.py deleted file mode 100644 index 6549052177..0000000000 --- a/sagemaker-core/tests/unit/remote_function/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
diff --git a/sagemaker-core/tests/unit/remote_function/runtime_environment/__init__.py b/sagemaker-core/tests/unit/remote_function/runtime_environment/__init__.py deleted file mode 100644 index 6549052177..0000000000 --- a/sagemaker-core/tests/unit/remote_function/runtime_environment/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. diff --git a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_bootstrap_runtime_environment.py b/sagemaker-core/tests/unit/remote_function/runtime_environment/test_bootstrap_runtime_environment.py deleted file mode 100644 index cc8319f935..0000000000 --- a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_bootstrap_runtime_environment.py +++ /dev/null @@ -1,548 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -import pytest -from unittest.mock import Mock, patch, mock_open, MagicMock -import json -import sys - -from sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment import ( - _bootstrap_runtime_env_for_remote_function, - _bootstrap_runtime_env_for_pipeline_step, - _handle_pre_exec_scripts, - _install_dependencies, - _unpack_user_workspace, - _write_failure_reason_file, - _parse_args, - log_key_value, - log_env_variables, - mask_sensitive_info, - num_cpus, - num_gpus, - num_neurons, - safe_serialize, - set_env, - main, - SUCCESS_EXIT_CODE, - DEFAULT_FAILURE_CODE, - SENSITIVE_KEYWORDS, - HIDDEN_VALUE, -) -from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - _DependencySettings, -) - - -class TestBootstrapRuntimeEnvironment: - """Test cases for bootstrap runtime environment functions""" - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._unpack_user_workspace" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._handle_pre_exec_scripts" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._install_dependencies" - ) - def test_bootstrap_runtime_env_for_remote_function( - self, mock_install, mock_handle, mock_unpack - ): - """Test _bootstrap_runtime_env_for_remote_function""" - mock_unpack.return_value = "/workspace" - dependency_settings = _DependencySettings(dependency_file="requirements.txt") - - _bootstrap_runtime_env_for_remote_function( - client_python_version="3.8", conda_env="myenv", dependency_settings=dependency_settings - ) - - mock_unpack.assert_called_once() - mock_handle.assert_called_once_with("/workspace") - mock_install.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._unpack_user_workspace" - ) - def test_bootstrap_runtime_env_for_remote_function_no_workspace(self, mock_unpack): - 
"""Test _bootstrap_runtime_env_for_remote_function with no workspace""" - mock_unpack.return_value = None - - _bootstrap_runtime_env_for_remote_function(client_python_version="3.8") - - mock_unpack.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._unpack_user_workspace" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.exists" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.mkdir" - ) - def test_bootstrap_runtime_env_for_pipeline_step(self, mock_mkdir, mock_exists, mock_unpack): - """Test _bootstrap_runtime_env_for_pipeline_step""" - mock_unpack.return_value = None - mock_exists.return_value = False - - _bootstrap_runtime_env_for_pipeline_step( - client_python_version="3.8", func_step_workspace="workspace" - ) - - mock_mkdir.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.RuntimeEnvironmentManager" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.isfile" - ) - def test_handle_pre_exec_scripts_exists(self, mock_isfile, mock_manager_class): - """Test _handle_pre_exec_scripts when script exists""" - mock_isfile.return_value = True - mock_manager = Mock() - mock_manager_class.return_value = mock_manager - - _handle_pre_exec_scripts("/workspace") - - mock_manager.run_pre_exec_script.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.RuntimeEnvironmentManager" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.isfile" - ) - def test_handle_pre_exec_scripts_not_exists(self, mock_isfile, mock_manager_class): - """Test _handle_pre_exec_scripts when script doesn't exist""" - mock_isfile.return_value = False - mock_manager = Mock() - 
mock_manager_class.return_value = mock_manager - - _handle_pre_exec_scripts("/workspace") - - mock_manager.run_pre_exec_script.assert_not_called() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.RuntimeEnvironmentManager" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.join" - ) - def test_install_dependencies_with_file(self, mock_join, mock_manager_class): - """Test _install_dependencies with dependency file""" - mock_join.return_value = "/workspace/requirements.txt" - mock_manager = Mock() - mock_manager_class.return_value = mock_manager - - dependency_settings = _DependencySettings(dependency_file="requirements.txt") - - _install_dependencies( - dependency_file_dir="/workspace", - conda_env="myenv", - client_python_version="3.8", - channel_name="channel", - dependency_settings=dependency_settings, - ) - - mock_manager.bootstrap.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.RuntimeEnvironmentManager" - ) - def test_install_dependencies_no_file(self, mock_manager_class): - """Test _install_dependencies with no dependency file""" - mock_manager = Mock() - mock_manager_class.return_value = mock_manager - - dependency_settings = _DependencySettings(dependency_file=None) - - _install_dependencies( - dependency_file_dir="/workspace", - conda_env=None, - client_python_version="3.8", - channel_name="channel", - dependency_settings=dependency_settings, - ) - - mock_manager.bootstrap.assert_not_called() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.exists" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.isfile" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.shutil.unpack_archive" - ) - @patch( - 
"sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.pathlib.Path" - ) - def test_unpack_user_workspace_success(self, mock_path, mock_unpack, mock_isfile, mock_exists): - """Test _unpack_user_workspace successfully unpacks workspace""" - mock_exists.return_value = True - mock_isfile.return_value = True - mock_path.return_value.absolute.return_value = "/workspace" - - result = _unpack_user_workspace() - - assert result is not None - mock_unpack.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.exists" - ) - def test_unpack_user_workspace_no_directory(self, mock_exists): - """Test _unpack_user_workspace when directory doesn't exist""" - mock_exists.return_value = False - - result = _unpack_user_workspace() - - assert result is None - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.exists" - ) - @patch("builtins.open", new_callable=mock_open) - def test_write_failure_reason_file(self, mock_file, mock_exists): - """Test _write_failure_reason_file""" - mock_exists.return_value = False - - _write_failure_reason_file("Test error message") - - mock_file.assert_called_once() - mock_file().write.assert_called_once_with("RuntimeEnvironmentError: Test error message") - - def test_parse_args(self): - """Test _parse_args""" - args = _parse_args( - [ - "--job_conda_env", - "myenv", - "--client_python_version", - "3.8", - "--dependency_settings", - '{"dependency_file": "requirements.txt"}', - ] - ) - - assert args.job_conda_env == "myenv" - assert args.client_python_version == "3.8" - assert args.dependency_settings == '{"dependency_file": "requirements.txt"}' - - -class TestLoggingFunctions: - """Test cases for logging functions""" - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.logger" - ) - def test_log_key_value_normal(self, mock_logger): - """Test log_key_value with normal 
key""" - log_key_value("MY_KEY", "my_value") - - mock_logger.info.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.logger" - ) - def test_log_key_value_sensitive(self, mock_logger): - """Test log_key_value with sensitive key""" - log_key_value("MY_PASSWORD", "secret123") - - mock_logger.info.assert_called_once() - call_args = mock_logger.info.call_args[0] - assert HIDDEN_VALUE in str(call_args) - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.logger" - ) - def test_log_key_value_dict(self, mock_logger): - """Test log_key_value with dictionary value""" - log_key_value("MY_CONFIG", {"key": "value"}) - - mock_logger.info.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.logger" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.environ", - {"ENV_VAR": "value"}, - ) - def test_log_env_variables(self, mock_logger): - """Test log_env_variables""" - log_env_variables({"CUSTOM_VAR": "custom_value"}) - - assert mock_logger.info.call_count >= 2 - - def test_mask_sensitive_info(self): - """Test mask_sensitive_info""" - data = {"username": "user", "password": "secret", "nested": {"api_key": "key123"}} - - result = mask_sensitive_info(data) - - assert result["password"] == HIDDEN_VALUE - assert result["nested"]["api_key"] == HIDDEN_VALUE - assert result["username"] == "user" - - -class TestResourceFunctions: - """Test cases for resource detection functions""" - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.multiprocessing.cpu_count" - ) - def test_num_cpus(self, mock_cpu_count): - """Test num_cpus""" - mock_cpu_count.return_value = 4 - - result = num_cpus() - - assert result == 4 - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.subprocess.check_output" - ) 
- def test_num_gpus_with_gpus(self, mock_check_output): - """Test num_gpus when GPUs are present""" - mock_check_output.return_value = b"GPU 0: Tesla V100\nGPU 1: Tesla V100\n" - - result = num_gpus() - - assert result == 2 - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.subprocess.check_output" - ) - def test_num_gpus_no_gpus(self, mock_check_output): - """Test num_gpus when no GPUs are present""" - mock_check_output.side_effect = OSError() - - result = num_gpus() - - assert result == 0 - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.subprocess.check_output" - ) - def test_num_neurons_with_neurons(self, mock_check_output): - """Test num_neurons when neurons are present""" - mock_check_output.return_value = b'[{"nc_count": 2}, {"nc_count": 2}]' - - result = num_neurons() - - assert result == 4 - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.subprocess.check_output" - ) - def test_num_neurons_no_neurons(self, mock_check_output): - """Test num_neurons when no neurons are present""" - mock_check_output.side_effect = OSError() - - result = num_neurons() - - assert result == 0 - - -class TestSerializationFunctions: - """Test cases for serialization functions""" - - def test_safe_serialize_string(self): - """Test safe_serialize with string""" - result = safe_serialize("test_string") - - assert result == "test_string" - - def test_safe_serialize_dict(self): - """Test safe_serialize with dictionary""" - result = safe_serialize({"key": "value"}) - - assert result == '{"key": "value"}' - - def test_safe_serialize_list(self): - """Test safe_serialize with list""" - result = safe_serialize([1, 2, 3]) - - assert result == "[1, 2, 3]" - - def test_safe_serialize_non_serializable(self): - """Test safe_serialize with non-serializable object""" - - class CustomObject: - def __str__(self): - return "custom_object" - - result = 
safe_serialize(CustomObject()) - - assert "custom_object" in result - - -class TestSetEnv: - """Test cases for set_env function""" - - @patch("builtins.open", new_callable=mock_open) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_cpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_gpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_neurons" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.environ", - {"TRAINING_JOB_NAME": "test-job"}, - ) - def test_set_env_basic(self, mock_neurons, mock_gpus, mock_cpus, mock_file): - """Test set_env with basic configuration""" - mock_cpus.return_value = 4 - mock_gpus.return_value = 0 - mock_neurons.return_value = 0 - - resource_config = { - "current_host": "algo-1", - "current_instance_type": "ml.m5.xlarge", - "hosts": ["algo-1"], - "network_interface_name": "eth0", - } - - set_env(resource_config) - - mock_file.assert_called_once() - - @patch("builtins.open", new_callable=mock_open) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_cpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_gpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_neurons" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.environ", - {"TRAINING_JOB_NAME": "test-job"}, - ) - def test_set_env_with_torchrun(self, mock_neurons, mock_gpus, mock_cpus, mock_file): - """Test set_env with torchrun distribution""" - mock_cpus.return_value = 4 - mock_gpus.return_value = 2 - mock_neurons.return_value = 0 - - resource_config = { - "current_host": "algo-1", - "current_instance_type": "ml.p3.2xlarge", - "hosts": ["algo-1", "algo-2"], - 
"network_interface_name": "eth0", - } - - set_env(resource_config, distribution="torchrun") - - mock_file.assert_called_once() - - @patch("builtins.open", new_callable=mock_open) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_cpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_gpus" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.num_neurons" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.environ", - {"TRAINING_JOB_NAME": "test-job"}, - ) - def test_set_env_with_mpirun(self, mock_neurons, mock_gpus, mock_cpus, mock_file): - """Test set_env with mpirun distribution""" - mock_cpus.return_value = 4 - mock_gpus.return_value = 2 - mock_neurons.return_value = 0 - - resource_config = { - "current_host": "algo-1", - "current_instance_type": "ml.p3.2xlarge", - "hosts": ["algo-1", "algo-2"], - "network_interface_name": "eth0", - } - - set_env(resource_config, distribution="mpirun") - - mock_file.assert_called_once() - - -class TestMain: - """Test cases for main function""" - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._parse_args" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._bootstrap_runtime_env_for_remote_function" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.RuntimeEnvironmentManager" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.getpass.getuser" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment.os.path.exists" - ) - def test_main_success( - self, mock_exists, mock_getuser, mock_manager_class, mock_bootstrap, mock_parse - ): - """Test main function successful execution""" - mock_args = Mock() - 
mock_args.client_python_version = "3.8" - mock_args.client_sagemaker_pysdk_version = "2.0.0" - mock_args.job_conda_env = None - mock_args.pipeline_execution_id = None - mock_args.dependency_settings = None - mock_args.func_step_s3_dir = None - mock_args.distribution = None - mock_args.user_nproc_per_node = None - mock_parse.return_value = mock_args - - mock_getuser.return_value = "root" - mock_exists.return_value = False - - mock_manager = Mock() - mock_manager_class.return_value = mock_manager - - with pytest.raises(SystemExit) as exc_info: - main([]) - - assert exc_info.value.code == SUCCESS_EXIT_CODE - - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._parse_args" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.bootstrap_runtime_environment._write_failure_reason_file" - ) - def test_main_failure(self, mock_write_failure, mock_parse): - """Test main function with failure""" - mock_parse.side_effect = Exception("Test error") - - with pytest.raises(SystemExit) as exc_info: - main([]) - - assert exc_info.value.code == DEFAULT_FAILURE_CODE - mock_write_failure.assert_called_once() diff --git a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_mpi_utils_remote.py b/sagemaker-core/tests/unit/remote_function/runtime_environment/test_mpi_utils_remote.py deleted file mode 100644 index e075489b6b..0000000000 --- a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_mpi_utils_remote.py +++ /dev/null @@ -1,366 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. 
See the License for the specific -# language governing permissions and limitations under the License. - -import pytest -from unittest.mock import Mock, patch, MagicMock, mock_open -import subprocess -import paramiko - -from sagemaker.core.remote_function.runtime_environment.mpi_utils_remote import ( - CustomHostKeyPolicy, - _parse_args, - _can_connect, - _write_file_to_host, - _write_failure_reason_file, - _wait_for_master, - _wait_for_status_file, - _wait_for_workers, - bootstrap_master_node, - bootstrap_worker_node, - start_sshd_daemon, - write_status_file_to_workers, - main, - SUCCESS_EXIT_CODE, - DEFAULT_FAILURE_CODE, - FINISHED_STATUS_FILE, - READY_FILE, - DEFAULT_SSH_PORT, -) - - -class TestCustomHostKeyPolicy: - """Test cases for CustomHostKeyPolicy class""" - - def test_missing_host_key_algo_hostname(self): - """Test missing_host_key accepts algo-* hostnames""" - policy = CustomHostKeyPolicy() - client = Mock() - client.get_host_keys.return_value = Mock() - key = Mock() - key.get_name.return_value = "ssh-rsa" - - # Should not raise exception - policy.missing_host_key(client, "algo-1", key) - - client.get_host_keys().add.assert_called_once() - - def test_missing_host_key_unknown_hostname(self): - """Test missing_host_key rejects unknown hostnames""" - policy = CustomHostKeyPolicy() - client = Mock() - key = Mock() - - with pytest.raises(paramiko.SSHException, match="Unknown host key"): - policy.missing_host_key(client, "unknown-host", key) - - -class TestConnectionFunctions: - """Test cases for connection functions""" - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.paramiko.SSHClient") - def test_can_connect_success(self, mock_ssh_client_class): - """Test _can_connect when connection succeeds""" - mock_client = Mock() - mock_ssh_client_class.return_value.__enter__.return_value = mock_client - - result = _can_connect("algo-1", DEFAULT_SSH_PORT) - - assert result is True - mock_client.connect.assert_called_once_with("algo-1", 
port=DEFAULT_SSH_PORT) - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.paramiko.SSHClient") - def test_can_connect_failure(self, mock_ssh_client_class): - """Test _can_connect when connection fails""" - mock_client = Mock() - mock_client.connect.side_effect = Exception("Connection failed") - mock_ssh_client_class.return_value.__enter__.return_value = mock_client - - result = _can_connect("algo-1", DEFAULT_SSH_PORT) - - assert result is False - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.subprocess.run") - def test_write_file_to_host_success(self, mock_run): - """Test _write_file_to_host when write succeeds""" - mock_run.return_value = Mock() - - result = _write_file_to_host("algo-1", "/tmp/status") - - assert result is True - mock_run.assert_called_once() - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.subprocess.run") - def test_write_file_to_host_failure(self, mock_run): - """Test _write_file_to_host when write fails""" - mock_run.side_effect = subprocess.CalledProcessError(1, "ssh") - - result = _write_file_to_host("algo-1", "/tmp/status") - - assert result is False - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.path.exists") - @patch("builtins.open", new_callable=mock_open) - def test_write_failure_reason_file(self, mock_file, mock_exists): - """Test _write_failure_reason_file""" - mock_exists.return_value = False - - _write_failure_reason_file("Test error") - - mock_file.assert_called_once() - mock_file().write.assert_called_once_with("RuntimeEnvironmentError: Test error") - - -class TestWaitFunctions: - """Test cases for wait functions""" - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._can_connect") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - def test_wait_for_master_success(self, mock_sleep, mock_can_connect): - """Test _wait_for_master when 
master becomes available""" - mock_can_connect.side_effect = [False, False, True] - - _wait_for_master("algo-1", DEFAULT_SSH_PORT, timeout=300) - - assert mock_can_connect.call_count == 3 - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._can_connect") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.time") - def test_wait_for_master_timeout(self, mock_time, mock_sleep, mock_can_connect): - """Test _wait_for_master when timeout occurs""" - mock_can_connect.return_value = False - mock_time.side_effect = [0, 100, 200, 301, 301] - - with pytest.raises(TimeoutError, match="Timed out waiting for master"): - _wait_for_master("algo-1", DEFAULT_SSH_PORT, timeout=300) - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.path.exists") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - def test_wait_for_status_file(self, mock_sleep, mock_exists): - """Test _wait_for_status_file""" - mock_exists.side_effect = [False, False, True] - - _wait_for_status_file("/tmp/status") - - assert mock_exists.call_count == 3 - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._can_connect") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.path.exists") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - def test_wait_for_workers_success(self, mock_sleep, mock_exists, mock_can_connect): - """Test _wait_for_workers when all workers become available""" - mock_can_connect.return_value = True - mock_exists.return_value = True - - _wait_for_workers(["algo-2", "algo-3"], DEFAULT_SSH_PORT, timeout=300) - - assert mock_can_connect.call_count == 2 - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._can_connect") - 
@patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.time") - def test_wait_for_workers_timeout(self, mock_time, mock_sleep, mock_can_connect): - """Test _wait_for_workers when timeout occurs""" - mock_can_connect.return_value = False - mock_time.side_effect = [0, 100, 200, 301, 301] - - with pytest.raises(TimeoutError, match="Timed out waiting for workers"): - _wait_for_workers(["algo-2"], DEFAULT_SSH_PORT, timeout=300) - - def test_wait_for_workers_no_workers(self): - """Test _wait_for_workers with no workers""" - # Should not raise exception - _wait_for_workers([], DEFAULT_SSH_PORT, timeout=300) - - -class TestBootstrapFunctions: - """Test cases for bootstrap functions""" - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._wait_for_workers") - def test_bootstrap_master_node(self, mock_wait): - """Test bootstrap_master_node""" - bootstrap_master_node(["algo-2", "algo-3"]) - - mock_wait.assert_called_once_with(["algo-2", "algo-3"]) - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._wait_for_master") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._write_file_to_host" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._wait_for_status_file" - ) - def test_bootstrap_worker_node(self, mock_wait_status, mock_write, mock_wait_master): - """Test bootstrap_worker_node""" - bootstrap_worker_node("algo-1", "algo-2", "/tmp/status") - - mock_wait_master.assert_called_once_with("algo-1") - mock_write.assert_called_once() - mock_wait_status.assert_called_once_with("/tmp/status") - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.path.exists") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.subprocess.Popen") - def test_start_sshd_daemon_success(self, mock_popen, mock_exists): 
- """Test start_sshd_daemon when sshd exists""" - mock_exists.return_value = True - - start_sshd_daemon() - - mock_popen.assert_called_once() - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.path.exists") - def test_start_sshd_daemon_not_found(self, mock_exists): - """Test start_sshd_daemon when sshd not found""" - mock_exists.return_value = False - - with pytest.raises(RuntimeError, match="SSH daemon not found"): - start_sshd_daemon() - - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._write_file_to_host" - ) - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - def test_write_status_file_to_workers_success(self, mock_sleep, mock_write): - """Test write_status_file_to_workers when writes succeed""" - mock_write.return_value = True - - write_status_file_to_workers(["algo-2", "algo-3"], "/tmp/status") - - assert mock_write.call_count == 2 - - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._write_file_to_host" - ) - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.time.sleep") - def test_write_status_file_to_workers_timeout(self, mock_sleep, mock_write): - """Test write_status_file_to_workers when timeout occurs""" - mock_write.return_value = False - - with pytest.raises(TimeoutError, match="Timed out waiting"): - write_status_file_to_workers(["algo-2"], "/tmp/status") - - -class TestParseArgs: - """Test cases for _parse_args function""" - - def test_parse_args_job_ended_false(self): - """Test _parse_args with job_ended=0""" - args = _parse_args(["--job_ended", "0"]) - - assert args.job_ended == "0" - - def test_parse_args_job_ended_true(self): - """Test _parse_args with job_ended=1""" - args = _parse_args(["--job_ended", "1"]) - - assert args.job_ended == "1" - - def test_parse_args_default(self): - """Test _parse_args with default values""" - args = _parse_args([]) - - assert args.job_ended == "0" - - 
-class TestMain: - """Test cases for main function""" - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._parse_args") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.start_sshd_daemon") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.bootstrap_worker_node" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.environ", - {"SM_MASTER_ADDR": "algo-1", "SM_CURRENT_HOST": "algo-2"}, - ) - def test_main_worker_node_job_running(self, mock_bootstrap_worker, mock_start_sshd, mock_parse): - """Test main for worker node when job is running""" - mock_args = Mock() - mock_args.job_ended = "0" - mock_parse.return_value = mock_args - - main([]) - - mock_start_sshd.assert_called_once() - mock_bootstrap_worker.assert_called_once() - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._parse_args") - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.start_sshd_daemon") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.bootstrap_master_node" - ) - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.json.loads") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.environ", - { - "SM_MASTER_ADDR": "algo-1", - "SM_CURRENT_HOST": "algo-1", - "SM_HOSTS": '["algo-1", "algo-2", "algo-3"]', - }, - ) - def test_main_master_node_job_running( - self, mock_json_loads, mock_bootstrap_master, mock_start_sshd, mock_parse - ): - """Test main for master node when job is running""" - mock_args = Mock() - mock_args.job_ended = "0" - mock_parse.return_value = mock_args - mock_json_loads.return_value = ["algo-1", "algo-2", "algo-3"] - - main([]) - - mock_start_sshd.assert_called_once() - mock_bootstrap_master.assert_called_once_with(["algo-2", "algo-3"]) - - 
@patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._parse_args") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.write_status_file_to_workers" - ) - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.json.loads") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.environ", - { - "SM_MASTER_ADDR": "algo-1", - "SM_CURRENT_HOST": "algo-1", - "SM_HOSTS": '["algo-1", "algo-2"]', - }, - ) - def test_main_master_node_job_ended(self, mock_json_loads, mock_write_status, mock_parse): - """Test main for master node when job has ended""" - mock_args = Mock() - mock_args.job_ended = "1" - mock_parse.return_value = mock_args - mock_json_loads.return_value = ["algo-1", "algo-2"] - - main([]) - - mock_write_status.assert_called_once_with(["algo-2"]) - - @patch("sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._parse_args") - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote._write_failure_reason_file" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.mpi_utils_remote.os.environ", - {"SM_MASTER_ADDR": "algo-1", "SM_CURRENT_HOST": "algo-2"}, - ) - def test_main_with_exception(self, mock_write_failure, mock_parse): - """Test main when exception occurs""" - mock_parse.side_effect = Exception("Test error") - - with pytest.raises(SystemExit) as exc_info: - main([]) - - assert exc_info.value.code == DEFAULT_FAILURE_CODE - mock_write_failure.assert_called_once() diff --git a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_runtime_environment_manager.py b/sagemaker-core/tests/unit/remote_function/runtime_environment/test_runtime_environment_manager.py deleted file mode 100644 index be2f1430d6..0000000000 --- a/sagemaker-core/tests/unit/remote_function/runtime_environment/test_runtime_environment_manager.py +++ /dev/null @@ -1,562 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import pytest -from unittest.mock import Mock, patch, MagicMock, mock_open -import subprocess -import sys - -from sagemaker.core.remote_function.runtime_environment.runtime_environment_manager import ( - RuntimeEnvironmentManager, - RuntimeEnvironmentError, - _DependencySettings, - get_logger, - _run_and_get_output_shell_cmd, - _run_pre_execution_command_script, - _run_shell_cmd, - _log_output, - _log_error, - _python_executable, -) - - -class TestDependencySettings: - """Test cases for _DependencySettings class""" - - def test_init_with_file(self): - """Test initialization with dependency file""" - settings = _DependencySettings(dependency_file="requirements.txt") - - assert settings.dependency_file == "requirements.txt" - - def test_init_without_file(self): - """Test initialization without dependency file""" - settings = _DependencySettings() - - assert settings.dependency_file is None - - def test_to_string(self): - """Test to_string method""" - settings = _DependencySettings(dependency_file="requirements.txt") - - result = settings.to_string() - - assert "requirements.txt" in result - - def test_from_string(self): - """Test from_string method""" - json_str = '{"dependency_file": "requirements.txt"}' - - settings = _DependencySettings.from_string(json_str) - - assert settings.dependency_file == "requirements.txt" - - def test_from_string_none(self): - """Test from_string with None""" - settings = _DependencySettings.from_string(None) - - assert 
settings is None - - def test_from_dependency_file_path(self): - """Test from_dependency_file_path method""" - settings = _DependencySettings.from_dependency_file_path("/path/to/requirements.txt") - - assert settings.dependency_file == "requirements.txt" - - def test_from_dependency_file_path_auto_capture(self): - """Test from_dependency_file_path with auto_capture""" - settings = _DependencySettings.from_dependency_file_path("auto_capture") - - assert settings.dependency_file == "env_snapshot.yml" - - def test_from_dependency_file_path_none(self): - """Test from_dependency_file_path with None""" - settings = _DependencySettings.from_dependency_file_path(None) - - assert settings.dependency_file is None - - -class TestRuntimeEnvironmentManager: - """Test cases for RuntimeEnvironmentManager class""" - - def test_init(self): - """Test initialization""" - manager = RuntimeEnvironmentManager() - - assert manager is not None - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.path.isfile" - ) - def test_snapshot_with_requirements_txt(self, mock_isfile): - """Test snapshot with requirements.txt""" - mock_isfile.return_value = True - manager = RuntimeEnvironmentManager() - - result = manager.snapshot("requirements.txt") - - assert result == "requirements.txt" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.path.isfile" - ) - def test_snapshot_with_conda_yml(self, mock_isfile): - """Test snapshot with conda environment.yml""" - mock_isfile.return_value = True - manager = RuntimeEnvironmentManager() - - result = manager.snapshot("environment.yml") - - assert result == "environment.yml" - - @patch.object(RuntimeEnvironmentManager, "_capture_from_local_runtime") - def test_snapshot_with_auto_capture(self, mock_capture): - """Test snapshot with auto_capture""" - mock_capture.return_value = "env_snapshot.yml" - manager = RuntimeEnvironmentManager() - - result = 
manager.snapshot("auto_capture") - - assert result == "env_snapshot.yml" - mock_capture.assert_called_once() - - def test_snapshot_with_none(self): - """Test snapshot with None""" - manager = RuntimeEnvironmentManager() - - result = manager.snapshot(None) - - assert result is None - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.path.isfile" - ) - def test_snapshot_with_invalid_file(self, mock_isfile): - """Test snapshot with invalid file""" - mock_isfile.return_value = False - manager = RuntimeEnvironmentManager() - - with pytest.raises(ValueError, match="No dependencies file named"): - manager.snapshot("invalid.txt") - - @patch.object(RuntimeEnvironmentManager, "_get_active_conda_env_name") - @patch.object(RuntimeEnvironmentManager, "_get_active_conda_env_prefix") - @patch.object(RuntimeEnvironmentManager, "_export_conda_env_from_prefix") - def test_capture_from_local_runtime_with_conda_env(self, mock_export, mock_prefix, mock_name): - """Test _capture_from_local_runtime with conda environment""" - mock_name.return_value = "myenv" - mock_prefix.return_value = "/opt/conda/envs/myenv" - manager = RuntimeEnvironmentManager() - - result = manager._capture_from_local_runtime() - - assert "env_snapshot.yml" in result - mock_export.assert_called_once() - - @patch.object(RuntimeEnvironmentManager, "_get_active_conda_env_name") - @patch.object(RuntimeEnvironmentManager, "_get_active_conda_env_prefix") - def test_capture_from_local_runtime_no_conda_env(self, mock_prefix, mock_name): - """Test _capture_from_local_runtime without conda environment""" - mock_name.return_value = None - mock_prefix.return_value = None - manager = RuntimeEnvironmentManager() - - with pytest.raises(ValueError, match="No conda environment"): - manager._capture_from_local_runtime() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.getenv" - ) - def test_get_active_conda_env_prefix(self, mock_getenv): - 
"""Test _get_active_conda_env_prefix""" - mock_getenv.return_value = "/opt/conda/envs/myenv" - manager = RuntimeEnvironmentManager() - - result = manager._get_active_conda_env_prefix() - - assert result == "/opt/conda/envs/myenv" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.getenv" - ) - def test_get_active_conda_env_name(self, mock_getenv): - """Test _get_active_conda_env_name""" - mock_getenv.return_value = "myenv" - manager = RuntimeEnvironmentManager() - - result = manager._get_active_conda_env_name() - - assert result == "myenv" - - @patch.object(RuntimeEnvironmentManager, "_install_req_txt_in_conda_env") - @patch.object(RuntimeEnvironmentManager, "_write_conda_env_to_file") - def test_bootstrap_with_requirements_txt_and_conda_env(self, mock_write, mock_install): - """Test bootstrap with requirements.txt and conda environment""" - manager = RuntimeEnvironmentManager() - - manager.bootstrap( - local_dependencies_file="requirements.txt", - client_python_version="3.8", - conda_env="myenv", - ) - - mock_install.assert_called_once_with("myenv", "requirements.txt") - mock_write.assert_called_once_with("myenv") - - @patch.object(RuntimeEnvironmentManager, "_install_requirements_txt") - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._python_executable" - ) - def test_bootstrap_with_requirements_txt_no_conda_env(self, mock_python_exec, mock_install): - """Test bootstrap with requirements.txt without conda environment""" - mock_python_exec.return_value = "/usr/bin/python3" - manager = RuntimeEnvironmentManager() - - manager.bootstrap(local_dependencies_file="requirements.txt", client_python_version="3.8") - - mock_install.assert_called_once() - - @patch.object(RuntimeEnvironmentManager, "_update_conda_env") - @patch.object(RuntimeEnvironmentManager, "_write_conda_env_to_file") - def test_bootstrap_with_conda_yml_and_conda_env(self, mock_write, mock_update): - """Test bootstrap 
with conda yml and existing conda environment""" - manager = RuntimeEnvironmentManager() - - manager.bootstrap( - local_dependencies_file="environment.yml", - client_python_version="3.8", - conda_env="myenv", - ) - - mock_update.assert_called_once() - mock_write.assert_called_once() - - @patch.object(RuntimeEnvironmentManager, "_create_conda_env") - @patch.object(RuntimeEnvironmentManager, "_validate_python_version") - @patch.object(RuntimeEnvironmentManager, "_write_conda_env_to_file") - def test_bootstrap_with_conda_yml_no_conda_env(self, mock_write, mock_validate, mock_create): - """Test bootstrap with conda yml without existing conda environment""" - manager = RuntimeEnvironmentManager() - - manager.bootstrap(local_dependencies_file="environment.yml", client_python_version="3.8") - - mock_create.assert_called_once() - mock_validate.assert_called_once() - mock_write.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.path.isfile" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._run_pre_execution_command_script" - ) - def test_run_pre_exec_script_exists(self, mock_run_script, mock_isfile): - """Test run_pre_exec_script when script exists""" - mock_isfile.return_value = True - mock_run_script.return_value = (0, "") - manager = RuntimeEnvironmentManager() - - manager.run_pre_exec_script("/path/to/script.sh") - - mock_run_script.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.os.path.isfile" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._run_pre_execution_command_script" - ) - def test_run_pre_exec_script_fails(self, mock_run_script, mock_isfile): - """Test run_pre_exec_script when script fails""" - mock_isfile.return_value = True - mock_run_script.return_value = (1, "Error message") - manager = RuntimeEnvironmentManager() - - with 
pytest.raises(RuntimeEnvironmentError, match="Encountered error"): - manager.run_pre_exec_script("/path/to/script.sh") - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.run" - ) - def test_change_dir_permission_success(self, mock_run): - """Test change_dir_permission successfully""" - manager = RuntimeEnvironmentManager() - - manager.change_dir_permission(["/tmp/dir1", "/tmp/dir2"], "777") - - mock_run.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.run" - ) - def test_change_dir_permission_failure(self, mock_run): - """Test change_dir_permission with failure""" - mock_run.side_effect = subprocess.CalledProcessError( - 1, "chmod", stderr=b"Permission denied" - ) - manager = RuntimeEnvironmentManager() - - with pytest.raises(RuntimeEnvironmentError): - manager.change_dir_permission(["/tmp/dir"], "777") - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._run_shell_cmd" - ) - def test_install_requirements_txt(self, mock_run_cmd): - """Test _install_requirements_txt""" - manager = RuntimeEnvironmentManager() - - manager._install_requirements_txt("/path/to/requirements.txt", "/usr/bin/python3") - - mock_run_cmd.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._run_shell_cmd" - ) - @patch.object(RuntimeEnvironmentManager, "_get_conda_exe") - def test_create_conda_env(self, mock_get_conda, mock_run_cmd): - """Test _create_conda_env""" - mock_get_conda.return_value = "conda" - manager = RuntimeEnvironmentManager() - - manager._create_conda_env("myenv", "/path/to/environment.yml") - - mock_run_cmd.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._run_shell_cmd" - ) - @patch.object(RuntimeEnvironmentManager, "_get_conda_exe") - def test_update_conda_env(self, 
mock_get_conda, mock_run_cmd): - """Test _update_conda_env""" - mock_get_conda.return_value = "conda" - manager = RuntimeEnvironmentManager() - - manager._update_conda_env("myenv", "/path/to/environment.yml") - - mock_run_cmd.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - def test_get_conda_exe_mamba(self, mock_popen): - """Test _get_conda_exe returns mamba""" - mock_process = Mock() - mock_process.wait.return_value = 0 - mock_popen.return_value = mock_process - manager = RuntimeEnvironmentManager() - - result = manager._get_conda_exe() - - assert result == "mamba" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - def test_get_conda_exe_conda(self, mock_popen): - """Test _get_conda_exe returns conda""" - mock_process = Mock() - mock_process.wait.side_effect = [1, 0] # mamba not found, conda found - mock_popen.return_value = mock_process - manager = RuntimeEnvironmentManager() - - result = manager._get_conda_exe() - - assert result == "conda" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - def test_get_conda_exe_not_found(self, mock_popen): - """Test _get_conda_exe when neither mamba nor conda found""" - mock_process = Mock() - mock_process.wait.return_value = 1 - mock_popen.return_value = mock_process - manager = RuntimeEnvironmentManager() - - with pytest.raises(ValueError, match="Neither conda nor mamba"): - manager._get_conda_exe() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.check_output" - ) - @patch.object(RuntimeEnvironmentManager, "_get_conda_exe") - def test_python_version_in_conda_env(self, mock_get_conda, mock_check_output): - """Test _python_version_in_conda_env""" - mock_get_conda.return_value = "conda" - mock_check_output.return_value = b"Python 3.8.10" - manager = 
RuntimeEnvironmentManager() - - result = manager._python_version_in_conda_env("myenv") - - assert result == "3.8" - - def test_current_python_version(self): - """Test _current_python_version""" - manager = RuntimeEnvironmentManager() - - result = manager._current_python_version() - - assert result == f"{sys.version_info.major}.{sys.version_info.minor}" - - @patch.object(RuntimeEnvironmentManager, "_python_version_in_conda_env") - def test_validate_python_version_match(self, mock_python_version): - """Test _validate_python_version when versions match""" - mock_python_version.return_value = "3.8" - manager = RuntimeEnvironmentManager() - - # Should not raise error - manager._validate_python_version("3.8", conda_env="myenv") - - @patch.object(RuntimeEnvironmentManager, "_python_version_in_conda_env") - def test_validate_python_version_mismatch(self, mock_python_version): - """Test _validate_python_version when versions don't match""" - mock_python_version.return_value = "3.9" - manager = RuntimeEnvironmentManager() - - with pytest.raises(RuntimeEnvironmentError, match="does not match"): - manager._validate_python_version("3.8", conda_env="myenv") - - @patch.object(RuntimeEnvironmentManager, "_current_sagemaker_pysdk_version") - def test_validate_sagemaker_pysdk_version_match(self, mock_version): - """Test _validate_sagemaker_pysdk_version when versions match""" - mock_version.return_value = "2.0.0" - manager = RuntimeEnvironmentManager() - - # Should not raise error, just log warning - manager._validate_sagemaker_pysdk_version("2.0.0") - - @patch.object(RuntimeEnvironmentManager, "_current_sagemaker_pysdk_version") - def test_validate_sagemaker_pysdk_version_mismatch(self, mock_version): - """Test _validate_sagemaker_pysdk_version when versions don't match""" - mock_version.return_value = "2.1.0" - manager = RuntimeEnvironmentManager() - - # Should log warning but not raise error - manager._validate_sagemaker_pysdk_version("2.0.0") - - -class TestHelperFunctions: - 
"""Test cases for helper functions""" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.check_output" - ) - def test_run_and_get_output_shell_cmd(self, mock_check_output): - """Test _run_and_get_output_shell_cmd""" - mock_check_output.return_value = b"output" - - result = _run_and_get_output_shell_cmd("echo test") - - assert result == "output" - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_output" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_error" - ) - def test_run_pre_execution_command_script(self, mock_log_error, mock_log_output, mock_popen): - """Test _run_pre_execution_command_script""" - mock_process = Mock() - mock_process.wait.return_value = 0 - mock_popen.return_value = mock_process - mock_log_error.return_value = "" - - return_code, error_logs = _run_pre_execution_command_script("/path/to/script.sh") - - assert return_code == 0 - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_output" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_error" - ) - def test_run_shell_cmd_success(self, mock_log_error, mock_log_output, mock_popen): - """Test _run_shell_cmd with successful command""" - mock_process = Mock() - mock_process.wait.return_value = 0 - mock_popen.return_value = mock_process - mock_log_error.return_value = "" - - _run_shell_cmd(["echo", "test"]) - - mock_popen.assert_called_once() - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.subprocess.Popen" - ) - @patch( - 
"sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_output" - ) - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager._log_error" - ) - def test_run_shell_cmd_failure(self, mock_log_error, mock_log_output, mock_popen): - """Test _run_shell_cmd with failed command""" - mock_process = Mock() - mock_process.wait.return_value = 1 - mock_popen.return_value = mock_process - mock_log_error.return_value = "Error message" - - with pytest.raises(RuntimeEnvironmentError, match="Encountered error"): - _run_shell_cmd(["false"]) - - def test_python_executable(self): - """Test _python_executable""" - result = _python_executable() - - assert result == sys.executable - - @patch( - "sagemaker.core.remote_function.runtime_environment.runtime_environment_manager.sys.executable", - None, - ) - def test_python_executable_not_found(self): - """Test _python_executable when not found""" - with pytest.raises(RuntimeEnvironmentError, match="Failed to retrieve"): - _python_executable() - - -class TestRuntimeEnvironmentError: - """Test cases for RuntimeEnvironmentError exception""" - - def test_init(self): - """Test initialization""" - error = RuntimeEnvironmentError("Test error message") - - assert error.message == "Test error message" - assert str(error) == "Test error message" - - def test_raise(self): - """Test raising the exception""" - with pytest.raises(RuntimeEnvironmentError, match="Test error"): - raise RuntimeEnvironmentError("Test error") - - -class TestGetLogger: - """Test cases for get_logger function""" - - def test_get_logger(self): - """Test get_logger returns logger""" - logger = get_logger() - - assert logger is not None - assert logger.name == "sagemaker.remote_function" diff --git a/sagemaker-core/tests/unit/remote_function/test_client.py b/sagemaker-core/tests/unit/remote_function/test_client.py deleted file mode 100644 index 83e1a2db80..0000000000 --- 
a/sagemaker-core/tests/unit/remote_function/test_client.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import pytest -from unittest.mock import Mock -from collections import deque - -from sagemaker.core.remote_function.client import ( - RemoteExecutor, - _submit_worker, - _polling_worker, - _API_CALL_LIMIT, - _PENDING, - _RUNNING, - _CANCELLED, - _FINISHED, -) - - -class TestConstants: - """Test module constants""" - - def test_api_call_limit_constants(self): - assert _API_CALL_LIMIT["SubmittingIntervalInSecs"] == 1 - assert _API_CALL_LIMIT["MinBatchPollingIntervalInSecs"] == 10 - assert _API_CALL_LIMIT["PollingIntervalInSecs"] == 0.5 - - def test_future_state_constants(self): - assert _PENDING == "PENDING" - assert _RUNNING == "RUNNING" - assert _CANCELLED == "CANCELLED" - assert _FINISHED == "FINISHED" - - -class TestRemoteExecutorValidation: - """Test RemoteExecutor argument validation""" - - def test_validate_submit_args_with_valid_args(self): - def my_function(x, y, z=10): - return x + y + z - - RemoteExecutor._validate_submit_args(my_function, 1, 2, z=3) - - def test_validate_submit_args_with_missing_args(self): - def my_function(x, y): - return x + y - - with pytest.raises(TypeError): - RemoteExecutor._validate_submit_args(my_function, 1) - - def test_validate_submit_args_with_extra_args(self): - def my_function(x): - return x - - with pytest.raises(TypeError): - 
RemoteExecutor._validate_submit_args(my_function, 1, 2) - - -class TestWorkerFunctions: - """Test worker thread functions""" - - def test_submit_worker_exits_on_none(self): - """Test that submit worker exits when None is in queue""" - executor = Mock() - executor._pending_request_queue = deque([None]) - executor._running_jobs = {} - executor.max_parallel_jobs = 1 - - mock_condition = Mock() - mock_condition.__enter__ = Mock(return_value=mock_condition) - mock_condition.__exit__ = Mock(return_value=False) - mock_condition.wait_for = Mock(return_value=True) - executor._state_condition = mock_condition - - _submit_worker(executor) - - assert len(executor._pending_request_queue) == 0 - - def test_polling_worker_exits_on_shutdown(self): - """Test that polling worker exits when shutdown flag is set""" - executor = Mock() - executor._running_jobs = {} - executor._pending_request_queue = deque() - executor._shutdown = True - executor._state_condition = Mock() - - _polling_worker(executor) diff --git a/sagemaker-core/tests/unit/remote_function/test_job.py b/sagemaker-core/tests/unit/remote_function/test_job.py deleted file mode 100644 index abc5be68be..0000000000 --- a/sagemaker-core/tests/unit/remote_function/test_job.py +++ /dev/null @@ -1,935 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""Unit tests for sagemaker.core.remote_function.job module.""" -from __future__ import absolute_import - -import json -import os -import pytest -import sys -from unittest.mock import Mock, patch, MagicMock, call, mock_open -from io import BytesIO - -from sagemaker.core.remote_function.job import ( - _JobSettings, - _Job, - _prepare_and_upload_runtime_scripts, - _generate_input_data_config, - _prepare_dependencies_and_pre_execution_scripts, - _prepare_and_upload_workspace, - _convert_run_to_json, - _prepare_and_upload_spark_dependent_files, - _upload_spark_submit_deps, - _upload_serialized_spark_configuration, - _extend_mpirun_to_request, - _extend_torchrun_to_request, - _extend_spark_config_to_request, - _update_job_request_with_checkpoint_config, - _RunInfo, - _get_initial_job_state, - _logs_for_job, - _check_job_status, - _flush_log_streams, - _rule_statuses_changed, - _logs_init, - LogState, -) -from sagemaker.core.remote_function.spark_config import SparkConfig -from sagemaker.core.remote_function.checkpoint_location import CheckpointLocation - - -@pytest.fixture -def mock_session(): - session = Mock() - session.boto_region_name = "us-west-2" - session.default_bucket.return_value = "test-bucket" - session.default_bucket_prefix = "prefix" - session.sagemaker_client = Mock() - session.boto_session = Mock() - session.sagemaker_config = None - return session - - -class TestJobSettings: - """Test _JobSettings class.""" - - def test_init_with_spark_and_image_raises_error(self, mock_session): - """Test that spark_config and image_uri cannot be set together.""" - spark_config = SparkConfig() - with pytest.raises(ValueError, match="spark_config and image_uri cannot be specified"): - _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - image_uri="test-image", - instance_type="ml.m5.xlarge", - ) - - def test_init_with_spark_and_conda_env_raises_error(self, mock_session): - """Test that spark_config and job_conda_env cannot be set together.""" - 
spark_config = SparkConfig() - with pytest.raises(ValueError, match="Remote Spark jobs do not support job_conda_env"): - _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - job_conda_env="test-env", - instance_type="ml.m5.xlarge", - ) - - def test_init_with_spark_and_auto_capture_raises_error(self, mock_session): - """Test that spark_config and auto_capture dependencies cannot be set together.""" - spark_config = SparkConfig() - with pytest.raises(ValueError, match="Remote Spark jobs do not support automatically"): - _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - dependencies="auto_capture", - instance_type="ml.m5.xlarge", - ) - - def test_init_with_pre_execution_commands_and_script_raises_error(self, mock_session): - """Test that pre_execution_commands and pre_execution_script cannot be set together.""" - with pytest.raises( - ValueError, match="Only one of pre_execution_commands or pre_execution_script" - ): - _JobSettings( - sagemaker_session=mock_session, - pre_execution_commands=["echo test"], - pre_execution_script="/path/to/script.sh", - instance_type="ml.m5.xlarge", - image_uri="test-image", - ) - - def test_init_without_instance_type_raises_error(self, mock_session): - """Test that instance_type is required.""" - with pytest.raises(ValueError, match="instance_type is a required parameter"): - _JobSettings(sagemaker_session=mock_session, image_uri="test-image") - - @patch.dict(os.environ, {"SAGEMAKER_INTERNAL_IMAGE_URI": "custom-image"}) - def test_get_default_image_from_env(self, mock_session): - """Test getting default image from environment variable.""" - image = _JobSettings._get_default_image(mock_session) - assert image == "custom-image" - - def test_get_default_image_unsupported_python_raises_error(self, mock_session): - """Test that unsupported Python version raises error.""" - with patch.object(sys, "version_info", (3, 7, 0)): - with pytest.raises( - ValueError, match="Default image is 
supported only for Python versions" - ): - _JobSettings._get_default_image(mock_session) - - def test_get_default_spark_image_unsupported_python_raises_error(self, mock_session): - """Test that unsupported Python version for Spark raises error.""" - with patch.object(sys, "version_info", (3, 8, 0)): - with pytest.raises( - ValueError, - match="SageMaker Spark image for remote job only supports Python version 3.9", - ): - _JobSettings._get_default_spark_image(mock_session) - - -class TestJob: - """Test _Job class.""" - - def test_init(self, mock_session): - """Test _Job initialization.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - assert job.job_name == "test-job" - assert job.s3_uri == "s3://bucket/output" - assert job.hmac_key == "test-key" - - def test_from_describe_response(self, mock_session): - """Test creating _Job from describe response.""" - response = { - "TrainingJobName": "test-job", - "OutputDataConfig": {"S3OutputPath": "s3://bucket/output"}, - "Environment": {"REMOTE_FUNCTION_SECRET_KEY": "test-key"}, - } - job = _Job.from_describe_response(response, mock_session) - assert job.job_name == "test-job" - assert job.s3_uri == "s3://bucket/output" - assert job.hmac_key == "test-key" - - def test_describe_returns_cached_response(self, mock_session): - """Test that describe returns cached response for completed jobs.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job._last_describe_response = {"TrainingJobStatus": "Completed"} - - result = job.describe() - assert result["TrainingJobStatus"] == "Completed" - mock_session.sagemaker_client.describe_training_job.assert_not_called() - - def test_describe_calls_api_for_in_progress_jobs(self, mock_session): - """Test that describe calls API for in-progress jobs.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - mock_session.sagemaker_client.describe_training_job.return_value = { - "TrainingJobStatus": "InProgress" - } - - 
result = job.describe() - assert result["TrainingJobStatus"] == "InProgress" - mock_session.sagemaker_client.describe_training_job.assert_called_once() - - def test_stop(self, mock_session): - """Test stopping a job.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job.stop() - mock_session.sagemaker_client.stop_training_job.assert_called_once_with( - TrainingJobName="test-job" - ) - - @patch("sagemaker.core.remote_function.job._logs_for_job") - def test_wait(self, mock_logs, mock_session): - """Test waiting for job completion.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - mock_logs.return_value = {"TrainingJobStatus": "Completed"} - - job.wait(timeout=100) - mock_logs.assert_called_once_with( - sagemaker_session=mock_session, job_name="test-job", wait=True, timeout=100 - ) - - -class TestUpdateJobRequestWithCheckpointConfig: - """Test _update_job_request_with_checkpoint_config function.""" - - def test_with_checkpoint_in_args(self): - """Test checkpoint config in positional args.""" - checkpoint = CheckpointLocation(s3_uri="s3://bucket/checkpoint") - args = (checkpoint,) - kwargs = {} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" in request_dict - assert request_dict["CheckpointConfig"]["S3Uri"] == "s3://bucket/checkpoint" - assert request_dict["CheckpointConfig"]["LocalPath"] == "/opt/ml/checkpoints/" - - def test_with_checkpoint_in_kwargs(self): - """Test checkpoint config in keyword args.""" - checkpoint = CheckpointLocation(s3_uri="s3://bucket/checkpoint") - args = () - kwargs = {"checkpoint": checkpoint} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" in request_dict - - def test_with_multiple_checkpoints_raises_error(self): - """Test that multiple checkpoints raise error.""" - checkpoint1 = CheckpointLocation(s3_uri="s3://bucket/checkpoint1") - 
checkpoint2 = CheckpointLocation(s3_uri="s3://bucket/checkpoint2") - args = (checkpoint1,) - kwargs = {"checkpoint": checkpoint2} - request_dict = {} - - with pytest.raises( - ValueError, match="cannot have more than one argument of type CheckpointLocation" - ): - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - def test_without_checkpoint(self): - """Test without checkpoint location.""" - args = ("arg1", "arg2") - kwargs = {"key": "value"} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" not in request_dict - - -class TestConvertRunToJson: - """Test _convert_run_to_json function.""" - - def test_convert_run_to_json(self): - """Test converting run to JSON.""" - mock_run = Mock() - mock_run.experiment_name = "test-experiment" - mock_run.run_name = "test-run" - - result = _convert_run_to_json(mock_run) - data = json.loads(result) - - assert data["experiment_name"] == "test-experiment" - assert data["run_name"] == "test-run" - - -class TestUploadSerializedSparkConfiguration: - """Test _upload_serialized_spark_configuration function.""" - - @patch("sagemaker.core.remote_function.job.S3Uploader") - def test_upload_spark_config(self, mock_uploader, mock_session): - """Test uploading Spark configuration.""" - config = {"spark.executor.memory": "4g"} - - _upload_serialized_spark_configuration("s3://bucket/base", "kms-key", config, mock_session) - - mock_uploader.upload_string_as_file_body.assert_called_once() - - def test_upload_spark_config_none(self, mock_session): - """Test uploading None Spark configuration.""" - result = _upload_serialized_spark_configuration( - "s3://bucket/base", "kms-key", None, mock_session - ) - - assert result is None - - -class TestUploadSparkSubmitDeps: - """Test _upload_spark_submit_deps function.""" - - def test_with_none_deps(self, mock_session): - """Test with None dependencies.""" - result = _upload_spark_submit_deps( - None, "workspace", 
"s3://bucket", "kms-key", mock_session - ) - assert result is None - - def test_with_s3_uri(self, mock_session): - """Test with S3 URI.""" - deps = ["s3://bucket/dep.jar"] - result = _upload_spark_submit_deps( - deps, "workspace", "s3://bucket", "kms-key", mock_session - ) - assert "s3://bucket/dep.jar" in result - - def test_with_empty_workspace_raises_error(self, mock_session): - """Test with empty workspace name.""" - deps = ["s3://bucket/dep.jar"] - with pytest.raises(ValueError, match="workspace_name or s3_base_uri may not be empty"): - _upload_spark_submit_deps(deps, "", "s3://bucket", "kms-key", mock_session) - - @patch("os.path.isfile", return_value=False) - def test_with_invalid_local_file_raises_error(self, mock_isfile, mock_session): - """Test with invalid local file.""" - deps = ["/invalid/path.jar"] - with pytest.raises(ValueError, match="is not a valid local file"): - _upload_spark_submit_deps(deps, "workspace", "s3://bucket", "kms-key", mock_session) - - -class TestExtendMpirunToRequest: - """Test _extend_mpirun_to_request function.""" - - def test_without_mpirun(self, mock_session): - """Test without mpirun enabled.""" - job_settings = Mock() - job_settings.use_mpirun = False - request_dict = {"InputDataConfig": []} - - result = _extend_mpirun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_with_single_instance(self, mock_session): - """Test with single instance.""" - job_settings = Mock() - job_settings.use_mpirun = True - job_settings.instance_count = 1 - request_dict = {"InputDataConfig": []} - - result = _extend_mpirun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_with_multiple_instances(self, mock_session): - """Test with multiple instances.""" - job_settings = Mock() - job_settings.use_mpirun = True - job_settings.instance_count = 2 - request_dict = { - "InputDataConfig": [{"DataSource": {"S3DataSource": {"S3Uri": "s3://bucket/data"}}}] - } - - result = 
_extend_mpirun_to_request(request_dict, job_settings) - assert ( - result["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3DataDistributionType"] - == "FullyReplicated" - ) - - -class TestExtendTorchrunToRequest: - """Test _extend_torchrun_to_request function.""" - - def test_without_torchrun(self, mock_session): - """Test without torchrun enabled.""" - job_settings = Mock() - job_settings.use_torchrun = False - request_dict = {"InputDataConfig": []} - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_with_single_instance(self, mock_session): - """Test with single instance.""" - job_settings = Mock() - job_settings.use_torchrun = True - job_settings.instance_count = 1 - request_dict = {"InputDataConfig": []} - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_with_multiple_instances(self, mock_session): - """Test with multiple instances.""" - job_settings = Mock() - job_settings.use_torchrun = True - job_settings.instance_count = 2 - request_dict = { - "InputDataConfig": [{"DataSource": {"S3DataSource": {"S3Uri": "s3://bucket/data"}}}] - } - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert ( - result["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3DataDistributionType"] - == "FullyReplicated" - ) - - -class TestExtendSparkConfigToRequest: - """Test _extend_spark_config_to_request function.""" - - def test_without_spark_config(self, mock_session): - """Test without spark config.""" - job_settings = Mock() - job_settings.spark_config = None - request_dict = {"AlgorithmSpecification": {"ContainerEntrypoint": []}} - - result = _extend_spark_config_to_request(request_dict, job_settings, "s3://bucket") - assert result == request_dict - - @patch("sagemaker.core.remote_function.job._prepare_and_upload_spark_dependent_files") - def test_with_spark_config(self, mock_upload, mock_session): - """Test with spark 
config.""" - mock_upload.return_value = (None, None, None, "s3://bucket/config.json") - - job_settings = Mock() - spark_config = SparkConfig(spark_event_logs_uri="s3://bucket/logs") - job_settings.spark_config = spark_config - job_settings.s3_kms_key = None - job_settings.sagemaker_session = mock_session - - request_dict = { - "AlgorithmSpecification": {"ContainerEntrypoint": []}, - "InputDataConfig": [{"DataSource": {"S3DataSource": {"S3Uri": "s3://bucket/data"}}}], - } - - result = _extend_spark_config_to_request(request_dict, job_settings, "s3://bucket") - assert ( - "--spark-event-logs-s3-uri" in result["AlgorithmSpecification"]["ContainerEntrypoint"] - ) - - -class TestGetInitialJobState: - """Test _get_initial_job_state function.""" - - def test_with_completed_job_and_wait(self): - """Test with completed job and wait=True.""" - description = {"TrainingJobStatus": "Completed"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.COMPLETE - - def test_with_in_progress_job_and_wait(self): - """Test with in-progress job and wait=True.""" - description = {"TrainingJobStatus": "InProgress"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.TAILING - - def test_with_in_progress_job_and_no_wait(self): - """Test with in-progress job and wait=False.""" - description = {"TrainingJobStatus": "InProgress"} - state = _get_initial_job_state(description, "TrainingJobStatus", False) - assert state == LogState.COMPLETE - - -class TestCheckJobStatus: - """Test _check_job_status function.""" - - def test_with_completed_status(self): - """Test with completed status.""" - desc = {"TrainingJobStatus": "Completed"} - _check_job_status("test-job", desc, "TrainingJobStatus") - - def test_with_stopped_status(self): - """Test with stopped status.""" - desc = {"TrainingJobStatus": "Stopped"} - with patch("sagemaker.core.remote_function.job.logger") as mock_logger: - 
_check_job_status("test-job", desc, "TrainingJobStatus") - mock_logger.warning.assert_called_once() - - def test_with_failed_status_raises_error(self): - """Test with failed status.""" - desc = {"TrainingJobStatus": "Failed", "FailureReason": "Test failure"} - with pytest.raises(Exception): - _check_job_status("test-job", desc, "TrainingJobStatus") - - def test_with_capacity_error_raises_capacity_error(self): - """Test with CapacityError.""" - desc = { - "TrainingJobStatus": "Failed", - "FailureReason": "CapacityError: Insufficient capacity", - } - from sagemaker.core import exceptions - - with pytest.raises(exceptions.CapacityError): - _check_job_status("test-job", desc, "TrainingJobStatus") - - -class TestRuleStatusesChanged: - """Test _rule_statuses_changed function.""" - - def test_with_no_last_statuses(self): - """Test with no last statuses.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, None) - assert result is True - - def test_with_changed_status(self): - """Test with changed status.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "Completed"}] - last = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, last) - assert result is True - - def test_with_unchanged_status(self): - """Test with unchanged status.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - last = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, last) - assert result is False - - -class TestLogsInit: - """Test _logs_init function.""" - - def test_with_training_job(self, mock_session): - """Test with training job.""" - description = {"ResourceConfig": {"InstanceCount": 2}} - result = _logs_init(mock_session.boto_session, description, "Training") - instance_count, stream_names, positions, client, log_group, 
dot, color_wrap = result - assert instance_count == 2 - assert log_group == "/aws/sagemaker/TrainingJobs" - - def test_with_training_job_instance_groups(self, mock_session): - """Test with training job using instance groups.""" - description = { - "ResourceConfig": {"InstanceGroups": [{"InstanceCount": 2}, {"InstanceCount": 3}]} - } - result = _logs_init(mock_session.boto_session, description, "Training") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 5 - - def test_with_transform_job(self, mock_session): - """Test with transform job.""" - description = {"TransformResources": {"InstanceCount": 1}} - result = _logs_init(mock_session.boto_session, description, "Transform") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 1 - assert log_group == "/aws/sagemaker/TransformJobs" - - def test_with_processing_job(self, mock_session): - """Test with processing job.""" - description = {"ProcessingResources": {"ClusterConfig": {"InstanceCount": 3}}} - result = _logs_init(mock_session.boto_session, description, "Processing") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 3 - assert log_group == "/aws/sagemaker/ProcessingJobs" - - def test_with_automl_job(self, mock_session): - """Test with AutoML job.""" - description = {} - result = _logs_init(mock_session.boto_session, description, "AutoML") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 0 - assert log_group == "/aws/sagemaker/AutoMLJobs" - - -class TestFlushLogStreams: - """Test _flush_log_streams function.""" - - @patch("sagemaker.core.remote_function.job.sagemaker_logs") - def test_with_no_streams(self, mock_logs, mock_session): - """Test with no log streams.""" - stream_names = [] - positions = {} - client = Mock() - client.describe_log_streams.return_value = 
{"logStreams": []} - - _flush_log_streams( - stream_names, - 1, - client, - "/aws/sagemaker/TrainingJobs", - "test-job", - positions, - False, - lambda x, y: None, - ) - - @patch("sagemaker.core.remote_function.job.sagemaker_logs") - def test_with_client_error_resource_not_found(self, mock_logs, mock_session): - """Test with ResourceNotFoundException.""" - from botocore.exceptions import ClientError - - stream_names = [] - positions = {} - client = Mock() - error_response = {"Error": {"Code": "ResourceNotFoundException"}} - client.describe_log_streams.side_effect = ClientError( - error_response, "describe_log_streams" - ) - - _flush_log_streams( - stream_names, - 1, - client, - "/aws/sagemaker/TrainingJobs", - "test-job", - positions, - False, - lambda x, y: None, - ) - - @patch("sagemaker.core.remote_function.job.sagemaker_logs") - def test_with_client_error_other(self, mock_logs, mock_session): - """Test with other ClientError.""" - from botocore.exceptions import ClientError - - stream_names = [] - positions = {} - client = Mock() - error_response = {"Error": {"Code": "OtherError"}} - client.describe_log_streams.side_effect = ClientError( - error_response, "describe_log_streams" - ) - - with pytest.raises(ClientError): - _flush_log_streams( - stream_names, - 1, - client, - "/aws/sagemaker/TrainingJobs", - "test-job", - positions, - False, - lambda x, y: None, - ) - - -class TestPrepareAndUploadRuntimeScripts: - """Test _prepare_and_upload_runtime_scripts function.""" - - @patch("sagemaker.core.remote_function.job.S3Uploader") - @patch("sagemaker.core.remote_function.job._tmpdir") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("builtins.open", new_callable=mock_open) - def test_without_spark_or_distributed( - self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session - ): - """Test without Spark or distributed training.""" - mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test") - mock_tmpdir.return_value.__exit__ = 
Mock(return_value=False) - mock_uploader.upload.return_value = "s3://bucket/scripts" - - result = _prepare_and_upload_runtime_scripts( - None, "s3://bucket", "kms-key", mock_session, False, False - ) - - assert result == "s3://bucket/scripts" - - @patch("sagemaker.core.remote_function.job.S3Uploader") - @patch("sagemaker.core.remote_function.job._tmpdir") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("builtins.open", new_callable=mock_open) - def test_with_spark(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session): - """Test with Spark config.""" - mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test") - mock_tmpdir.return_value.__exit__ = Mock(return_value=False) - mock_uploader.upload.return_value = "s3://bucket/scripts" - - spark_config = SparkConfig() - result = _prepare_and_upload_runtime_scripts( - spark_config, "s3://bucket", "kms-key", mock_session, False, False - ) - - assert result == "s3://bucket/scripts" - - @patch("sagemaker.core.remote_function.job.S3Uploader") - @patch("sagemaker.core.remote_function.job._tmpdir") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("builtins.open", new_callable=mock_open) - def test_with_torchrun(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session): - """Test with torchrun.""" - mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test") - mock_tmpdir.return_value.__exit__ = Mock(return_value=False) - mock_uploader.upload.return_value = "s3://bucket/scripts" - - result = _prepare_and_upload_runtime_scripts( - None, "s3://bucket", "kms-key", mock_session, True, False - ) - - assert result == "s3://bucket/scripts" - - @patch("sagemaker.core.remote_function.job.S3Uploader") - @patch("sagemaker.core.remote_function.job._tmpdir") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("builtins.open", new_callable=mock_open) - def test_with_mpirun(self, mock_file, mock_shutil, mock_tmpdir, mock_uploader, mock_session): - """Test 
with mpirun.""" - mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test") - mock_tmpdir.return_value.__exit__ = Mock(return_value=False) - mock_uploader.upload.return_value = "s3://bucket/scripts" - - result = _prepare_and_upload_runtime_scripts( - None, "s3://bucket", "kms-key", mock_session, False, True - ) - - assert result == "s3://bucket/scripts" - - -class TestPrepareAndUploadWorkspace: - """Test _prepare_and_upload_workspace function.""" - - def test_without_dependencies_or_workdir(self, mock_session): - """Test without dependencies or workdir.""" - result = _prepare_and_upload_workspace( - None, False, None, None, "s3://bucket", "kms-key", mock_session, None - ) - assert result is None - - @patch("sagemaker.core.remote_function.job.S3Uploader") - @patch("sagemaker.core.remote_function.job._tmpdir") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("sagemaker.core.remote_function.job.copy_workdir") - @patch("os.mkdir") - @patch("os.path.isdir", return_value=False) - def test_with_workdir( - self, - mock_isdir, - mock_mkdir, - mock_copy, - mock_shutil, - mock_tmpdir, - mock_uploader, - mock_session, - ): - """Test with workdir.""" - mock_tmpdir.return_value.__enter__ = Mock(return_value="/tmp/test") - mock_tmpdir.return_value.__exit__ = Mock(return_value=False) - mock_shutil.make_archive.return_value = "/tmp/test/workspace.zip" - mock_uploader.upload.return_value = "s3://bucket/workspace.zip" - - result = _prepare_and_upload_workspace( - None, True, None, None, "s3://bucket", "kms-key", mock_session, None - ) - - assert result == "s3://bucket/workspace.zip" - - -class TestPrepareDependenciesAndPreExecutionScripts: - """Test _prepare_dependencies_and_pre_execution_scripts function.""" - - def test_without_dependencies_or_scripts(self, mock_session): - """Test without dependencies or scripts.""" - result = _prepare_dependencies_and_pre_execution_scripts( - None, None, None, "s3://bucket", "kms-key", mock_session, "/tmp" - ) - assert 
result is None - - @patch("sagemaker.core.workflow.utilities.load_step_compilation_context") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("sagemaker.core.remote_function.job.S3Uploader") - def test_with_dependencies(self, mock_uploader, mock_shutil, mock_context, mock_session): - """Test with dependencies file.""" - mock_shutil.copy2.return_value = "/tmp/requirements.txt" - mock_uploader.upload.return_value = "s3://bucket/deps" - mock_context.return_value = Mock(step_name="step", pipeline_build_time="123") - - result = _prepare_dependencies_and_pre_execution_scripts( - "/path/to/requirements.txt", None, None, "s3://bucket", "kms-key", mock_session, "/tmp" - ) - - assert result == "s3://bucket/deps" - - @patch("sagemaker.core.workflow.utilities.load_step_compilation_context") - @patch("builtins.open", create=True) - @patch("sagemaker.core.remote_function.job.S3Uploader") - def test_with_pre_execution_commands( - self, mock_uploader, mock_open, mock_context, mock_session - ): - """Test with pre-execution commands.""" - mock_uploader.upload.return_value = "s3://bucket/scripts" - mock_context.return_value = Mock(step_name="step", pipeline_build_time="123") - - result = _prepare_dependencies_and_pre_execution_scripts( - None, ["echo test"], None, "s3://bucket", "kms-key", mock_session, "/tmp" - ) - - assert result == "s3://bucket/scripts" - - @patch("sagemaker.core.workflow.utilities.load_step_compilation_context") - @patch("sagemaker.core.remote_function.job.shutil") - @patch("sagemaker.core.remote_function.job.S3Uploader") - def test_with_pre_execution_script( - self, mock_uploader, mock_shutil, mock_context, mock_session - ): - """Test with pre-execution script.""" - mock_shutil.copy2.return_value = "/tmp/pre_exec.sh" - mock_uploader.upload.return_value = "s3://bucket/scripts" - mock_context.return_value = Mock(step_name="step", pipeline_build_time="123") - - result = _prepare_dependencies_and_pre_execution_scripts( - None, None, 
"/path/to/script.sh", "s3://bucket", "kms-key", mock_session, "/tmp" - ) - - assert result == "s3://bucket/scripts" - - -class TestPrepareAndUploadSparkDependentFiles: - """Test _prepare_and_upload_spark_dependent_files function.""" - - def test_without_spark_config(self, mock_session): - """Test without Spark config.""" - result = _prepare_and_upload_spark_dependent_files( - None, "s3://bucket", "kms-key", mock_session - ) - assert result == (None, None, None, None) - - @patch("sagemaker.core.remote_function.job._upload_spark_submit_deps") - @patch("sagemaker.core.remote_function.job._upload_serialized_spark_configuration") - def test_with_spark_config(self, mock_upload_config, mock_upload_deps, mock_session): - """Test with Spark config.""" - mock_upload_deps.return_value = "s3://bucket/deps" - mock_upload_config.return_value = "s3://bucket/config.json" - - spark_config = SparkConfig( - submit_jars=["test.jar"], - submit_py_files=["test.py"], - submit_files=["test.txt"], - configuration={"Classification": "spark-defaults", "Properties": {"key": "value"}}, - ) - - result = _prepare_and_upload_spark_dependent_files( - spark_config, "s3://bucket", "kms-key", mock_session - ) - - assert len(result) == 4 - - -class TestJobCompile: - """Test _Job.compile method.""" - - @patch("sagemaker.core.remote_function.job.StoredFunction") - @patch("sagemaker.core.remote_function.job._generate_input_data_config") - def test_compile_basic(self, mock_input_config, mock_stored_func, mock_session): - """Test basic compile.""" - mock_input_config.return_value = [] - mock_stored_func.return_value.save = Mock() - - job_settings = Mock() - job_settings.max_runtime_in_seconds = 3600 - job_settings.max_wait_time_in_seconds = None - job_settings.max_retry_attempts = 1 - job_settings.role = "arn:aws:iam::123456789012:role/test" - job_settings.tags = None - job_settings.s3_kms_key = None - job_settings.disable_output_compression = False - job_settings.volume_size = 30 - 
job_settings.instance_count = 1 - job_settings.instance_type = "ml.m5.xlarge" - job_settings.volume_kms_key = None - job_settings.keep_alive_period_in_seconds = None - job_settings.enable_network_isolation = False - job_settings.encrypt_inter_container_traffic = False - job_settings.vpc_config = None - job_settings.use_spot_instances = False - job_settings.environment_variables = {} - job_settings.image_uri = "test-image" - job_settings.sagemaker_session = mock_session - job_settings.use_torchrun = False - job_settings.use_mpirun = False - job_settings.nproc_per_node = None - job_settings.job_conda_env = None - job_settings.spark_config = None - job_settings.dependencies = None - - def test_func(): - pass - - result = _Job.compile(job_settings, "test-job", "s3://bucket", test_func, (), {}) - - assert result["TrainingJobName"] == "test-job" - assert result["RoleArn"] == "arn:aws:iam::123456789012:role/test" - - -class TestJobStart: - """Test _Job.start method.""" - - @patch("sagemaker.core.remote_function.job._Job.compile") - @patch("sagemaker.core.remote_function.job._Job._get_job_name") - def test_start(self, mock_get_name, mock_compile, mock_session): - """Test starting a job.""" - mock_get_name.return_value = "test-job" - mock_compile.return_value = { - "TrainingJobName": "test-job", - "Environment": {"REMOTE_FUNCTION_SECRET_KEY": "test-key"}, - } - - job_settings = Mock() - job_settings.s3_root_uri = "s3://bucket" - job_settings.sagemaker_session = mock_session - - def test_func(): - pass - - job = _Job.start(job_settings, test_func, (), {}) - - assert job.job_name == "test-job" - mock_session.sagemaker_client.create_training_job.assert_called_once() - - -class TestJobGetJobName: - """Test _Job._get_job_name method.""" - - def test_with_job_name_prefix(self, mock_session): - """Test with job_name_prefix.""" - job_settings = Mock() - job_settings.job_name_prefix = "my-job" - - def test_func(): - pass - - result = _Job._get_job_name(job_settings, test_func) - 
assert "my-job" in result - - def test_without_job_name_prefix(self, mock_session): - """Test without job_name_prefix.""" - job_settings = Mock() - job_settings.job_name_prefix = None - - def test_func(): - pass - - result = _Job._get_job_name(job_settings, test_func) - assert "test-func" in result - - def test_with_special_characters_in_func_name(self, mock_session): - """Test with special characters in function name.""" - job_settings = Mock() - job_settings.job_name_prefix = None - - def _test_func(): - pass - - result = _Job._get_job_name(job_settings, _test_func) - assert result.startswith("test-func") diff --git a/sagemaker-core/tests/unit/remote_function/test_job_comprehensive.py b/sagemaker-core/tests/unit/remote_function/test_job_comprehensive.py deleted file mode 100644 index 4069029685..0000000000 --- a/sagemaker-core/tests/unit/remote_function/test_job_comprehensive.py +++ /dev/null @@ -1,535 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
-"""Comprehensive unit tests for uncovered lines in sagemaker.core.remote_function.job module.""" -from __future__ import absolute_import - -import json -import os -import pytest -import sys -import tempfile -from unittest.mock import Mock, patch, MagicMock, mock_open -from io import BytesIO - -from sagemaker.core.remote_function.job import ( - _JobSettings, - _Job, - _update_job_request_with_checkpoint_config, - _convert_run_to_json, - _upload_spark_submit_deps, - _upload_serialized_spark_configuration, - _extend_mpirun_to_request, - _extend_torchrun_to_request, - _check_job_status, - _rule_statuses_changed, - _logs_init, - _get_initial_job_state, - LogState, - _RunInfo, -) -from sagemaker.core.remote_function.checkpoint_location import CheckpointLocation - - -@pytest.fixture -def mock_session(): - session = Mock() - session.boto_region_name = "us-west-2" - session.default_bucket.return_value = "test-bucket" - session.default_bucket_prefix = "prefix" - session.sagemaker_client = Mock() - session.boto_session = Mock() - session.sagemaker_config = {} - return session - - -class TestJobSettingsValidation: - """Test _JobSettings validation logic for uncovered lines.""" - - def test_spark_config_with_image_uri_raises_error(self, mock_session): - """Test lines 619-620: spark_config and image_uri validation.""" - from sagemaker.core.remote_function.spark_config import SparkConfig - - spark_config = SparkConfig() - with pytest.raises(ValueError, match="spark_config and image_uri cannot be specified"): - _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - image_uri="test-image", - instance_type="ml.m5.xlarge", - ) - - def test_spark_config_with_conda_env_raises_error(self, mock_session): - """Test lines 622-623: spark_config and job_conda_env validation.""" - from sagemaker.core.remote_function.spark_config import SparkConfig - - spark_config = SparkConfig() - with pytest.raises(ValueError, match="Remote Spark jobs do not support job_conda_env"): 
- _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - job_conda_env="test-env", - instance_type="ml.m5.xlarge", - ) - - def test_spark_config_with_auto_capture_raises_error(self, mock_session): - """Test lines 625-628: spark_config and auto_capture validation.""" - from sagemaker.core.remote_function.spark_config import SparkConfig - - spark_config = SparkConfig() - with pytest.raises(ValueError, match="Remote Spark jobs do not support automatically"): - _JobSettings( - sagemaker_session=mock_session, - spark_config=spark_config, - dependencies="auto_capture", - instance_type="ml.m5.xlarge", - ) - - def test_pre_execution_commands_and_script_raises_error(self, mock_session): - """Test lines 651-653: pre_execution validation.""" - with pytest.raises( - ValueError, match="Only one of pre_execution_commands or pre_execution_script" - ): - _JobSettings( - sagemaker_session=mock_session, - pre_execution_commands=["echo test"], - pre_execution_script="/path/to/script.sh", - instance_type="ml.m5.xlarge", - image_uri="test-image", - ) - - def test_instance_type_required(self, mock_session): - """Test lines 665-666: instance_type validation.""" - with pytest.raises(ValueError, match="instance_type is a required parameter"): - _JobSettings(sagemaker_session=mock_session, image_uri="test-image") - - @patch.dict(os.environ, {"SAGEMAKER_INTERNAL_IMAGE_URI": "custom-image"}) - def test_get_default_image_from_env(self, mock_session): - """Test lines 785-788: get default image from environment.""" - image = _JobSettings._get_default_image(mock_session) - assert image == "custom-image" - - def test_get_default_image_unsupported_python(self, mock_session): - """Test lines 792-795: unsupported Python version.""" - with patch.object(sys, "version_info", (3, 7, 0)): - with pytest.raises( - ValueError, match="Default image is supported only for Python versions" - ): - _JobSettings._get_default_image(mock_session) - - def 
test_get_default_spark_image_unsupported_python(self, mock_session): - """Test lines 815-817: unsupported Python for Spark.""" - with patch.object(sys, "version_info", (3, 8, 0)): - with pytest.raises( - ValueError, - match="SageMaker Spark image for remote job only supports Python version 3.9", - ): - _JobSettings._get_default_spark_image(mock_session) - - -class TestJobMethods: - """Test _Job class methods for uncovered lines.""" - - def test_from_describe_response(self, mock_session): - """Test lines 848-852: from_describe_response method.""" - response = { - "TrainingJobName": "test-job", - "OutputDataConfig": {"S3OutputPath": "s3://bucket/output"}, - "Environment": {"REMOTE_FUNCTION_SECRET_KEY": "test-key"}, - } - job = _Job.from_describe_response(response, mock_session) - assert job.job_name == "test-job" - assert job.s3_uri == "s3://bucket/output" - assert job.hmac_key == "test-key" - assert job._last_describe_response == response - - def test_describe_cached_completed(self, mock_session): - """Test lines 865-871: describe with cached completed job.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job._last_describe_response = {"TrainingJobStatus": "Completed"} - - result = job.describe() - assert result["TrainingJobStatus"] == "Completed" - mock_session.sagemaker_client.describe_training_job.assert_not_called() - - def test_describe_cached_failed(self, mock_session): - """Test lines 865-871: describe with cached failed job.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job._last_describe_response = {"TrainingJobStatus": "Failed"} - - result = job.describe() - assert result["TrainingJobStatus"] == "Failed" - mock_session.sagemaker_client.describe_training_job.assert_not_called() - - def test_describe_cached_stopped(self, mock_session): - """Test lines 865-871: describe with cached stopped job.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job._last_describe_response 
= {"TrainingJobStatus": "Stopped"} - - result = job.describe() - assert result["TrainingJobStatus"] == "Stopped" - mock_session.sagemaker_client.describe_training_job.assert_not_called() - - def test_stop(self, mock_session): - """Test lines 886-887: stop method.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - job.stop() - mock_session.sagemaker_client.stop_training_job.assert_called_once_with( - TrainingJobName="test-job" - ) - - @patch("sagemaker.core.remote_function.job._logs_for_job") - def test_wait(self, mock_logs, mock_session): - """Test lines 889-903: wait method.""" - job = _Job("test-job", "s3://bucket/output", mock_session, "test-key") - mock_logs.return_value = {"TrainingJobStatus": "Completed"} - - job.wait(timeout=100) - mock_logs.assert_called_once_with( - sagemaker_session=mock_session, job_name="test-job", wait=True, timeout=100 - ) - assert job._last_describe_response["TrainingJobStatus"] == "Completed" - - -class TestCheckpointConfig: - """Test checkpoint configuration for uncovered lines.""" - - def test_checkpoint_in_args(self): - """Test lines 1219-1227: checkpoint in positional args.""" - checkpoint = CheckpointLocation(s3_uri="s3://bucket/checkpoint") - args = (checkpoint,) - kwargs = {} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" in request_dict - assert request_dict["CheckpointConfig"]["S3Uri"] == "s3://bucket/checkpoint" - assert request_dict["CheckpointConfig"]["LocalPath"] == "/opt/ml/checkpoints/" - - def test_checkpoint_in_kwargs(self): - """Test lines 1228-1230: checkpoint in keyword args.""" - checkpoint = CheckpointLocation(s3_uri="s3://bucket/checkpoint") - args = () - kwargs = {"checkpoint": checkpoint} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" in request_dict - assert request_dict["CheckpointConfig"]["S3Uri"] == "s3://bucket/checkpoint" 
- - def test_multiple_checkpoints_raises_error(self): - """Test lines 1237-1239: multiple checkpoints error.""" - checkpoint1 = CheckpointLocation(s3_uri="s3://bucket/checkpoint1") - checkpoint2 = CheckpointLocation(s3_uri="s3://bucket/checkpoint2") - args = (checkpoint1,) - kwargs = {"checkpoint": checkpoint2} - request_dict = {} - - with pytest.raises( - ValueError, match="cannot have more than one argument of type CheckpointLocation" - ): - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - def test_no_checkpoint(self): - """Test lines 1232-1233: no checkpoint location.""" - args = ("arg1", "arg2") - kwargs = {"key": "value"} - request_dict = {} - - _update_job_request_with_checkpoint_config(args, kwargs, request_dict) - - assert "CheckpointConfig" not in request_dict - - -class TestConvertRunToJson: - """Test _convert_run_to_json for uncovered lines.""" - - def test_convert_run(self): - """Test lines 1276-1278: convert run to JSON.""" - mock_run = Mock() - mock_run.experiment_name = "test-experiment" - mock_run.run_name = "test-run" - - result = _convert_run_to_json(mock_run) - data = json.loads(result) - - assert data["experiment_name"] == "test-experiment" - assert data["run_name"] == "test-run" - - -class TestSparkDependencies: - """Test Spark dependency functions for uncovered lines.""" - - def test_upload_spark_config_none(self, mock_session): - """Test lines 1356: upload None Spark configuration.""" - result = _upload_serialized_spark_configuration( - "s3://bucket/base", "kms-key", None, mock_session - ) - assert result is None - - @patch("sagemaker.core.remote_function.job.S3Uploader") - def test_upload_spark_config(self, mock_uploader, mock_session): - """Test lines 1339-1356: upload Spark configuration.""" - config = {"spark.executor.memory": "4g"} - mock_uploader.upload_string_as_file_body = Mock() - - _upload_serialized_spark_configuration("s3://bucket/base", "kms-key", config, mock_session) - - 
mock_uploader.upload_string_as_file_body.assert_called_once() - - def test_upload_spark_deps_none(self, mock_session): - """Test lines 1379-1380: None dependencies.""" - result = _upload_spark_submit_deps( - None, "workspace", "s3://bucket", "kms-key", mock_session - ) - assert result is None - - def test_upload_spark_deps_s3_uri(self, mock_session): - """Test lines 1388-1389: S3 URI dependency.""" - deps = ["s3://bucket/dep.jar"] - result = _upload_spark_submit_deps( - deps, "workspace", "s3://bucket", "kms-key", mock_session - ) - assert "s3://bucket/dep.jar" in result - - def test_upload_spark_deps_s3a_uri(self, mock_session): - """Test lines 1388-1389: S3A URI dependency.""" - deps = ["s3a://bucket/dep.jar"] - result = _upload_spark_submit_deps( - deps, "workspace", "s3://bucket", "kms-key", mock_session - ) - assert "s3a://bucket/dep.jar" in result - - def test_upload_spark_deps_empty_workspace_raises_error(self, mock_session): - """Test lines 1382-1383: empty workspace validation.""" - deps = ["s3://bucket/dep.jar"] - with pytest.raises(ValueError, match="workspace_name or s3_base_uri may not be empty"): - _upload_spark_submit_deps(deps, "", "s3://bucket", "kms-key", mock_session) - - @patch("os.path.isfile", return_value=False) - def test_upload_spark_deps_invalid_file_raises_error(self, mock_isfile, mock_session): - """Test lines 1391-1392: invalid local file.""" - deps = ["/invalid/path.jar"] - with pytest.raises(ValueError, match="is not a valid local file"): - _upload_spark_submit_deps(deps, "workspace", "s3://bucket", "kms-key", mock_session) - - -class TestDistributedTraining: - """Test distributed training functions for uncovered lines.""" - - def test_extend_mpirun_no_mpirun(self, mock_session): - """Test lines 1441-1442: mpirun disabled.""" - job_settings = Mock() - job_settings.use_mpirun = False - request_dict = {"InputDataConfig": []} - - result = _extend_mpirun_to_request(request_dict, job_settings) - assert result == request_dict - - def 
test_extend_mpirun_single_instance(self, mock_session): - """Test lines 1444-1445: single instance.""" - job_settings = Mock() - job_settings.use_mpirun = True - job_settings.instance_count = 1 - request_dict = {"InputDataConfig": []} - - result = _extend_mpirun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_extend_mpirun_multiple_instances(self, mock_session): - """Test lines 1447-1453: multiple instances.""" - job_settings = Mock() - job_settings.use_mpirun = True - job_settings.instance_count = 2 - request_dict = { - "InputDataConfig": [{"DataSource": {"S3DataSource": {"S3Uri": "s3://bucket/data"}}}] - } - - result = _extend_mpirun_to_request(request_dict, job_settings) - assert ( - result["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3DataDistributionType"] - == "FullyReplicated" - ) - - def test_extend_torchrun_no_torchrun(self, mock_session): - """Test lines 1506-1507: torchrun disabled.""" - job_settings = Mock() - job_settings.use_torchrun = False - request_dict = {"InputDataConfig": []} - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_extend_torchrun_single_instance(self, mock_session): - """Test lines 1524-1525: single instance.""" - job_settings = Mock() - job_settings.use_torchrun = True - job_settings.instance_count = 1 - request_dict = {"InputDataConfig": []} - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert result == request_dict - - def test_extend_torchrun_multiple_instances(self, mock_session): - """Test lines 1527-1533: multiple instances.""" - job_settings = Mock() - job_settings.use_torchrun = True - job_settings.instance_count = 2 - request_dict = { - "InputDataConfig": [{"DataSource": {"S3DataSource": {"S3Uri": "s3://bucket/data"}}}] - } - - result = _extend_torchrun_to_request(request_dict, job_settings) - assert ( - result["InputDataConfig"][0]["DataSource"]["S3DataSource"]["S3DataDistributionType"] - == 
"FullyReplicated" - ) - - -class TestJobStatus: - """Test job status functions for uncovered lines.""" - - def test_check_job_status_completed(self): - """Test lines 1978-1979: completed status.""" - desc = {"TrainingJobStatus": "Completed"} - _check_job_status("test-job", desc, "TrainingJobStatus") - - def test_check_job_status_stopped(self): - """Test lines 1978-1986: stopped status.""" - desc = {"TrainingJobStatus": "Stopped"} - with patch("sagemaker.core.remote_function.job.logger") as mock_logger: - _check_job_status("test-job", desc, "TrainingJobStatus") - mock_logger.warning.assert_called_once() - - def test_check_job_status_failed(self): - """Test lines 1987-2011: failed status.""" - desc = {"TrainingJobStatus": "Failed", "FailureReason": "Test failure"} - from sagemaker.core import exceptions - - with pytest.raises(exceptions.UnexpectedStatusException): - _check_job_status("test-job", desc, "TrainingJobStatus") - - def test_check_job_status_capacity_error(self): - """Test lines 2002-2007: CapacityError.""" - desc = { - "TrainingJobStatus": "Failed", - "FailureReason": "CapacityError: Insufficient capacity", - } - from sagemaker.core import exceptions - - with pytest.raises(exceptions.CapacityError): - _check_job_status("test-job", desc, "TrainingJobStatus") - - -class TestRuleStatuses: - """Test rule status functions for uncovered lines.""" - - def test_rule_statuses_no_last(self): - """Test lines 2092-2093: no last statuses.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, None) - assert result is True - - def test_rule_statuses_changed(self): - """Test lines 2095-2098: changed status.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "Completed"}] - last = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, last) - assert result is True - - def test_rule_statuses_unchanged(self): - 
"""Test lines 2100: unchanged status.""" - current = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - last = [{"RuleConfigurationName": "rule1", "RuleEvaluationStatus": "InProgress"}] - result = _rule_statuses_changed(current, last) - assert result is False - - -class TestLogsInit: - """Test _logs_init function for uncovered lines.""" - - def test_logs_init_training_job(self, mock_session): - """Test lines 2098-2105: training job.""" - description = {"ResourceConfig": {"InstanceCount": 2}} - result = _logs_init(mock_session.boto_session, description, "Training") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 2 - assert log_group == "/aws/sagemaker/TrainingJobs" - - def test_logs_init_training_job_instance_groups(self, mock_session): - """Test lines 2098-2103: training job with instance groups.""" - description = { - "ResourceConfig": {"InstanceGroups": [{"InstanceCount": 2}, {"InstanceCount": 3}]} - } - result = _logs_init(mock_session.boto_session, description, "Training") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 5 - - def test_logs_init_transform_job(self, mock_session): - """Test lines 2106-2107: transform job.""" - description = {"TransformResources": {"InstanceCount": 1}} - result = _logs_init(mock_session.boto_session, description, "Transform") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 1 - assert log_group == "/aws/sagemaker/TransformJobs" - - def test_logs_init_processing_job(self, mock_session): - """Test lines 2108-2109: processing job.""" - description = {"ProcessingResources": {"ClusterConfig": {"InstanceCount": 3}}} - result = _logs_init(mock_session.boto_session, description, "Processing") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 3 - assert 
log_group == "/aws/sagemaker/ProcessingJobs" - - def test_logs_init_automl_job(self, mock_session): - """Test lines 2110-2111: AutoML job.""" - description = {} - result = _logs_init(mock_session.boto_session, description, "AutoML") - instance_count, stream_names, positions, client, log_group, dot, color_wrap = result - assert instance_count == 0 - assert log_group == "/aws/sagemaker/AutoMLJobs" - - -class TestGetInitialJobState: - """Test _get_initial_job_state for uncovered lines.""" - - def test_completed_with_wait(self): - """Test lines 2021-2023: completed job with wait.""" - description = {"TrainingJobStatus": "Completed"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.COMPLETE - - def test_failed_with_wait(self): - """Test lines 2021-2023: failed job with wait.""" - description = {"TrainingJobStatus": "Failed"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.COMPLETE - - def test_stopped_with_wait(self): - """Test lines 2021-2023: stopped job with wait.""" - description = {"TrainingJobStatus": "Stopped"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.COMPLETE - - def test_in_progress_with_wait(self): - """Test lines 2022: in-progress job with wait.""" - description = {"TrainingJobStatus": "InProgress"} - state = _get_initial_job_state(description, "TrainingJobStatus", True) - assert state == LogState.TAILING - - def test_in_progress_without_wait(self): - """Test lines 2022: in-progress job without wait.""" - description = {"TrainingJobStatus": "InProgress"} - state = _get_initial_job_state(description, "TrainingJobStatus", False) - assert state == LogState.COMPLETE diff --git a/sagemaker-mlops/src/sagemaker/mlops/workflow/function_step.py b/sagemaker-mlops/src/sagemaker/mlops/workflow/function_step.py index 1f51612c59..ecdf1135a6 100644 --- a/sagemaker-mlops/src/sagemaker/mlops/workflow/function_step.py 
+++ b/sagemaker-mlops/src/sagemaker/mlops/workflow/function_step.py @@ -44,8 +44,8 @@ from sagemaker.core.common_utils import unique_name_from_base_uuid4, format_tags, Tags if TYPE_CHECKING: - from sagemaker.core.remote_function.spark_config import SparkConfig - from sagemaker.core.remote_function.job import _JobSettings + from sagemaker.train.remote_function.spark_config import SparkConfig + from sagemaker.train.remote_function.job import _JobSettings logger = logging.getLogger(__name__) @@ -83,11 +83,11 @@ def __init__( func_kwargs (dict): keyword arguments of the python function. **kwargs: Additional arguments to be passed to the `step` decorator. """ - from sagemaker.core.remote_function.core.pipeline_variables import ( + from sagemaker.train.remote_function.core.pipeline_variables import ( convert_pipeline_variables_to_pickleable, ) - from sagemaker.core.remote_function.core.serialization import CloudpickleSerializer - from sagemaker.core.remote_function.core.stored_function import _SerializedData + from sagemaker.train.remote_function.core.serialization import CloudpickleSerializer + from sagemaker.train.remote_function.core.stored_function import _SerializedData super(_FunctionStep, self).__init__( name, StepTypeEnum.TRAINING, display_name, description, depends_on, retry_policies @@ -151,7 +151,7 @@ def depends_on(self, depends_on: List[Union[str, "Step", StepOutput]]): def _job_settings(self) -> "_JobSettings": """Returns the job settings for the step.""" - from sagemaker.core.remote_function.job import _JobSettings + from sagemaker.train.remote_function.job import _JobSettings context = load_step_compilation_context() @@ -193,7 +193,7 @@ def _job_settings(self) -> "_JobSettings": @property def arguments(self) -> RequestType: """Generates the arguments dictionary that is used to call `create_training_job`.""" - from sagemaker.core.remote_function.job import _Job + from sagemaker.train.remote_function.job import _Job step_compilation_context = 
load_step_compilation_context() @@ -274,7 +274,7 @@ def expr(self) -> RequestType: def _to_json_get(self) -> JsonGet: """Expression structure for workflow service calls using JsonGet resolution.""" - from sagemaker.core.remote_function.core.stored_function import ( + from sagemaker.train.remote_function.core.stored_function import ( JSON_SERIALIZED_RESULT_KEY, JSON_RESULTS_FILE, ) @@ -547,7 +547,7 @@ def _step(func): raise ValueError("Auto Capture of dependencies is not supported for pipeline steps.") # avoid circular import - from sagemaker.core.remote_function.client import RemoteExecutor + from sagemaker.train.remote_function.client import RemoteExecutor @wraps(func) def wrapper(*args, **kwargs): diff --git a/sagemaker-mlops/src/sagemaker/mlops/workflow/pipeline.py b/sagemaker-mlops/src/sagemaker/mlops/workflow/pipeline.py index 30fbba3639..cf48464bbe 100644 --- a/sagemaker-mlops/src/sagemaker/mlops/workflow/pipeline.py +++ b/sagemaker-mlops/src/sagemaker/mlops/workflow/pipeline.py @@ -28,10 +28,10 @@ from sagemaker.core.local.local_session import LocalSession from sagemaker.core._studio import _append_project_tags from sagemaker.core.config.config_schema import PIPELINE_ROLE_ARN_PATH, PIPELINE_TAGS_PATH -from sagemaker.core.remote_function.core.serialization import deserialize_obj_from_s3 -from sagemaker.core.remote_function.core.stored_function import RESULTS_FOLDER -from sagemaker.core.remote_function.errors import RemoteFunctionError -from sagemaker.core.remote_function.job import JOBS_CONTAINER_ENTRYPOINT +from sagemaker.train.remote_function.core.serialization import deserialize_obj_from_s3 +from sagemaker.train.remote_function.core.stored_function import RESULTS_FOLDER +from sagemaker.train.remote_function.errors import RemoteFunctionError +from sagemaker.train.remote_function.job import JOBS_CONTAINER_ENTRYPOINT from sagemaker.core.s3 import s3_path_join from sagemaker.core.helper.session_helper import Session from sagemaker.core.common_utils import 
resolve_value_from_config, retry_with_backoff, format_tags, Tags diff --git a/sagemaker-mlops/tests/unit/workflow/test_pipeline.py b/sagemaker-mlops/tests/unit/workflow/test_pipeline.py index 55922a66ca..ebab1ef865 100644 --- a/sagemaker-mlops/tests/unit/workflow/test_pipeline.py +++ b/sagemaker-mlops/tests/unit/workflow/test_pipeline.py @@ -352,8 +352,8 @@ def test_get_function_step_result_wrong_container(mock_session): def test_get_function_step_result_incomplete_job(mock_session): from sagemaker.mlops.workflow.pipeline import get_function_step_result - from sagemaker.core.remote_function.job import JOBS_CONTAINER_ENTRYPOINT - from sagemaker.core.remote_function.errors import RemoteFunctionError + from sagemaker.train.remote_function.job import JOBS_CONTAINER_ENTRYPOINT + from sagemaker.train.remote_function.errors import RemoteFunctionError step_list = [{"StepName": "step1", "Metadata": {"TrainingJob": {"Arn": "arn:aws:sagemaker:us-west-2:123456789012:training-job/job"}}}] mock_session.describe_training_job.return_value = { @@ -369,7 +369,7 @@ def test_get_function_step_result_incomplete_job(mock_session): def test_get_function_step_result_success(mock_session): from sagemaker.mlops.workflow.pipeline import get_function_step_result - from sagemaker.core.remote_function.job import JOBS_CONTAINER_ENTRYPOINT + from sagemaker.train.remote_function.job import JOBS_CONTAINER_ENTRYPOINT step_list = [{"StepName": "step1", "Metadata": {"TrainingJob": {"Arn": "arn:aws:sagemaker:us-west-2:123456789012:training-job/job"}}}] mock_session.describe_training_job.return_value = { @@ -433,7 +433,7 @@ def test_pipeline_execution_result_waiter_error(mock_session): def test_pipeline_execution_result_terminal_failure(mock_session): from sagemaker.mlops.workflow.pipeline import _PipelineExecution from botocore.exceptions import WaiterError - from sagemaker.core.remote_function.job import JOBS_CONTAINER_ENTRYPOINT + from sagemaker.train.remote_function.job import 
JOBS_CONTAINER_ENTRYPOINT execution = _PipelineExecution(arn="arn:aws:sagemaker:us-west-2:123456789012:pipeline/test/execution/exec-id", sagemaker_session=mock_session) mock_session.sagemaker_client.list_pipeline_execution_steps.return_value = { @@ -454,7 +454,7 @@ def test_pipeline_execution_result_terminal_failure(mock_session): def test_get_function_step_result_obsolete_s3_path(mock_session): from sagemaker.mlops.workflow.pipeline import get_function_step_result - from sagemaker.core.remote_function.job import JOBS_CONTAINER_ENTRYPOINT + from sagemaker.train.remote_function.job import JOBS_CONTAINER_ENTRYPOINT step_list = [{"StepName": "step1", "Metadata": {"TrainingJob": {"Arn": "arn:aws:sagemaker:us-west-2:123456789012:training-job/job"}}}] mock_session.describe_training_job.return_value = { diff --git a/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py b/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py index 1c3016cf86..2533d6d799 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py +++ b/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py @@ -134,7 +134,7 @@ def build(self): generate_secret_key, compute_hash, ) -from sagemaker.core.remote_function.core.serialization import _MetaData +from sagemaker.train.remote_function.core.serialization import _MetaData from sagemaker.serve.model_server.triton.config_template import CONFIG_TEMPLATE SPECULATIVE_DRAFT_MODEL = "/opt/ml/additional-model-data-sources" diff --git a/sagemaker-serve/src/sagemaker/serve/model_server/multi_model_server/prepare.py b/sagemaker-serve/src/sagemaker/serve/model_server/multi_model_server/prepare.py index 37ca745987..7882cea87f 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_server/multi_model_server/prepare.py +++ b/sagemaker-serve/src/sagemaker/serve/model_server/multi_model_server/prepare.py @@ -29,7 +29,7 @@ generate_secret_key, compute_hash, ) -from sagemaker.core.remote_function.core.serialization import _MetaData +from 
sagemaker.train.remote_function.core.serialization import _MetaData logger = logging.getLogger(__name__) diff --git a/sagemaker-serve/src/sagemaker/serve/model_server/smd/prepare.py b/sagemaker-serve/src/sagemaker/serve/model_server/smd/prepare.py index b66de32bf7..410b1bc558 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_server/smd/prepare.py +++ b/sagemaker-serve/src/sagemaker/serve/model_server/smd/prepare.py @@ -15,7 +15,7 @@ generate_secret_key, compute_hash, ) -from sagemaker.core.remote_function.core.serialization import _MetaData +from sagemaker.train.remote_function.core.serialization import _MetaData from sagemaker.serve.spec.inference_base import CustomOrchestrator, AsyncCustomOrchestrator diff --git a/sagemaker-serve/src/sagemaker/serve/model_server/tensorflow_serving/prepare.py b/sagemaker-serve/src/sagemaker/serve/model_server/tensorflow_serving/prepare.py index 3525cc9b4a..6c132e3f79 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_server/tensorflow_serving/prepare.py +++ b/sagemaker-serve/src/sagemaker/serve/model_server/tensorflow_serving/prepare.py @@ -14,7 +14,7 @@ generate_secret_key, compute_hash, ) -from sagemaker.core.remote_function.core.serialization import _MetaData +from sagemaker.train.remote_function.core.serialization import _MetaData def prepare_for_tf_serving( diff --git a/sagemaker-serve/src/sagemaker/serve/model_server/torchserve/prepare.py b/sagemaker-serve/src/sagemaker/serve/model_server/torchserve/prepare.py index 988acf646d..8a159b29cf 100644 --- a/sagemaker-serve/src/sagemaker/serve/model_server/torchserve/prepare.py +++ b/sagemaker-serve/src/sagemaker/serve/model_server/torchserve/prepare.py @@ -17,7 +17,7 @@ compute_hash, ) from sagemaker.serve.validations.check_image_uri import is_1p_image_uri -from sagemaker.core.remote_function.core.serialization import _MetaData +from sagemaker.train.remote_function.core.serialization import _MetaData def prepare_for_torchserve( diff --git 
a/sagemaker-serve/src/sagemaker/serve/validations/check_integrity.py b/sagemaker-serve/src/sagemaker/serve/validations/check_integrity.py index 4363d8d6ed..4fe2737084 100644 --- a/sagemaker-serve/src/sagemaker/serve/validations/check_integrity.py +++ b/sagemaker-serve/src/sagemaker/serve/validations/check_integrity.py @@ -7,7 +7,7 @@ import os from pathlib import Path -from sagemaker.core.remote_function.core.serialization import _MetaData +from sagemaker.train.remote_function.core.serialization import _MetaData def generate_secret_key(nbytes: int = 32) -> str: diff --git a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb deleted file mode 100644 index 5cb75f506c..0000000000 --- a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb +++ /dev/null @@ -1,2817 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SageMaker Benchmark Evaluation - Basic Usage\n", - "\n", - "This notebook demonstrates the basic user-facing flow for creating and managing benchmark evaluation jobs using the BenchmarkEvaluator with Jinja2 template-based pipeline generation." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 1: Discover Available Benchmarks\n", - "\n", - "Discover the benchmark properties and available options:\n", - "https://docs.aws.amazon.com/sagemaker/latest/dg/nova-model-evaluation.html" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "<_Benchmark.MMLU: 'mmlu'>,\n",
-       "<_Benchmark.MMLU_PRO: 'mmlu_pro'>,\n",
-       "<_Benchmark.BBH: 'bbh'>,\n",
-       "<_Benchmark.GPQA: 'gpqa'>,\n",
-       "<_Benchmark.MATH: 'math'>,\n",
-       "<_Benchmark.STRONG_REJECT: 'strong_reject'>,\n",
-       "<_Benchmark.IFEVAL: 'ifeval'>,\n",
-       "<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "<_Benchmark.MMMU: 'mmmu'>,\n",
-       "<_Benchmark.LLM_JUDGE: 'llm_judge'>,\n",
-       "<_Benchmark.INFERENCE_ONLY: 'inference_only'>\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225m_Benchmark.MMLU:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'mmlu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMLU_PRO: \u001b[0m\u001b[38;2;0;135;0m'mmlu_pro'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.BBH: \u001b[0m\u001b[38;2;0;135;0m'bbh'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GPQA: \u001b[0m\u001b[38;2;0;135;0m'gpqa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MATH: \u001b[0m\u001b[38;2;0;135;0m'math'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.STRONG_REJECT: \u001b[0m\u001b[38;2;0;135;0m'strong_reject'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.IFEVAL: \u001b[0m\u001b[38;2;0;135;0m'ifeval'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMMU: \u001b[0m\u001b[38;2;0;135;0m'mmmu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.LLM_JUDGE: \u001b[0m\u001b[38;2;0;135;0m'llm_judge'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.INFERENCE_ONLY: \u001b[0m\u001b[38;2;0;135;0m'inference_only'\u001b[0m\u001b[1m>\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'modality': 'Multi-Modal (image)',\n",
-       "'description': 'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.',\n",
-       "'metrics': ['all'],\n",
-       "'strategy': 'gen_qa',\n",
-       "'subtask_available': False,\n",
-       "'subtasks': None\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'modality'\u001b[0m: \u001b[38;2;0;135;0m'Multi-Modal \u001b[0m\u001b[1;38;2;0;135;0m(\u001b[0m\u001b[38;2;0;135;0mimage\u001b[0m\u001b[1;38;2;0;135;0m)\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'description'\u001b[0m: \u001b[38;2;0;135;0m'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'metrics'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'all'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtask_available'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtasks'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", - "from rich.pretty import pprint\n", - "\n", - "# Configure logging to show INFO messages\n", - "import logging\n", - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format='%(levelname)s - %(name)s - %(message)s'\n", - ")\n", - "\n", - "# Get available benchmarks\n", - "Benchmark = get_benchmarks()\n", - "pprint(list(Benchmark))\n", - "\n", - "# Print properties for a specific benchmark\n", - "pprint(get_benchmark_properties(benchmark=Benchmark.GEN_QA))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 2: Create BenchmarkEvaluator\n", - "\n", - "Create a BenchmarkEvaluator instance with the desired benchmark. 
The evaluator will use Jinja2 templates to render a complete pipeline definition.\n", - "\n", - "**Required Parameters:**\n", - "- `benchmark`: Benchmark type from the Benchmark enum\n", - "- `base_model`: Model ARN from SageMaker hub content\n", - "- `output_s3_location`: S3 location for evaluation outputs\n", - "- `mlflow_resource_arn`: MLflow tracking server ARN for experiment tracking\n", - "\n", - "**Optional Template Fields:**\n", - "These fields are used for template rendering. If not provided, defaults will be used:\n", - "- `model_package_group`: Model package group ARN\n", - "- `source_model_package`: Source model package ARN\n", - "- `dataset`: S3 URI of evaluation dataset\n", - "- `model_artifact`: ARN of model artifact for lineage tracking (auto-inferred from source_model_package)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:39:45] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:39:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=314173;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126855;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=480390;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=329695;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Model package group provided as ARN:                             base_evaluator.py:145\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa                      \n",
-       "                             mple-name-aovqo                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Model package group provided as ARN: \u001b]8;id=572070;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=299487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#145\u001b\\\u001b[2m145\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x13cd28e60>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='gen-qa-eval-demo',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group='arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo',\n",
-       "benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "subtasks=None,\n",
-       "dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=True\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x13cd28e60\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from sagemaker.train.evaluate import BenchMarkEvaluator\n", - "\n", - "# Create evaluator with GEN_QA benchmark\n", - "# These values match our successfully tested configuration\n", - "evaluator = BenchMarkEvaluator(\n", - " benchmark=Benchmark.GEN_QA,\n", - " model=\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\",\n", - " s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\",\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\",\n", - " dataset=\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", - " model_package_group=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo\", # Optional inferred from model if model package\n", - " base_eval_name=\"gen-qa-eval-demo\",\n", - " # Note: sagemaker_session is optional and will be auto-created if not provided\n", - " # Note: region is optional and will be auto deduced using environment variables - 
SAGEMAKER_REGION, AWS_REGION\n", - ")\n", - "\n", - "pprint(evaluator)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:13                                                                                   \n",
-       "                                                                                                  \n",
-       "   10 # Create evaluator with GEN_QA benchmark                                                    \n",
-       "   11 # These values match our successfully tested configuration                                  \n",
-       "   12 evaluator = BenchMarkEvaluator(                                                             \n",
-       " 13 benchmark=Benchmark.GEN_QA,                                                             \n",
-       "   14 model=\"meta-textgeneration-llama-3-2-1b-instruct\",                                      \n",
-       "   15 s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\",                                  \n",
-       "   16 mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server    \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "NameError: name 'Benchmark' is not defined\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in :13 \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m10 \u001b[0m\u001b[2m# Create evaluator with GEN_QA benchmark\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m11 \u001b[0m\u001b[2m# These values match our successfully tested configuration\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m12 \u001b[0mevaluator = BenchMarkEvaluator( \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m13 \u001b[2m│ \u001b[0mbenchmark=\u001b[1;4mBenchmark\u001b[0m.GEN_QA, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m14 \u001b[0m\u001b[2m│ \u001b[0mmodel=\u001b[33m\"\u001b[0m\u001b[33mmeta-textgeneration-llama-3-2-1b-instruct\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m15 \u001b[0m\u001b[2m│ \u001b[0ms3_output_path=\u001b[33m\"\u001b[0m\u001b[33ms3://mufi-test-serverless-smtj/eval/\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m16 \u001b[0m\u001b[2m│ \u001b[0mmlflow_resource_arn=\u001b[33m\"\u001b[0m\u001b[33marn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mNameError: \u001b[0mname 
\u001b[38;2;0;135;0m'Benchmark'\u001b[0m is not defined\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# # [Optional] BASE MODEL EVAL\n", - "\n", - "# from sagemaker.train.evaluate import BenchMarkEvaluator\n", - "\n", - "# # Create evaluator with GEN_QA benchmark\n", - "# # These values match our successfully tested configuration\n", - "# evaluator = BenchMarkEvaluator(\n", - "# benchmark=Benchmark.GEN_QA,\n", - "# model=\"meta-textgeneration-llama-3-2-1b-instruct\",\n", - "# s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\",\n", - "# mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\",\n", - "# dataset=\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", - "# # model_package_group=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo\", # Optional inferred from model if model package\n", - "# base_eval_name=\"gen-qa-eval-demo\",\n", - "# # Note: sagemaker_session is optional and will be auto-created if not provided\n", - "# # Note: region is optional and will be auto deduced using environment variables - SAGEMAKER_REGION, AWS_REGION\n", - "# )\n", - "\n", - "# pprint(evaluator)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - botocore.credentials - Found credentials in shared credentials file: ~/.aws/credentials\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Model package group provided as ARN: arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n",
-       "region='us-east-1',\n",
-       "sagemaker_session=<sagemaker_core.helper.session_helper.Session object at 0x356a03950>,\n",
-       "model='arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3',\n",
-       "base_eval_name='gen-qa-eval-demo',\n",
-       "s3_output_path='s3://mufi-test-serverless-iad/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group='arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models',\n",
-       "benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "subtasks=None,\n",
-       "dataset='s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=True\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[38;2;0;135;0m'us-east-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker_core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x356a03950\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-iad/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# # [Optional] Nova testing IAD Prod\n", - "\n", - "# from sagemaker.train.evaluate import BenchMarkEvaluator\n", - "\n", - "# # Create evaluator with GEN_QA benchmark\n", - "# # These values match our successfully tested configuration\n", - "# evaluator = BenchMarkEvaluator(\n", - "# benchmark=Benchmark.GEN_QA,\n", - "# # model=\"arn:aws:sagemaker:us-east-1:052150106756:model-package/bgrv-nova-micro-sft-lora/1\",\n", - "# model=\"arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3\",\n", - "# s3_output_path=\"s3://mufi-test-serverless-iad/eval/\",\n", - "# mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server\",\n", - "# dataset=\"s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", - "# model_package_group=\"arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models\", # Optional inferred from model if model package\n", - "# 
base_eval_name=\"gen-qa-eval-demo\",\n", - "# region=\"us-east-1\",\n", - "# # Note: sagemaker_session is optional and will be auto-created if not provided\n", - "# # Note: region is optional and will be auto deduced using environment variables - SAGEMAKER_REGION, AWS_REGION\n", - "# )\n", - "\n", - "# pprint(evaluator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Optionally update the hyperparameters" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:26:31] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:26:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=665742;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=28065;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n",
-       "                             property                                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=668827;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=344195;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=912465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=530916;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No region provided. Using default region.                                 utils.py:340\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=483608;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=394176;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Runs on sagemaker us-west-2, region:us-west-2                             utils.py:354\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=127187;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=740445;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=26417;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=309515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762738;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=1149;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'max_new_tokens': '8192',\n",
-       "'temperature': '0',\n",
-       "'top_k': '-1',\n",
-       "'top_p': '1.0',\n",
-       "'aggregation': '',\n",
-       "'postprocessing': 'False',\n",
-       "'max_model_len': '12000'\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pprint(evaluator.hyperparameters.to_dict())\n", - "\n", - "# optionally update hyperparameters\n", - "# evaluator.hyperparameters.temperature = \"0.1\"\n", - "\n", - "# optionally get more info on types, limits, defaults.\n", - "# evaluator.hyperparameters.get_info()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 3: Run Evaluation\n", - "\n", - "Start a benchmark evaluation job. The system will:\n", - "1. Build template context with all required parameters\n", - "2. Render the pipeline definition from `DETERMINISTIC_TEMPLATE` using Jinja2\n", - "3. Create or update the pipeline with the rendered definition\n", - "4. 
Start the pipeline execution with empty parameters (all values pre-substituted)\n", - "\n", - "**What happens during execution:**\n", - "- CreateEvaluationAction: Sets up lineage tracking\n", - "- EvaluateBaseModel & EvaluateCustomModel: Run in parallel as serverless training jobs\n", - "- AssociateLineage: Links evaluation results to lineage tracking" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:40:20] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=39435;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=899931;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=774478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=222956;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=672788;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=533927;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=555230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=311641;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using resolved model_package_group ARN:                          base_evaluator.py:414\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa                      \n",
-       "                             mple-name-aovqo                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using resolved model_package_group ARN: \u001b]8;id=350625;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=393598;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#414\u001b\\\u001b[2m414\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:               benchmark_evaluator.py:644\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-grou                           \n",
-       "                             p/example-name-aovqo                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=534430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=895229;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#644\u001b\\\u001b[2m644\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m p/example-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                      benchmark_evaluator.py:647\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct, base_model_arn:                            \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic                           \n",
-       "                             Hub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0,                           \n",
-       "                              source_model_package_arn:                                                            \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test                           \n",
-       "                             -finetuned-models-gamma/28                                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=1084;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=849460;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#647\u001b\\\u001b[2m647\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m Hub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=537782;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=387290;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n",
-       "                             property                                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=706064;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=284205;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=502448;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=531984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=67072;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=119115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=954396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=959350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:21] INFO     Using configured hyperparameters: {'max_new_tokens':        benchmark_evaluator.py:568\n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0',                            \n",
-       "                             'aggregation': '', 'postprocessing': 'False',                                         \n",
-       "                             'max_model_len': '12000'}                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=584498;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126531;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#568\u001b\\\u001b[2m568\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=556396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=773270;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo', 'source_model_package_arn':                                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval/',                             \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task':                        \n",
-       "                             'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all',                           \n",
-       "                             'subtask': '', 'pipeline_name':                                                       \n",
-       "                             'SagemakerEvaluation-Deterministic', 'evaluate_base_model':                           \n",
-       "                             True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k':                          \n",
-       "                             '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing':                            \n",
-       "                             'False', 'max_model_len': '12000'}                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=970601;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=386360;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'subtask'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[3;38;2;0;135;0mTrue\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerEvaluation-Deterministic\"                                                   \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateBaseModel\",                                                    \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo\",                                                                    \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"\",                                                          \n",
-       "                                       \"postprocessing\": \"False\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModel\",                                                  \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo\",                                                                    \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"\",                                                          \n",
-       "                                       \"postprocessing\": \"False\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"base-eval-report\"                                                  \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"                             \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"base-eval-report\"                                                \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=330131;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=262009;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateBaseModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: 
\u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: 
\u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing pipeline:                                              execution.py:199\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=588942;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=925025;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline                                                     execution.py:202\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42 with latest definition                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=746487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=234699;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline resource.                                         resources.py:30306\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=908194;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=233215;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:22] INFO     Successfully updated pipeline:                                        execution.py:208\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=321336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=381496;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: gen-qa-eval-demo-1764452422              execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: gen-qa-eval-demo-\u001b[1;36m1764452422\u001b[0m \u001b]8;id=359442;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=958972;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/9                 \n",
-       "                             5qr3e96dblb                                                                           \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=73999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b29171c42\u001b[0m/execution/9 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 5qr3e96dblb \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb',\n",
-       "name='gen-qa-eval-demo',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 13, 40, 22, 284000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m284000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb\n", - "Initial Status: Executing\n" - ] - } - ], - "source": [ - "# Run evaluation with configured parameters\n", - "execution = evaluator.evaluate()\n", - "pprint(execution)\n", - "\n", - "print(f\"\\nPipeline Execution ARN: {execution.arn}\")\n", - "print(f\"Initial Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Alternative: Override Subtasks at Runtime\n", - "\n", - "For benchmarks with subtask support, you can override subtasks when calling evaluate():" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Override subtasks at evaluation time\n", - "# execution = mmlu_evaluator.evaluate(subtask=\"abstract_algebra\") # Single subtask\n", - "# execution = mmlu_evaluator.evaluate(subtask=[\"abstract_algebra\", \"anatomy\"]) # Multiple subtasks" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 4: Monitor Execution\n", - "\n", - "Check the job status and refresh as needed:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.084000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateBaseModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   end_time='2025-11-29T13:26:42.759000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:42.759000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Step Details:\n", - " EvaluateCustomModel: Executing\n", - " EvaluateBaseModel: Executing\n", - " CreateEvaluationAction: Succeeded\n" - ] - } - ], - "source": [ - "# 
Refresh status\n", - "execution.refresh()\n", - "\n", - "# Display job status with step details\n", - "pprint(execution.status)\n", - "\n", - "# Display individual step statuses\n", - "if execution.status.step_details:\n", - " print(\"\\nStep Details:\")\n", - " for step in execution.status.step_details:\n", - " print(f\" {step.name}: {step.status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5: Wait for Completion\n", - "\n", - "Wait for the pipeline to complete. This provides rich progress updates in Jupyter notebooks:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          0.5s                                                                                     \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        3.3s                                                          \n",
-       "  EvaluateCustomModel             Succeeded        3714.0s                                                       \n",
-       "  EvaluateBaseModel               Succeeded        5366.2s                                                       \n",
-       "  CreateEvaluationAction          Succeeded        2.7s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.5s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3.3s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3714.0s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateBaseModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m5366.2s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.7s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:20] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=401306;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=749;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], - "source": [ - "# Wait for job completion with progress updates\n", - "# This will show a rich progress display in Jupyter\n", - "execution.wait(target_status=\"Succeeded\", poll=5, timeout=3600)\n", - "\n", - "print(f\"\\nFinal Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 6: View Results\n", - "\n", - "Display the evaluation results in a formatted table:\n", - "\n", - "Output Structure:\n", - "\n", - "Evaluation results are stored in S3:\n", - "\n", - "```\n", - "s3://your-bucket/output/\n", - "└── job_name/\n", - " └── output/\n", - " └── output.tar.gz\n", - "```\n", - "\n", - "Extract output.tar.gz to reveal:\n", - "\n", - "```\n", - "run_name/\n", - "├── eval_results/\n", - "│ ├── results_[timestamp].json\n", - "│ ├── inference_output.jsonl (for gen_qa)\n", - "│ └── details/\n", - "│ └── model/\n", - "│ └── /\n", - "│ └── details__#_.parquet\n", - "└── tensorboard_results/\n", - " └── eval/\n", - " └── events.out.tfevents.[timestamp]\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
's3://mufi-test-serverless-smtj/eval/'\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:25] INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:130\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:25]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=671086;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=908024;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from                            \n",
-       "                             step: EvaluateCustomModel                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=813615;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=57499;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:26] INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from                              \n",
-       "                             step: EvaluateBaseModel                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:26]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=745707;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953308;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateBaseModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E                          \n",
-       "                             valuateCustomModel-F51y8F3Pg7/output/output/                                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=805603;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739949;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-F51y8F3Pg7/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o                          \n",
-       "                             utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct-                          \n",
-       "                             -or8pa/eval_results/results_2025-11-29T22-41-53.186048+00-00                          \n",
-       "                             .json                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=188825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=667854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T22-\u001b[1;36m41\u001b[0m-\u001b[1;36m53.186048\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E                          \n",
-       "                             valuateBaseModel-VA9YzcdIVI/output/output/                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=270113;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844454;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateBaseModel-VA9YzcdIVI/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out                          \n",
-       "                             put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o                          \n",
-       "                             r8pa/eval_results/results_2025-11-29T23-09-21.277725+00-00.j                          \n",
-       "                             son                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=221667;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=736866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m r8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m09\u001b[0m-\u001b[1;36m21.277725\u001b[0m+\u001b[1;36m00-00.j\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m son \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using metrics from 'all' key (standard benchmark format)      show_results_utils.py:93\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=431825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75452;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using metrics from 'all' key (standard benchmark format)      show_results_utils.py:93\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=866976;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=697222;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                Custom Model Results                \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7801 │\n",
-       "│ em                                        1.23% │\n",
-       "│ em_stderr                                0.0018 │\n",
-       "│ f1                                       19.04% │\n",
-       "│ f1_score_quasi                           25.25% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.16% │\n",
-       "│ qem_stderr                               0.0024 │\n",
-       "│ rouge1                                   25.69% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.09% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   25.02% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7801\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.23%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0018\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.25%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.16%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.69%\u001b[0m\u001b[37m \u001b[0m│\n", - 
"│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.02%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                 Base Model Results                 \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7803 │\n",
-       "│ em                                        1.29% │\n",
-       "│ em_stderr                                0.0019 │\n",
-       "│ f1                                       19.09% │\n",
-       "│ f1_score_quasi                           25.22% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.18% │\n",
-       "│ qem_stderr                               0.0024 │\n",
-       "│ rouge1                                   25.61% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.04% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   24.95% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;33mBase Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;33m \u001b[0m\u001b[1;33mMetric \u001b[0m\u001b[1;33m \u001b[0m│\u001b[1;33m \u001b[0m\u001b[1;33m Value\u001b[0m\u001b[1;33m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7803\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.22%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.18%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.61%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m 
\u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 24.95%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "                                                                                                                 \n",
-       "  Custom Model:                                                                                                  \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non  \n",
-       "  e/eval_results/                                                                                                \n",
-       "                                                                                                                 \n",
-       "  Base Model:                                                                                                    \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/  \n",
-       "  eval_results/                                                                                                  \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;33mBase Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36meval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pprint(execution.s3_output_path)\n", - "# Display results in a formatted table\n", - "execution.show_results()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 7: Retrieve an Existing Job\n", - "\n", - "You can retrieve and inspect any existing evaluation job:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - 
"
[11/29/25 13:35:47] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:35:47]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=148252;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes',\n",
-       "name='inlsexrd7jes',\n",
-       "status=PipelineExecutionStatus(\n",
-       "│   │   overall_status='Executing',\n",
-       "│   │   step_details=[\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   │   status='Executing',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.084000-08:00',\n",
-       "│   │   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   ),\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='EvaluateBaseModel',\n",
-       "│   │   │   │   status='Executing',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   ),\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   │   status='Succeeded',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   │   end_time='2025-11-29T13:26:42.759000-08:00',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   )\n",
-       "│   │   ],\n",
-       "│   │   failure_reason=None\n",
-       "),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 13, 26, 37, 300000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m''\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Succeeded'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:42.759000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ 
\u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m]\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m\u001b[39m=\u001b[0m\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m2025\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m11\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m29\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m13\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m26\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m37\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m300000\u001b[0m\u001b[39m, \u001b[0m\u001b[38;2;215;175;0mtzinfo\u001b[0m\u001b[39m=\u001b[0m\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m\u001b[39m=\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:22                                                                                   \n",
-       "                                                                                                  \n",
-       "   19 pprint(existing_execution)                                                                  \n",
-       "   20 print(f\"\\nStatus: {existing_execution.status.overall_status}\")                              \n",
-       "   21                                                                                             \n",
-       " 22 existing_execution.show_results()                                                           \n",
-       "   23                                                                                             \n",
-       "                                                                                                  \n",
-       " /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/tele \n",
-       " metry_logging.py:175 in wrapper                                                                  \n",
-       "                                                                                                  \n",
-       "   172 │   │   │   │   │   \"sagemaker_session is not provided or not valid.\",                     \n",
-       "   173 │   │   │   │   │   func_name,                                                             \n",
-       "   174 │   │   │   │   )                                                                          \n",
-       " 175 │   │   │   │   return func(*args, **kwargs)                                               \n",
-       "   176 │   │                                                                                      \n",
-       "   177 │   │   return wrapper                                                                     \n",
-       "   178                                                                                            \n",
-       "                                                                                                  \n",
-       " /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/exe \n",
-       " cution.py:1223 in show_results                                                                   \n",
-       "                                                                                                  \n",
-       "   1220 │   │   self.refresh()                                                                    \n",
-       "   1221 │   │                                                                                     \n",
-       "   1222 │   │   if self.status.overall_status != \"Succeeded\":                                     \n",
-       " 1223 │   │   │   raise ValueError(                                                             \n",
-       "   1224 │   │   │   │   f\"Cannot show results. Execution status is '{self.status.overall_status}  \n",
-       "   1225 │   │   │   │   f\"Results are only available after successful execution. \"                \n",
-       "   1226 │   │   │   │   f\"Use execution.wait() to wait for completion or check execution.status   \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "ValueError: Cannot show results. Execution status is 'Executing'. Results are only available after successful \n",
-       "execution. Use execution.wait() to wait for completion or check execution.status for details.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m22\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m19 \u001b[0mpprint(existing_execution) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m20 \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33mStatus: \u001b[0m\u001b[33m{\u001b[0mexisting_execution.status.overall_status\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m21 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m22 \u001b[1;4mexisting_execution.show_results()\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m23 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/\u001b[0m\u001b[1;33mtele\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33mmetry_logging.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mwrapper\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m172 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[33m\"\u001b[0m\u001b[33msagemaker_session is not provided or not 
valid.\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m173 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mfunc_name, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m174 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m175 \u001b[2m│ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[1;4mfunc(*args, **kwargs)\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m176 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m177 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m wrapper \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m178 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/\u001b[0m\u001b[1;33mexe\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33mcution.py\u001b[0m:\u001b[94m1223\u001b[0m in \u001b[92mshow_results\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1220 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.refresh() \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1221 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1222 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.status.overall_status != \u001b[33m\"\u001b[0m\u001b[33mSucceeded\u001b[0m\u001b[33m\"\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m1223 \u001b[2m│ │ │ 
\u001b[0m\u001b[1;4;94mraise\u001b[0m\u001b[1;4m \u001b[0m\u001b[1;4;96mValueError\u001b[0m\u001b[1;4m(\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1224 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mCannot show results. Execution status is \u001b[0m\u001b[1;4;33m'\u001b[0m\u001b[1;4;33m{\u001b[0m\u001b[1;4;96mself\u001b[0m\u001b[1;4m.status.overall_status\u001b[0m\u001b[1;4;33m}\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1225 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mResults are only available after successful execution. \u001b[0m\u001b[1;4;33m\"\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1226 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mUse execution.wait() to wait for completion or check execution.status \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mValueError: \u001b[0mCannot show results. Execution status is \u001b[38;2;0;135;0m'Executing'\u001b[0m. Results are only available after successful \n", - "execution. 
Use \u001b[1;38;2;225;0;225mexecution.wait\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m to wait for completion or check execution.status for details.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", - "from rich.pretty import pprint\n", - "\n", - "\n", - "# Get an existing job by ARN\n", - "# Replace with your actual pipeline execution ARN\n", - "existing_arn = \"arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes\"\n", - "\n", - "# base model only example\n", - "# existing_arn = \"arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gdp9f4dbv2vi\"\n", - "existing_execution = EvaluationPipelineExecution.get(\n", - " arn=existing_arn,\n", - " region=\"us-west-2\"\n", - ")\n", - "\n", - "pprint(existing_execution)\n", - "print(f\"\\nStatus: {existing_execution.status.overall_status}\")\n", - "\n", - "existing_execution.show_results()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Getting or creating artifact for source: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Searching for existing artifact for model package: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Found existing artifact: arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3138877d772ec489bef\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Resolved model info - base_model_name: meta-textgeneration-llama-3-2-1b-instruct, base_model_arn: 
arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0, source_model_package_arn: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Using configured hyperparameters: {'max_new_tokens': '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': 'False', 'max_model_len': '12000'}\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Using DETERMINISTIC_TEMPLATE for ModelPackage\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Resolved template parameters: {'role_arn': 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment', 'mlflow_experiment_name': None, 'mlflow_run_name': None, 'model_package_group_arn': 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo', 'source_model_package_arn': 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28', 'base_model_arn': 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', 'task': 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', 'dataset_uri': 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'subtask': '', 'pipeline_name': 'SagemakerEvaluation-Deterministic', 'dataset_artifact_arn': 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3138877d772ec489bef', 'action_arn_prefix': 'arn:aws:sagemaker:us-west-2:052150106756:action', 'evaluate_base_model': True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': 'False', 'max_model_len': '12000'}\n", - "INFO - 
sagemaker.modules.evaluate.execution - Found existing pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Updating pipeline SagemakerEvaluation-benchmark with latest definition for benchmark\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 12:24:36] INFO     Updating pipeline resource.                                         resources.py:30485\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 12:24:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=707103;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=260368;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py#30485\u001b\\\u001b[2m30485\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker_core.main.resources - Updating pipeline resource.\n", - "INFO - sagemaker.modules.evaluate.execution - Successfully updated pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Starting pipeline execution: gen-qa-eval-demo-1763843077\n", - "INFO - sagemaker.modules.evaluate.execution - Pipeline execution started: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8',\n",
-       "name='gen-qa-eval-demo',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 22, 12, 24, 37, 828000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m37\u001b[0m, \u001b[1;36m828000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline 
Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n", - "Initial Status: Executing\n" - ] - } - ], - "source": [ - "# Run evaluation with configured parameters\n", - "execution = evaluator.evaluate()\n", - "pprint(execution)\n", - "\n", - "print(f\"\\nPipeline Execution ARN: {execution.arn}\")\n", - "print(f\"Initial Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 8: List All Benchmark Evaluations\n", - "\n", - "Retrieve all benchmark evaluation executions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:41:19] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:41:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=166943;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=816278;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=521868;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=351282;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 evaluation(s)\n", - "\n", - " 95qr3e96dblb: Executing\n", - " inlsexrd7jes: Executing\n" - ] - } - ], - "source": [ - "# Get all benchmark evaluations (returns iterator)\n", - "all_executions_iter = BenchMarkEvaluator.get_all(region=\"us-west-2\")\n", - "all_executions = list(all_executions_iter)\n", - "\n", - "print(f\"Found {len(all_executions)} evaluation(s)\\n\")\n", - "for exec in all_executions[:5]: # Show first 5\n", - " print(f\" {exec.name}: {exec.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 9: Stop a Running Job (Optional)\n", - "\n", - "You can stop a running evaluation if needed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:2350: UserWarning: Field name \"schema\" in \"AutoMLSnowflakeDatasetDefinition\" shadows an attribute in parent 
\"Base\"\n", - " class AutoMLSnowflakeDatasetDefinition(Base):\n", - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:6372: UserWarning: Field name \"schema\" in \"SnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class SnowflakeDatasetDefinition(Base):\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 18:32:01] WARNING  No boto3 session provided. Creating a new session.                        utils.py:339\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 18:32:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No boto3 session provided. Creating a new session. \u001b]8;id=549422;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=573139;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#339\u001b\\\u001b[2m339\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No config provided. Using default config.                                 utils.py:347\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No config provided. Using default config. \u001b]8;id=278829;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=978800;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#347\u001b\\\u001b[2m347\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Succeeded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "AWS service error when stopping pipeline execution: Pipeline execution with ARN arn:aws:sagemaker:us-west-2:052150106756:pipeline/sagemakerevaluation-benchmark/execution/7rr30o7c2qfb status 'Succeeded'. Only pipelines with 'Executing' status can be stopped.\n" - ] - } - ], - "source": [ - "# Uncomment to stop the job\n", - "# existing_execution.stop()\n", - "# print(f\"Execution stopped. Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Understanding the Pipeline Structure\n", - "\n", - "The rendered pipeline definition includes:\n", - "\n", - "**4 Steps:**\n", - "1. **CreateEvaluationAction** (Lineage): Sets up tracking\n", - "2. **EvaluateBaseModel** (Training): Evaluates base model\n", - "3. **EvaluateCustomModel** (Training): Evaluates custom model\n", - "4. 
**AssociateLineage** (Lineage): Links results\n", - "\n", - "**Key Features:**\n", - "- Template-based: Uses Jinja2 for flexible pipeline generation\n", - "- Parallel execution: Base and custom models evaluated simultaneously\n", - "- Serverless: No need to manage compute resources\n", - "- MLflow integration: Automatic experiment tracking\n", - "- Lineage tracking: Full traceability of evaluation artifacts\n", - "\n", - "**Typical Execution Time:**\n", - "- Total: ~10-12 minutes\n", - "- Downloading phase: ~5-7 minutes (model and dataset)\n", - "- Training phase: ~3-5 minutes (running evaluation)\n", - "- Lineage steps: ~2-4 seconds each" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb deleted file mode 100644 index 6cf049cb79..0000000000 --- a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb +++ /dev/null @@ -1,1842 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SageMaker Custom Scorer Evaluation - Demo\n", - "\n", - "This notebook demonstrates how to use the CustomScorerEvaluator to evaluate models with custom evaluator functions." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "Import necessary modules." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from sagemaker.train.evaluate import CustomScorerEvaluator\n", - "from rich.pretty import pprint\n", - "\n", - "# Configure logging to show INFO messages\n", - "import logging\n", - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format='%(levelname)s - %(name)s - %(message)s'\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configure Evaluation Parameters\n", - "\n", - "Set up the parameters for your custom scorer evaluation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Configuration:\n", - " Evaluator: arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\n", - " Dataset: s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\n", - " Base Model: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - " Output Location: s3://mufi-test-serverless-smtj/eval/\n" - ] - } - ], - "source": [ - "# Evaluator ARN (custom evaluator from AI Registry)\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/00-goga-qa-evaluation/1.0.0\"\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/nikmehta-reward-function/1.0.0\"\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/eval-lambda-test/0.0.1\"\n", - "evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\"\n", - "\n", - "# Dataset - can be S3 URI or AIRegistry DataSet ARN\n", - "dataset = 
\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\n", - "\n", - "# Base model - can be:\n", - "# 1. Model package ARN: \"arn:aws:sagemaker:region:account:model-package/name/version\"\n", - "# 2. JumpStart model ID: \"llama-3-2-1b-instruct\" [Evaluation with Base Model Only is yet to be implemented/tested - Not Working currently]\n", - "base_model = \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\"\n", - "\n", - "# S3 location for outputs\n", - "s3_output_path = \"s3://mufi-test-serverless-smtj/eval/\"\n", - "\n", - "# Optional: MLflow tracking server ARN\n", - "mlflow_resource_arn = \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\"\n", - "\n", - "print(\"Configuration:\")\n", - "print(f\" Evaluator: {evaluator_arn}\")\n", - "print(f\" Dataset: {dataset}\")\n", - "print(f\" Base Model: {base_model}\")\n", - "print(f\" Output Location: {s3_output_path}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create CustomScorerEvaluator Instance\n", - "\n", - "Instantiate the evaluator with your configuration. The evaluator can accept:\n", - "- **Custom Evaluator ARN** (string): Points to your custom evaluator in AI Registry\n", - "- **Built-in Metric** (string or enum): Use preset metrics like \"code_executions\", \"math_answers\", etc.\n", - "- **Evaluator Object**: A sagemaker.ai_registry.evaluator.Evaluator instance" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:33] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:33]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=639873;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=963387;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=342593;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318918;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ CustomScorerEvaluator created successfully\n" - ] - }, - { - "data": { - "text/html": [ - "
CustomScorerEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x116ae9f40>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='eval-meta-1b49b716',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group=None,\n",
-       "evaluator='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1',\n",
-       "dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=False\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mCustomScorerEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x116ae9f40\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-1b49b716'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Create evaluator with custom evaluator ARN\n", - "evaluator = CustomScorerEvaluator(\n", - " evaluator=evaluator_arn, # Custom evaluator ARN\n", - " dataset=dataset,\n", - " model=base_model,\n", - " s3_output_path=s3_output_path,\n", - " mlflow_resource_arn=mlflow_resource_arn,\n", - " # model_package_group=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/Demo-test-deb-2\", \n", - " evaluate_base_model=False # Set to True to also evaluate the base model\n", - ")\n", - "\n", - "print(\"\\n✓ CustomScorerEvaluator created successfully\")\n", - "pprint(evaluator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Optionally update the hyperparameters" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:38] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:38]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=848286;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=998219;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for             custom_scorer_evaluator.py:236\n",
-       "                             hyperparameters property                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for \u001b]8;id=20210;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=113368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#236\u001b\\\u001b[2m236\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m hyperparameters property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=402391;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385188;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No region provided. Using default region.                                 utils.py:340\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=442028;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=947914;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Runs on sagemaker us-west-2, region:us-west-2                             utils.py:354\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=708289;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=968385;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=711157;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=750371;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762518;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=755839;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'max_new_tokens': '8192',\n",
-       "'temperature': '0',\n",
-       "'top_k': '-1',\n",
-       "'top_p': '1.0',\n",
-       "'aggregation': '',\n",
-       "'postprocessing': 'False',\n",
-       "'max_model_len': '12000'\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pprint(evaluator.hyperparameters.to_dict())\n", - "\n", - "# optionally update hyperparameters\n", - "# evaluator.hyperparameters.temperature = \"0.1\"\n", - "\n", - "# optionally get more info on types, limits, defaults.\n", - "# evaluator.hyperparameters.get_info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Alternative: Using Built-in Metrics\n", - "\n", - "Instead of a custom evaluator ARN, you can use built-in metrics:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Example with built-in metrics (commented out)\n", - "# from sagemaker.train.evaluate import get_builtin_metrics\n", - "# \n", - "# BuiltInMetric = get_builtin_metrics()\n", - "# \n", - "# evaluator_builtin = CustomScorerEvaluator(\n", - "# evaluator=BuiltInMetric.PRIME_MATH, # Or use string: \"prime_math\"\n", - "# dataset=dataset,\n", - "# base_model=base_model,\n", - "# s3_output_path=s3_output_path\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"## Start Evaluation\n", - "\n", - "Call `evaluate()` to start the evaluation job. This will:\n", - "1. Create or update the evaluation pipeline\n", - "2. Start a pipeline execution\n", - "3. Return an `EvaluationPipelineExecution` object for monitoring" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:43] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=201476;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=125527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=336129;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=429516;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=916341;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=92767;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=110957;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=865654;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Inferred model package group ARN:                                base_evaluator.py:386\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma from                                                         \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=126121;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=198580;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Automatically inferred model_package_group:                      base_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=183930;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=417470;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:           custom_scorer_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-                               \n",
-       "                             group/test-finetuned-models-gamma                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=191140;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=51752;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m group/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                  custom_scorer_evaluator.py:424\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct,                                            \n",
-       "                             base_model_arn:                                                                       \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu                               \n",
-       "                             blicHub/Model/meta-textgeneration-llama-3-2-1b-instruct                               \n",
-       "                             /1.10.0, source_model_package_arn:                                                    \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/                               \n",
-       "                             test-finetuned-models-gamma/28                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=359160;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=935533;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#424\u001b\\\u001b[2m424\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m blicHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m /\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m test-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=189431;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=22751;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using configured hyperparameters: {'max_new_tokens':    custom_scorer_evaluator.py:299\n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p':                                   \n",
-       "                             '1.0', 'aggregation': '', 'postprocessing': 'False',                                  \n",
-       "                             'max_model_len': '12000'}                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=536279;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=194605;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#299\u001b\\\u001b[2m299\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=164880;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=880373;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:44] INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma', 'source_model_package_arn':                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval/',                             \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task':                        \n",
-       "                             'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all',                           \n",
-       "                             'pipeline_name': 'SagemakerEvaluation-Deterministic',                                 \n",
-       "                             'evaluate_base_model': False, 'evaluator_arn':                                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW                      \n",
-       "                             PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t                      \n",
-       "                             est/0.0.1', 'max_new_tokens': '8192', 'temperature': '0',                             \n",
-       "                             'top_k': '-1', 'top_p': '1.0', 'aggregation': 'mean',                                 \n",
-       "                             'postprocessing': 'True', 'max_model_len': '12000'}                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=863350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=151185;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m, \u001b[38;2;0;135;0m'evaluator_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m'mean'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'True'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerEvaluation-Deterministic\"                                                   \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModel\",                                                  \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"CustomScorerEvaluation\",                                 \n",
-       "                                       \"EvaluatorArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW                      \n",
-       "                             PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t                      \n",
-       "                             est/0.0.1\"                                                                            \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"mean\",                                                      \n",
-       "                                       \"postprocessing\": \"True\"                                                    \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"                             \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=395506;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=123517;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: 
\u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomScorerEvaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluatorArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"mean\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"True\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     No existing pipeline found with prefix                                execution.py:212\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation, creating new one                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m No existing pipeline found with prefix \u001b]8;id=437465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=501901;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#212\u001b\\\u001b[2m212\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation, creating new one \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Creating new pipeline:                                                 execution.py:57\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3                                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating new pipeline: \u001b]8;id=91501;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=923226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#57\u001b\\\u001b[2m57\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Creating pipeline resource.                                         resources.py:30147\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating pipeline resource. \u001b]8;id=877192;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=410393;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30147\u001b\\\u001b[2m30147\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Successfully created pipeline:                                         execution.py:76\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3                                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully created pipeline: \u001b]8;id=802515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=256656;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#76\u001b\\\u001b[2m76\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Waiting for pipeline                                                   execution.py:79\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3 to be ready...                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Waiting for pipeline \u001b]8;id=984002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=40351;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#79\u001b\\\u001b[2m79\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m to be ready\u001b[33m...\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
-       "install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n", - "install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Final Resource Status: Active                                       resources.py:30410\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mActive\u001b[0m \u001b]8;id=750224;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=46929;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30410\u001b\\\u001b[2m30410\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
                    INFO     Pipeline                                                               execution.py:82\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3 is now active and ready for execution                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline \u001b]8;id=674167;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265281;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#82\u001b\\\u001b[2m82\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m is now active and ready for execution \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: eval-meta-1b49b716-1764452564            execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-1b49b716-\u001b[1;36m1764452564\u001b[0m \u001b]8;id=27465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=541837;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:45] INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/executio                 \n",
-       "                             n/u2q2dl1w5aiq                                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=368377;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=144012;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e823cbe579c3\u001b[0m/executio \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m n/u2q2dl1w5aiq \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ Evaluation execution started successfully!\n", - " Execution Name: eval-meta-1b49b716\n", - " Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/execution/u2q2dl1w5aiq\n", - " Status: Executing\n" - ] - } - ], - "source": [ - "# Start evaluation\n", - "execution = evaluator.evaluate()\n", - "\n", - "print(\"\\n✓ Evaluation execution started successfully!\")\n", - "print(f\" Execution Name: {execution.name}\")\n", - "print(f\" Pipeline Execution ARN: {execution.arn}\")\n", - "print(f\" Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Monitor Job Progress\n", - "\n", - "Use `refresh()` to update the job status, or `wait()` to block until completion." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:42:45.523000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120ab8f80>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-29T13:42:45.523000-08:00',\n",
-       "│   │   │   end_time='2025-11-29T13:42:48.017000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120ab8f80\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:48.017000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Check current status\n", - "execution.refresh()\n", - "print(f\"Current Status: {execution.status.overall_status}\")\n", - "\n", - "pprint(execution.status)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Wait for Completion\n", - "\n", - "Block execution until the job completes. This provides a rich visual experience in Jupyter notebooks." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          0.9s                                                                                     \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        1.9s                                                          \n",
-       "  EvaluateCustomModel             Succeeded        7462.5s                                                       \n",
-       "  CreateEvaluationAction          Succeeded        2.5s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.9s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m7462.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:36] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=693225;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=873243;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], - "source": [ - "# Wait for job to complete (with rich visual feedback)\n", - "execution.wait(poll=30, timeout=3600)\n", - "\n", - "print(f\"\\nFinal Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:21:42] INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:130\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:42]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=425698;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=639097;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from                            \n",
-       "                             step: EvaluateCustomModel                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=993672;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=652226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E                          \n",
-       "                             valuateCustomModel-FNSg2Knqlf/output/output/                                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=724854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=324888;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-FNSg2Knqlf/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o                          \n",
-       "                             utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct-                          \n",
-       "                             -or8pa/eval_results/results_2025-11-29T23-46-45.108093+00-00                          \n",
-       "                             .json                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=770358;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=338226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m46\u001b[0m-\u001b[1;36m45.108093\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:43] INFO     Using metrics from key: 'custom|gen_qa_gen_qa|0' (gen_qa or  show_results_utils.py:100\n",
-       "                             custom_scorer format)                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from key: \u001b[38;2;0;135;0m'custom|gen_qa_gen_qa|0'\u001b[0m \u001b[1m(\u001b[0mgen_qa or \u001b]8;id=904034;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=137242;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#100\u001b\\\u001b[2m100\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m custom_scorer format\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                Custom Model Results                \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7769 │\n",
-       "│ byoc_failure_count                    3572.0000 │\n",
-       "│ em                                        1.26% │\n",
-       "│ em_stderr                                0.0019 │\n",
-       "│ f1                                       19.13% │\n",
-       "│ f1_score_quasi                           25.29% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.21% │\n",
-       "│ qem_stderr                               0.0025 │\n",
-       "│ rouge1                                   25.73% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.15% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   25.04% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7769\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbyoc_failure_count \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 3572.0000\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.26%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.13%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.21%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0025\u001b[0m\u001b[37m 
\u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.73%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.15%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "                                                                                                                 \n",
-       "  Custom Model:                                                                                                  \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non  \n",
-       "  e/eval_results/                                                                                                \n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# show results\n", - "execution.show_results()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Retrieve Existing Job\n", - "\n", - "You can retrieve a previously started evaluation job using its ARN." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker.modules.evaluate.execution - Extracted s3_output_path from training job pipelines-amlk8q2ukw8x-EvaluateCustomModel-VElzvyVY19: s3://mufi-test-serverless-smtj/eval/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Retrieved job: amlk8q2ukw8x\n", - "Status: Succeeded\n" - ] - } - ], - "source": [ - "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", - "\n", - "# Get existing job by ARN\n", - "existing_arn = execution.arn # Or use a specific ARN\n", - "\n", - "existing_exec = EvaluationPipelineExecution.get(arn=existing_arn)\n", - "\n", - "print(f\"Retrieved job: {existing_exec.name}\")\n", - "print(f\"Status: {existing_exec.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## List All Custom Scorer Evaluations\n", - "\n", - "Retrieve all custom scorer evaluation executions." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 0 custom scorer evaluation(s):\n", - "\n" - ] - } - ], - "source": [ - "# Get all custom scorer evaluations\n", - "all_executions = list(CustomScorerEvaluator.get_all())\n", - "\n", - "print(f\"Found {len(all_executions)} custom scorer evaluation(s):\\n\")\n", - "for execution in all_executions:\n", - " print(f\" - {execution.name} - {execution.arn}: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stop a Running Job (Optional)\n", - "\n", - "You can stop a running evaluation if needed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Uncomment to stop the job\n", - "# execution.stop()\n", - "# print(f\"Execution stopped. 
Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "This notebook demonstrated:\n", - "1. ✅ Creating a CustomScorerEvaluator with a custom evaluator ARN\n", - "2. ✅ Starting an evaluation job\n", - "3. ✅ Monitoring job progress with refresh() and wait()\n", - "4. ✅ Retrieving existing jobs\n", - "5. ✅ Listing all custom scorer evaluations\n", - "\n", - "### Key Points:\n", - "- The `evaluator` parameter accepts:\n", - " - Custom evaluator ARN (for AI Registry evaluators)\n", - " - Built-in metric names (\"code_executions\", \"math_answers\", \"exact_match\")\n", - " - Evaluator objects from sagemaker.ai_registry.evaluator.Evaluator\n", - "- Set `evaluate_base_model=False` to only evaluate the custom model\n", - "- Use `execution.wait()` for automatic monitoring with rich visual feedback\n", - "- Use `execution.refresh()` for manual status updates\n", - "- The SageMaker session is automatically inferred from your environment" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb deleted file mode 100644 index 8ba50c3ae7..0000000000 --- a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb +++ /dev/null @@ -1,2472 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SageMaker LLM-as-Judge Evaluation - Basic Usage\n", - "\n", - "This notebook demonstrates the basic user-facing flow for creating and managing 
LLM-as-Judge evaluation jobs using the LLMAsJudgeEvaluator." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configuration" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Configuration\n", - "REGION = 'us-west-2'\n", - "S3_BUCKET = 's3://mufi-test-serverless-smtj/eval/'\n", - "# DATASET = 'arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/DataSet/gen-qa-test-content/1.0.1' # Dataset ARN or S3 URI\n", - "DATASET = \"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl\"\n", - "MLFLOW_ARN = 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 1: Import Required Libraries\n", - "\n", - "Import the LLMAsJudgeEvaluator class." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from sagemaker.train.evaluate import LLMAsJudgeEvaluator\n", - "from rich.pretty import pprint\n", - "\n", - "# Configure logging to show INFO messages\n", - "import logging\n", - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format='%(levelname)s - %(name)s - %(message)s'\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 2: Create LLMAsJudgeEvaluator\n", - "\n", - "Create an LLMAsJudgeEvaluator instance with the desired evaluator model, dataset, and metrics.\n", - "\n", - "### Key Parameters:\n", - "- `model`: Model Package (or Base Model) to be evaluated (required)\n", - "- `evaluator_model`: Bedrock model ID to use as judge (required)\n", - "- `dataset`: S3 URI or Dataset ARN (required)\n", - "- `builtin_metrics`: List of built-in metrics (optional, no 'Builtin.' 
prefix needed)\n", - "- `custom_metrics`: JSON string of custom metrics (optional)\n", - "- `evaluate_base_model`: Whether to evaluate base model in addition to custom model (optional, default=True)\n", - "- `mlflow_resource_arn`: MLflow tracking server ARN (optional)\n", - "- `model_package_group`: Model package group ARN (optional)\n", - "- `s3_output_path`: S3 output location (required)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### A. Using custom metrics (as JSON string)\n", - "\n", - "Custom metrics must be provided as a properly escaped JSON string. You can either:\n", - "1. Create a Python dict and use `json.dumps()` to convert it\n", - "2. Provide a pre-escaped JSON string directly" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Method 1: Create dict and convert to JSON string\n", - "custom_metric_dict = {\n", - " \"customMetricDefinition\": {\n", - " \"name\": \"PositiveSentiment\",\n", - " \"instructions\": (\n", - " \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. 
\"\n", - " \"Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\n\\n\"\n", - " \"Consider the following:\\n\"\n", - " \"- Does the response have a positive, encouraging tone?\\n\"\n", - " \"- Is the response helpful and constructive?\\n\"\n", - " \"- Does it avoid negative language or criticism?\\n\\n\"\n", - " \"Rate on this scale:\\n\"\n", - " \"- Good: Response has positive sentiment\\n\"\n", - " \"- Poor: Response lacks positive sentiment\\n\\n\"\n", - " \"Here is the actual task:\\n\"\n", - " \"Prompt: {{prompt}}\\n\"\n", - " \"Response: {{prediction}}\"\n", - " ),\n", - " \"ratingScale\": [\n", - " {\"definition\": \"Good\", \"value\": {\"floatValue\": 1}},\n", - " {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}\n", - " ]\n", - " }\n", - "}\n", - "\n", - "# Convert to JSON string\n", - "custom_metrics_json = json.dumps([custom_metric_dict]) # Note: wrap in list" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:43:52] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:43:52]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=406523;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=534480;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=360312;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=805617;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
LLMAsJudgeEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x15f5c11c0>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='eval-meta-04295d90',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group=None,\n",
-       "evaluator_model='anthropic.claude-3-5-haiku-20241022-v1:0',\n",
-       "dataset='s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl',\n",
-       "builtin_metrics=['Completeness', 'Faithfulness'],\n",
-       "custom_metrics='[{\"customMetricDefinition\": {\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\", \"ratingScale\": [{\"definition\": \"Good\", \"value\": {\"floatValue\": 1}}, {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}]}}]',\n",
-       "evaluate_base_model=False\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAsJudgeEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x15f5c11c0\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator_model\u001b[0m=\u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbuiltin_metrics\u001b[0m=\u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'Completeness'\u001b[0m, \u001b[38;2;0;135;0m'Faithfulness'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mcustom_metrics\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"customMetricDefinition\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprompt\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\\\\nResponse: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprediction\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\", \"ratingScale\": \u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Good\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 1\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m, 
\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Poor\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 0\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "# Create evaluator with custom metrics\n", - "evaluator = LLMAsJudgeEvaluator(\n", - " # base_model='arn:aws:sagemaker:us-west-2:052150106756:model-package/Demo-test-deb-2/1', # Required\n", - " model=\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\",\n", - " evaluator_model=\"anthropic.claude-3-5-haiku-20241022-v1:0\", # Required\n", - " dataset=DATASET, # Required: S3 URI or Dataset ARN\n", - " builtin_metrics=[\"Completeness\", \"Faithfulness\"], # Optional: Can combine with custom metrics\n", - " custom_metrics=custom_metrics_json, # Optional: JSON string of custom metrics\n", - " mlflow_resource_arn=MLFLOW_ARN, # Optional\n", - " # model_package_group=MODEL_PACKAGE_GROUP, # Optional if BASE_MODEL is a Model Package ARN/Object\n", - " s3_output_path=S3_BUCKET, # Required\n", - " evaluate_base_model=False\n", - ")\n", - "\n", - "pprint(evaluator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### [Optional] Example with multiple custom metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "# # Create multiple custom metrics\n", - "# custom_metrics_list = [\n", - "# {\n", - "# \"customMetricDefinition\": {\n", - "# \"name\": \"GoodMetric\",\n", - "# \"instructions\": (\n", - "# \"Assess if 
the response has positive sentiment. \"\n", - "# \"Prompt: {{prompt}}\\nResponse: {{prediction}}\"\n", - "# ),\n", - "# \"ratingScale\": [\n", - "# {\"definition\": \"Good\", \"value\": {\"floatValue\": 1}},\n", - "# {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}\n", - "# ]\n", - "# }\n", - "# },\n", - "# {\n", - "# \"customMetricDefinition\": {\n", - "# \"name\": \"BadMetric\",\n", - "# \"instructions\": (\n", - "# \"Assess if the response has negative sentiment. \"\n", - "# \"Prompt: {{prompt}}\\nResponse: {{prediction}}\"\n", - "# ),\n", - "# \"ratingScale\": [\n", - "# {\"definition\": \"Bad\", \"value\": {\"floatValue\": 1}},\n", - "# {\"definition\": \"Good\", \"value\": {\"floatValue\": 0}}\n", - "# ]\n", - "# }\n", - "# }\n", - "# ]\n", - "\n", - "# # Convert list to JSON string\n", - "# custom_metrics_json = json.dumps(custom_metrics_list)\n", - "\n", - "# # Create evaluator\n", - "# evaluator = LLMAsJudgeEvaluator(\n", - "# base_model=BASE_MODEL,\n", - "# evaluator_model=\"anthropic.claude-3-5-haiku-20241022-v1:0\",\n", - "# dataset=DATASET,\n", - "# custom_metrics=custom_metrics_json, # Multiple custom metrics\n", - "# s3_output_path=S3_BUCKET,\n", - "# )\n", - "\n", - "# print(f\"✅ Created evaluator with {len(json.loads(custom_metrics_json))} custom metrics\")\n", - "# pprint(evaluator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### [Optional] Skipping base model evaluation (evaluate custom model only)\n", - "\n", - "By default, LLM-as-Judge evaluates both the base model and custom model. You can skip base model evaluation to save time and cost by setting `evaluate_base_model=False`." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# # Define custom metrics (same as test script)\n", - "# custom_metrics = \"[{\\\"customMetricDefinition\\\":{\\\"name\\\":\\\"GoodMetric\\\",\\\"instructions\\\":\\\"You are an expert evaluator. 
Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\\\",\\\"ratingScale\\\":[{\\\"definition\\\":\\\"Good\\\",\\\"value\\\":{\\\"floatValue\\\":1}},{\\\"definition\\\":\\\"Poor\\\",\\\"value\\\":{\\\"floatValue\\\":0}}]}},{\\\"customMetricDefinition\\\":{\\\"name\\\":\\\"BadMetric\\\",\\\"instructions\\\":\\\"You are an expert evaluator. Your task is to assess if the sentiment of the response is negative. Rate the response based on whether it conveys negative sentiment, unhelpfulness, or destructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a negative, discouraging tone?\\\\n- Is the response unhelpful or destructive?\\\\n- Does it use negative language or harsh criticism?\\\\n\\\\nRate on this scale:\\\\n- Bad: Response has negative sentiment\\\\n- Good: Response lacks negative sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\\\",\\\"ratingScale\\\":[{\\\"definition\\\":\\\"Bad\\\",\\\"value\\\":{\\\"floatValue\\\":1}},{\\\"definition\\\":\\\"Good\\\",\\\"value\\\":{\\\"floatValue\\\":0}}]}}]\"\n", - "\n", - "# # Create evaluator that only evaluates the custom model (matching test script exactly)\n", - "# evaluator = LLMAsJudgeEvaluator(\n", - "# base_model=BASE_MODEL,\n", - "# evaluator_model=\"anthropic.claude-3-5-haiku-20241022-v1:0\",\n", - "# dataset=DATASET,\n", - "# builtin_metrics=[\"Completeness\", \"Faithfulness\", \"Helpfulness\"],\n", - "# custom_metrics=custom_metrics,\n", 
- "# mlflow_resource_arn=MLFLOW_ARN,\n", - "# model_package_group=MODEL_PACKAGE_GROUP,\n", - "# model_artifact=MODEL_ARTIFACT,\n", - "# s3_output_path=S3_BUCKET,\n", - "# evaluate_base_model=False, # KEY: Skip base model evaluation\n", - "# )\n", - "\n", - "# print(\"✅ Created evaluator (custom model only)\")\n", - "# pprint(evaluator)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 3: Run LLM-as-Judge Evaluation\n", - "\n", - "Start the evaluation job. The evaluator will:\n", - "1. Generate inference responses from the base model (if evaluate_base_model=True)\n", - "2. Generate inference responses from the custom model\n", - "3. Use the judge model to evaluate responses with built-in and custom metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:22:01] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=931878;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=760856;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=179503;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=71430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=2444;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=787547;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=808361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=665812;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Inferred model package group ARN:                                base_evaluator.py:386\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma from                                                         \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=361400;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=518747;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Automatically inferred model_package_group:                      base_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=299761;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=867866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:            llm_as_judge_evaluator.py:319\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-g                              \n",
-       "                             roup/test-finetuned-models-gamma                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=538256;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=292230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#319\u001b\\\u001b[2m319\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-g \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m roup/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                   llm_as_judge_evaluator.py:322\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct,                                            \n",
-       "                             base_model_arn:                                                                       \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub                              \n",
-       "                             licHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1                              \n",
-       "                             .10.0, source_model_package_arn:                                                      \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/t                              \n",
-       "                             est-finetuned-models-gamma/28                                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=854970;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=553794;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#322\u001b\\\u001b[2m322\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m licHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/t \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m est-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Uploading custom metrics to S3:                          llm_as_judge_evaluator.py:220\n",
-       "                             s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva                              \n",
-       "                             l-meta-04295d9020251130-002201/custom-metrics.json                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Uploading custom metrics to S3: \u001b]8;id=657021;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=5404;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#220\u001b\\\u001b[2m220\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Successfully uploaded custom metrics to:                 llm_as_judge_evaluator.py:228\n",
-       "                             s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva                              \n",
-       "                             l-meta-04295d9020251130-002201/custom-metrics.json                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully uploaded custom metrics to: \u001b]8;id=718083;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=581773;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#228\u001b\\\u001b[2m228\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=143249;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=489338;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma', 'source_model_package_arn':                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval',                              \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas                      \n",
-       "                             et/gen_qa.jsonl', 'judge_model_id':                                                   \n",
-       "                             'anthropic.claude-3-5-haiku-20241022-v1:0', 'llmaj_metrics':                          \n",
-       "                             '[\"Completeness\", \"Faithfulness\"]', 'custom_metrics_s3_path':                         \n",
-       "                             's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-                      \n",
-       "                             04295d9020251130-002201/custom-metrics.json', 'max_new_tokens':                       \n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0',                            \n",
-       "                             'pipeline_name': 'SagemakerModelEvaluationType2-llmaj',                               \n",
-       "                             'evaluate_base_model': False}                                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=109479;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=566018;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'judge_model_id'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m, \u001b[38;2;0;135;0m'llmaj_metrics'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\"Completeness\", \"Faithfulness\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m, \u001b[38;2;0;135;0m'custom_metrics_s3_path'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerModelEvaluationType2-llmaj'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerModelEvaluationType2-llmaj\"                                                 \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomInferenceModel\",                                         \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"TrainingJobName\": \"CustomInference\",                                         \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"name\": \"CustomInference\",                                                  \n",
-       "                                       \"task\": \"inference_only\"                                                    \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\",                      \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas                      \n",
-       "                             et/gen_qa.jsonl\"                                                                      \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   },                                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ]                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModelMetrics\",                                           \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"EvaluateCustomInferenceModel\"                                                \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"TrainingJobName\": {                                                          \n",
-       "                                       \"Std:Join\": {                                                               \n",
-       "                                         \"On\": \"-\",                                                                \n",
-       "                                         \"Values\": [                                                               \n",
-       "                                           \"custom-llmaj-eval\",                                                    \n",
-       "                                           {                                                                       \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           }                                                                       \n",
-       "                                         ]                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     },                                                                            \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"LLMAJEvaluation\"                                         \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"name\": {                                                                   \n",
-       "                                         \"Std:Join\": {                                                             \n",
-       "                                           \"On\": \"-\",                                                              \n",
-       "                                           \"Values\": [                                                             \n",
-       "                                             \"custom-llmaj-eval\",                                                  \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\": \"Execution.PipelineExecutionId\"                              \n",
-       "                                             }                                                                     \n",
-       "                                           ]                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       \"judge_model_id\":                                                           \n",
-       "                             \"anthropic.claude-3-5-haiku-20241022-v1:0\",                                           \n",
-       "                                       \"inference_data_s3_path\": {                                                 \n",
-       "                                         \"Std:Join\": {                                                             \n",
-       "                                           \"On\": \"\",                                                               \n",
-       "                                           \"Values\": [                                                             \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\":                                                              \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat                      \n",
-       "                             h\"                                                                                    \n",
-       "                                             },                                                                    \n",
-       "                                             \"/\",                                                                  \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\":                                                              \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.TrainingJobName\"                                  \n",
-       "                                             },                                                                    \n",
-       "                                             \"/output/output/\",                                                    \n",
-       "                                             \"CustomInference\",                                                    \n",
-       "                                             \"/eval_results/inference_output.jsonl\"                                \n",
-       "                                           ]                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       \"output_path\": \"s3://mufi-test-serverless-smtj/eval\",                       \n",
-       "                                       \"llmaj_metrics\": \"[\\\"Completeness\\\",                                        \n",
-       "                             \\\"Faithfulness\\\"]\",                                                                   \n",
-       "                                       \"custom_metrics_s3_path\":                                                   \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-                      \n",
-       "                             04295d9020251130-002201/custom-metrics.json\",                                         \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\"                                                              \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\",                      \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     }                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-inference-results\"                                          \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"InferenceResults\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat                      \n",
-       "                             h\"                                                                                    \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"                      \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-inference-results\"                                        \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=358999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=565177;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerModelEvaluationType2-llmaj\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"inference_only\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: 
\u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModelMetrics\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"LLMAJEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"judge_model_id\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"anthropic.claude-3-5-haiku-20241022-v1:0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"inference_data_s3_path\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.TrainingJobName\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/output/output/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/eval_results/inference_output.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"output_path\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"llmaj_metrics\"\u001b[0m: \u001b[38;2;0;135;0m\"\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\\\"Completeness\\\", \u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\\\"Faithfulness\\\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom_metrics_s3_path\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"InferenceResults\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:02] INFO     Found existing pipeline:                                              execution.py:199\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=729179;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=511166;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline                                                     execution.py:202\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9 with latest definition                                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=567297;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline resource.                                         resources.py:30306\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=897054;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=497721;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:03] INFO     Successfully updated pipeline:                                        execution.py:208\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=916795;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: eval-meta-04295d90-1764462123            execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-04295d90-\u001b[1;36m1764462123\u001b[0m \u001b]8;id=41189;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=464412;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318n                 \n",
-       "                             ngjk32f                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=227887;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844359;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c6e9\u001b[0m/execution/m318n \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ngjk32f \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Evaluation job started!\n", - "Job ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f\n", - "Job Name: eval-meta-04295d90\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
LLMAJEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f',\n",
-       "name='eval-meta-04295d90',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 16, 22, 3, 689000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.LLM_AS_JUDGE: 'llmasjudge'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAJEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m16\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m689000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.LLM_AS_JUDGE:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'llmasjudge'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Run evaluation\n", 
- "execution = evaluator.evaluate()\n", - "\n", - "print(f\"✅ Evaluation job started!\")\n", - "print(f\"Job ARN: {execution.arn}\")\n", - "print(f\"Job Name: {execution.name}\")\n", - "print(f\"Status: {execution.status.overall_status}\")\n", - "\n", - "pprint(execution)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 4: Check Job Status\n", - "\n", - "Refresh and display the current job status with step details." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Starting',\n",
-       "│   │   │   start_time='2025-11-29T16:22:04.148000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x1298e7170>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Starting'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T16:22:04.148000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x1298e7170\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Refresh status\n", - "execution.refresh()\n", - "\n", - "# Display job status using rich pprint\n", - "pprint(execution.status)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5: Monitor Pipeline Execution\n", - "\n", - "Poll the pipeline status until it reaches a terminal state 
(Succeeded, Failed, or Stopped)." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          1885.8s                                                                                  \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        1.9s                                                          \n",
-       "  EvaluateCustomModelMetrics      Succeeded        1327.1s                                                       \n",
-       "  EvaluateCustomInferenceModel    Succeeded        554.1s                                                        \n",
-       "  CreateEvaluationAction          Succeeded        4.5s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m1885.8s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModelMetrics \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1327.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomInferenceModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m554.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m4.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:53:37] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:53:37]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=524139;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=278480;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Wait for job completion (optional)\n", - "# This will poll every 5 seconds for up to 1 hour\n", - "execution.wait(poll=5, timeout=3600)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:07] INFO     Extracted training job name:                                  show_results_utils.py:52\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from                           \n",
-       "                             step: EvaluateCustomModelMetrics (priority: Custom)                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:07]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=177834;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=168478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/                  \n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:341\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=453165;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=425984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#341\u001b\\\u001b[2m341\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:52\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from                           \n",
-       "                             step: EvaluateCustomModelMetrics (priority: Custom)                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=324161;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=683512;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for bedrock summary in                             show_results_utils.py:361\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E                          \n",
-       "                             valuateCustomModelM-lN73ONZ955/output/output/                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for bedrock summary in \u001b]8;id=308182;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=660550;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#361\u001b\\\u001b[2m361\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModelM-lN73ONZ955/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found bedrock job name: custom-llmaj-eval-m318nngjk32f       show_results_utils.py:377\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found bedrock job name: custom-llmaj-eval-m318nngjk32f \u001b]8;id=705765;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=855376;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#377\u001b\\\u001b[2m377\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for JSONL in                                       show_results_utils.py:387\n",
-       "                             s3://mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn                          \n",
-       "                             gjk32f/                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for JSONL in \u001b]8;id=236968;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=874421;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#387\u001b\\\u001b[2m387\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mgjk32f/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found JSONL:                                                 show_results_utils.py:405\n",
-       "                             eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode                          \n",
-       "                             l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421                          \n",
-       "                             4-9c1e-0c0bf2c71fd6_output.jsonl                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found JSONL: \u001b]8;id=648967;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=247115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#405\u001b\\\u001b[2m405\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:413\n",
-       "                             eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode                          \n",
-       "                             l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421                          \n",
-       "                             4-9c1e-0c0bf2c71fd6_output.jsonl                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=234223;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#413\u001b\\\u001b[2m413\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Loaded 3 evaluation results                                  show_results_utils.py:429\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Loaded \u001b[1;36m3\u001b[0m evaluation results \u001b]8;id=139737;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=460642;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#429\u001b\\\u001b[2m429\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 1 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 1 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: What is the next number in this series? 1, 2, 4, 8, 16, ?\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the next number in this series? \u001b[1;36m1\u001b[0m, \u001b[1;36m2\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The next number in the series is 32.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The next number in the series is \u001b[1;36m32\u001b[0m.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                 100.0%  \n",
-       "  Builtin.Faithfulness                 100.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 2 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 2 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: What is the symbol that ends the sentence as a question\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the symbol that ends the sentence as a question\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The symbol that ends the sentence as a question is: ?\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The symbol that ends the sentence as a question is: ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                 100.0%  \n",
-       "  Builtin.Faithfulness                 100.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 3 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 3 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: I ate a hamburger today and it was kind of dry.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m I ate a hamburger today and it was kind of dry.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                   0.0%  \n",
-       "  Builtin.Faithfulness                   0.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n",
-       "
\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Showing evaluations 1-3 of 3\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36mShowing evaluations \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;36m-\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;36m of \u001b[0m\u001b[1;36m3\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n",
-       "
\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Display results\n", - "execution.show_results(limit=10, offset=0, show_explanations=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Retrieve an Existing Job\n", - "\n", - "You can retrieve and inspect any existing evaluation job using its ARN." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:15] WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge/execution/4hr7446yft1d                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:15]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=315627;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953607;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz:                               \n",
-       "                             s3://mufi-test-serverless-smtj/eval                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=739992;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=203397;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge                                                                           \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=550335;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=858100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge/execution/4hr7446yft1d                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=379628;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=725705;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Succeeded',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='AssociateLineage',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:45:57.889000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:45:59.266000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModelMetrics',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:27:55.641000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:45:56.749000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomInferenceModel',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:18:07.804000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:27:54.474000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:18:05.550000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:18:07.332000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'AssociateLineage'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:57.889000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:59.266000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModelMetrics'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:55.641000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:56.749000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomInferenceModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.804000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:54.474000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:05.550000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.332000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:17                                                                                   \n",
-       "                                                                                                  \n",
-       "   14 )                                                                                           \n",
-       "   15 pprint(existing_execution.status)                                                           \n",
-       "   16                                                                                             \n",
-       " 17 existing_execution.show_results(limit=5, offset=0, show_explanations=False)                 \n",
-       "   18                                                                                             \n",
-       "                                                                                                  \n",
-       " /Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/main \n",
-       " .py:1026 in __getattr__                                                                          \n",
-       "                                                                                                  \n",
-       "   1023 │   │   │   │   │   │   return super().__getattribute__(item)  # Raises AttributeError i  \n",
-       "   1024 │   │   │   │   │   else:                                                                 \n",
-       "   1025 │   │   │   │   │   │   # this is the current error                                       \n",
-       " 1026 │   │   │   │   │   │   raise AttributeError(f'{type(self).__name__!r} object has no att  \n",
-       "   1027 │   │                                                                                     \n",
-       "   1028 │   │   def __setattr__(self, name: str, value: Any) -> None:                             \n",
-       "   1029 │   │   │   if (setattr_handler := self.__pydantic_setattr_handlers__.get(name)) is not   \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "AttributeError: 'EvaluationPipelineExecution' object has no attribute 'show_results'\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m17\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m14 \u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m15 \u001b[0mpprint(existing_execution.status) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m16 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m17 \u001b[1;4mexisting_execution.show_results\u001b[0m(limit=\u001b[94m5\u001b[0m, offset=\u001b[94m0\u001b[0m, show_explanations=\u001b[94mFalse\u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m18 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/\u001b[0m\u001b[1;33mmain\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1026\u001b[0m in \u001b[92m__getattr__\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1023 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().\u001b[92m__getattribute__\u001b[0m(item) \u001b[2m# Raises AttributeError i\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m 
\u001b[2m1024 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1025 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[2m# this is the current error\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m1026 \u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[1;4;94mraise\u001b[0m\u001b[1;4m \u001b[0m\u001b[1;4;96mAttributeError\u001b[0m\u001b[1;4m(\u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m'\u001b[0m\u001b[1;4;33m{\u001b[0m\u001b[1;4;96mtype\u001b[0m\u001b[1;4m(\u001b[0m\u001b[1;4;96mself\u001b[0m\u001b[1;4m).\u001b[0m\u001b[1;4;91m__name__\u001b[0m\u001b[1;4;33m!r}\u001b[0m\u001b[1;4;33m object has no att\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1027 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1028 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mdef\u001b[0m\u001b[90m \u001b[0m\u001b[92m__setattr__\u001b[0m(\u001b[96mself\u001b[0m, name: \u001b[96mstr\u001b[0m, value: Any) -> \u001b[94mNone\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1029 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m (setattr_handler := \u001b[96mself\u001b[0m.__pydantic_setattr_handlers__.get(name)) \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mAttributeError: \u001b[0m\u001b[38;2;0;135;0m'EvaluationPipelineExecution'\u001b[0m object has no attribute \u001b[38;2;0;135;0m'show_results'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Get an existing job by ARN\n", - "# Replace with your actual pipeline execution ARN\n", - "existing_arn = 
'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-llmasjudge/execution/4hr7446yft1d' # or use a specific ARN\n", - "\n", - "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", - "from rich.pretty import pprint\n", - "\n", - "existing_execution = EvaluationPipelineExecution.get(\n", - " arn=existing_arn,\n", - " region=\"us-west-2\"\n", - ")\n", - "pprint(existing_execution.status)\n", - "\n", - "existing_execution.show_results(limit=5, offset=0, show_explanations=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get All LLM-as-Judge Evaluations\n", - "\n", - "Retrieve all LLM-as-Judge evaluation jobs." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:21] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955:                               \n",
-       "                             s3://mufi-test-serverless-smtj/eval                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=802368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 LLM-as-Judge evaluation jobs\n", - " - m318nngjk32f: Succeeded\n", - " - 2m5hczli7vdp: Failed\n" - ] - } - ], - "source": [ - "from sagemaker.train.evaluate import LLMAsJudgeEvaluator\n", - "\n", - "# Get all LLM-as-Judge evaluations as an iterator\n", - "all_executions = list(LLMAsJudgeEvaluator.get_all(region=\"us-west-2\"))\n", - "\n", - "print(f\"Found {len(all_executions)} LLM-as-Judge evaluation jobs\")\n", - "for execution in all_executions:\n", - " print(f\" - {execution.name}: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stop a Running Job (Optional)\n", - "\n", - "If needed, you can stop a running evaluation job." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Uncomment to stop the job\n", - "# execution.stop()\n", - "# print(f\"Execution stopped. 
Status: {execution.status.overall_status}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dataset Support\n", - "\n", - "The `dataset` parameter supports two formats:\n", - "\n", - "### 1. S3 URI\n", - "```python\n", - "dataset=\"s3://my-bucket/path/to/dataset.jsonl\"\n", - "```\n", - "\n", - "### 2. Dataset ARN (AI Registry)\n", - "```python\n", - "dataset=\"arn:aws:sagemaker:us-west-2:123456789012:hub-content/AIRegistry/DataSet/my-dataset/1.0.0\"\n", - "```\n", - "\n", - "The evaluator automatically detects which format is provided and uses the appropriate data source configuration." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/sagemaker-train/src/sagemaker/train/remote_function/__init__.py b/sagemaker-train/src/sagemaker/train/remote_function/__init__.py index bf29079921..87e9aca383 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/__init__.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/__init__.py @@ -10,25 +10,10 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function - -This is a backward compatibility shim. Please update your imports to: - from sagemaker.core.remote_function import ... 
-""" +"""Defines classes and helper methods used in remote function executions.""" from __future__ import absolute_import -import warnings - -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.client import remote, RemoteExecutor # noqa: F401 -from sagemaker.core.remote_function.checkpoint_location import CheckpointLocation # noqa: F401 -from sagemaker.core.remote_function.custom_file_filter import CustomFileFilter # noqa: F401 -from sagemaker.core.remote_function.spark_config import SparkConfig # noqa: F401 - -warnings.warn( - "sagemaker.train.remote_function has been moved to sagemaker.core.remote_function. " - "Please update your imports. This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) +from sagemaker.train.remote_function.client import remote, RemoteExecutor # noqa: F401 +from sagemaker.train.remote_function.checkpoint_location import CheckpointLocation # noqa: F401 +from sagemaker.train.remote_function.custom_file_filter import CustomFileFilter # noqa: F401 +from sagemaker.train.remote_function.spark_config import SparkConfig # noqa: F401 diff --git a/sagemaker-train/src/sagemaker/train/remote_function/client.py b/sagemaker-train/src/sagemaker/train/remote_function/client.py index eb99d14c1e..9551f48bb4 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/client.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/client.py @@ -10,21 +10,1276 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.client - -This is a backward compatibility shim. 
-""" +"""SageMaker remote function client.""" from __future__ import absolute_import -import warnings +from concurrent.futures import ThreadPoolExecutor +from collections import deque +import time +import threading +from typing import Callable, Dict, List, Optional, Tuple, Any, Union +import functools +import itertools +import inspect -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.client import * # noqa: F401, F403 +from botocore.exceptions import ClientError +from sagemaker.core.exceptions import UnexpectedStatusException +from sagemaker.core.experiments._run_context import _RunContext -warnings.warn( - "sagemaker.train.remote_function.client has been moved to sagemaker.core.remote_function.client. " - "Please update your imports. This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 +import sagemaker.train.remote_function.core.serialization as serialization +from sagemaker.train.remote_function.errors import ( + RemoteFunctionError, + ServiceError, + DeserializationError, +) +from sagemaker.train.remote_function.core.stored_function import RESULTS_FOLDER, EXCEPTION_FOLDER +from sagemaker.train.remote_function.runtime_environment.runtime_environment_manager import ( + RuntimeEnvironmentError, ) + +from sagemaker.core.helper.session_helper import Session +from sagemaker.core.s3 import s3_path_join +from sagemaker.train.remote_function.job import _JobSettings, _Job, _RunInfo +from sagemaker.train.remote_function import logging_config +from sagemaker.core.common_utils import name_from_base, base_from_name +from sagemaker.train.remote_function.spark_config import SparkConfig +from sagemaker.train.remote_function.custom_file_filter import CustomFileFilter +from sagemaker.core.telemetry.telemetry_logging import _telemetry_emitter +from sagemaker.core.telemetry.constants import Feature + +_API_CALL_LIMIT = { + "SubmittingIntervalInSecs": 1, + "MinBatchPollingIntervalInSecs": 10, + 
"PollingIntervalInSecs": 0.5, +} + +# Possible future states. +_PENDING = "PENDING" +_RUNNING = "RUNNING" +# The future was cancelled by the user... +_CANCELLED = "CANCELLED" +_FINISHED = "FINISHED" + +logger = logging_config.get_logger() + + +@_telemetry_emitter(feature=Feature.REMOTE_FUNCTION, func_name="remote_function.remote") +def remote( + _func=None, + *, + dependencies: str = None, + pre_execution_commands: List[str] = None, + pre_execution_script: str = None, + environment_variables: Dict[str, str] = None, + image_uri: str = None, + include_local_workdir: bool = None, + custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, + instance_count: int = 1, + instance_type: str = None, + job_conda_env: str = None, + job_name_prefix: str = None, + keep_alive_period_in_seconds: int = 0, + max_retry_attempts: int = 1, + max_runtime_in_seconds: int = 24 * 60 * 60, + role: str = None, + s3_kms_key: str = None, + s3_root_uri: str = None, + sagemaker_session: Session = None, + security_group_ids: List[str] = None, + subnets: List[str] = None, + tags: List[Tuple[str, str]] = None, + volume_kms_key: str = None, + volume_size: int = 30, + encrypt_inter_container_traffic: bool = None, + spark_config: SparkConfig = None, + use_spot_instances=False, + max_wait_time_in_seconds=None, + disable_output_compression: bool = False, + use_torchrun: bool = False, + use_mpirun: bool = False, + nproc_per_node: Optional[int] = None, +): + """Decorator for running the annotated function as a SageMaker training job. + + This decorator wraps the annotated code and runs it as a new SageMaker job synchronously + with the provided runtime settings. + + If a parameter value is not set, the decorator first looks up the value from the SageMaker + configuration file. If no value is specified in the configuration file or no configuration file + is found, the decorator selects the default as specified below. 
For more information, see + `Configuring and using defaults with the SageMaker Python SDK `_. + + Args: + _func (Optional): A Python function to run as a SageMaker training job. + + dependencies (str): Either the path to a dependencies file or the reserved keyword + ``auto_capture``. Defaults to ``None``. + If ``dependencies`` is provided, the value must be one of the following: + + * A path to a conda environment.yml file. The following conditions apply. + + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then the + conda environment is updated by installing dependencies from the yaml file and the + function is invoked within that conda environment. For this to succeed, the + conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and + ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. + * If none of the previous conditions are met, a new conda environment named + ``sagemaker-runtime-env`` is created and the function annotated with the remote + decorator is invoked in that conda environment. + + * A path to a requirements.txt file. The following conditions apply. + + * If ``job_conda_env`` is set in the remote decorator, dependencies are installed + within that conda environment and the function annotated with the remote decorator + is invoked in the same conda environment. For this to succeed, the specified + conda environment must already exist in the image. + * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + dependencies are installed within that conda environment and the function annotated + with the remote decorator is invoked in the same. 
For this to succeed, the conda + environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and + ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. + * If none of the above conditions are met, conda is not used. Dependencies are + installed at the system level, without any virtual environment, and the function + annotated with the remote decorator is invoked using the Python runtime available + in the system path. + + * The parameter dependencies is set to ``auto_capture``. SageMaker will automatically + generate an env_snapshot.yml corresponding to the current active conda environment’s + snapshot. You do not need to provide a dependencies file. The following conditions + apply: + + * You must run the remote function within an active conda environment. + * When installing the dependencies on the training job, the same conditions as when + dependencies is set to a path to a conda environment file apply. These conditions are + as follows: + + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then + the conda environment is updated by installing dependencies from the yaml file + and the function is invoked within that conda environment. For this to + succeed, the conda environment name must already be set in + ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist + in the image. + * If none of the previous conditions are met, a new conda environment with name + ``sagemaker-runtime-env`` is created and the function annotated with the + remote decorator is invoked in that conda environment. + + * ``None``. 
SageMaker will assume that there are no dependencies to install while + executing the remote annotated function in the training job. + + pre_execution_commands (List[str]): List of commands to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. + + pre_execution_script (str): Path to script file to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. + + environment_variables (Dict): The environment variables used inside the decorator function. + Defaults to ``None``. + + image_uri (str): The universal resource identifier (URI) location of a Docker image on + Amazon Elastic Container Registry (ECR). Defaults to the following based on where the SDK + is running: + + * For users who specify ``spark_config`` and want to run the function in a Spark + application, the ``image_uri`` should be ``None``. A SageMaker Spark image will + be used for training, otherwise a ``ValueError`` is thrown. + * For users on SageMaker Studio notebooks, the image used as the kernel image for the + notebook is used. + * For other users, it is resolved to base python image with the same python version + as the environment running the local code. + + If no compatible image is found, a ValueError is thrown. + + include_local_workdir (bool): A flag to indicate that the remote function should include + local directories. Set to ``True`` if the remote function code imports local modules and + methods that are not available via PyPI or conda. Only python files are included. + Default value is ``False``. + + custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function + that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object + that specifies the local directories and files to be included in the remote function. 
+ If a callable is passed in, the function should follow the protocol of ``ignore`` argument + of ``shutil.copytree``. Defaults to ``None``, which means only python + files are accepted and uploaded to S3. + + instance_count (int): The number of instances to use. Defaults to 1. + NOTE: Remote function supports instance_count > 1 for Spark jobs, torchrun and + mpirun utilities + + instance_type (str): The Amazon Elastic Compute Cloud (EC2) instance type to use to run + the SageMaker job. e.g. ml.c4.xlarge. If not provided, a ValueError is thrown. + + job_conda_env (str): The name of the conda environment to activate during job's runtime. + Defaults to ``None``. + + job_name_prefix (str): The prefix used to create the underlying SageMaker job. + + keep_alive_period_in_seconds (int): The duration in seconds to retain and reuse provisioned + infrastructure after the completion of a training job, also known as SageMaker managed + warm pools. The use of warmpools reduces the latency time spent to provision new + resources. The default value for ``keep_alive_period_in_seconds`` is 0. + NOTE: Additional charges associated with warm pools may apply. Using this parameter also + activates a new persistent cache feature, which will further reduce job start up + latency over using SageMaker managed warm pools alone by caching the package source + downloaded in the previous runs. + + max_retry_attempts (int): The max number of times the job is retried on + ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. + + max_runtime_in_seconds (int): The upper limit in seconds to be used for training. After + this specified amount of time, SageMaker terminates the job regardless of its current + status. Defaults to 1 day or (86400 seconds). + + role (str): The IAM role (either name or full ARN) used to run your SageMaker training + job.
Defaults to: + + * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or + SageMaker Studio Notebooks. + * if not above, a ValueError is thrown. + + s3_kms_key (str): The key used to encrypt the input and output data. Defaults to ``None``. + + s3_root_uri (str): The root S3 folder to which the code archives and data are + uploaded to. Defaults to ``s3://``. + + sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to which + SageMaker service calls are delegated to (default: None). If not provided, one is created + using a default configuration chain. + + security_group_ids (List[str]): A list of security group IDs. Defaults to ``None`` and the + training job is created without VPC config. + + subnets (List[str]): A list of subnet IDs. Defaults to ``None`` and the job is created + without VPC config. + + tags (List[Tuple[str, str]]): A list of tags attached to the job. Defaults to ``None`` and + the training job is created without tags. + + volume_kms_key (str): An Amazon Key Management Service (KMS) key used to encrypt an + Amazon Elastic Block Storage (EBS) volume attached to the training instance. Defaults to + ``None``. + + volume_size (int): The size in GB of the storage volume for storing input and output data + during training. Defaults to ``30``. + + encrypt_inter_container_traffic (bool): A flag that specifies whether traffic between + training containers is encrypted for the training job. Defaults to ``False``. + + spark_config (SparkConfig): Configurations to the Spark application that runs on + Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri + will be used for training. Note that ``image_uri`` can not be specified at the + same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. + + use_spot_instances (bool): Specifies whether to use SageMaker Managed Spot instances for + training. If enabled then the ``max_wait_time_in_seconds`` arg should also be set.
+ Defaults to ``False``. + + max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. + After this amount of time Amazon SageMaker will stop waiting for managed spot training + job to complete. Defaults to ``None``. + + disable_output_compression (bool): Optional. When set to true, Model is uploaded to + Amazon S3 without compression after training finishes. + + use_torchrun (bool): Specifies whether to use torchrun for distributed training. + Defaults to ``False``. + + use_mpirun (bool): Specifies whether to use mpirun for distributed training. + Defaults to ``False``. + + nproc_per_node (int): Optional. Specifies the number of processes per node for + distributed training. Defaults to ``None``. + This is automatically configured based on the instance type. + """ + + def _remote(func): + + job_settings = _JobSettings( + dependencies=dependencies, + pre_execution_commands=pre_execution_commands, + pre_execution_script=pre_execution_script, + environment_variables=environment_variables, + image_uri=image_uri, + include_local_workdir=include_local_workdir, + custom_file_filter=custom_file_filter, + instance_count=instance_count, + instance_type=instance_type, + job_conda_env=job_conda_env, + job_name_prefix=job_name_prefix, + keep_alive_period_in_seconds=keep_alive_period_in_seconds, + max_retry_attempts=max_retry_attempts, + max_runtime_in_seconds=max_runtime_in_seconds, + role=role, + s3_kms_key=s3_kms_key, + s3_root_uri=s3_root_uri, + sagemaker_session=sagemaker_session, + security_group_ids=security_group_ids, + subnets=subnets, + tags=tags, + volume_kms_key=volume_kms_key, + volume_size=volume_size, + encrypt_inter_container_traffic=encrypt_inter_container_traffic, + spark_config=spark_config, + use_spot_instances=use_spot_instances, + max_wait_time_in_seconds=max_wait_time_in_seconds, + disable_output_compression=disable_output_compression, + use_torchrun=use_torchrun, + use_mpirun=use_mpirun, + nproc_per_node=nproc_per_node, + ) + + 
@functools.wraps(func) + def wrapper(*args, **kwargs): + + if instance_count > 1 and not ( + (spark_config is not None and not use_torchrun and not use_mpirun) + or (spark_config is None and use_torchrun and not use_mpirun) + or (spark_config is None and not use_torchrun and use_mpirun) + ): + raise ValueError( + "Remote function do not support training on multi instances " + + "without spark_config or use_torchrun or use_mpirun. " + + "Please provide instance_count = 1" + ) + + RemoteExecutor._validate_submit_args(func, *args, **kwargs) + + job = _Job.start(job_settings, func, args, kwargs) + + try: + job.wait() + except UnexpectedStatusException as usex: + if usex.actual_status == "Failed": + try: + exception = serialization.deserialize_exception_from_s3( + sagemaker_session=job_settings.sagemaker_session, + s3_uri=s3_path_join( + job_settings.s3_root_uri, job.job_name, EXCEPTION_FOLDER + ), + hmac_key=job.hmac_key, + ) + except ServiceError as serr: + chained_e = serr.__cause__ + if ( + isinstance(chained_e, ClientError) + and chained_e.response["Error"]["Code"] # pylint: disable=no-member + == "404" + and chained_e.response["Error"]["Message"] # pylint: disable=no-member + == "Not Found" + ): + describe_result = job.describe() + if ( + "FailureReason" in describe_result + and describe_result["FailureReason"] + and "RuntimeEnvironmentError: " in describe_result["FailureReason"] + ): + failure_msg = describe_result["FailureReason"].replace( + "RuntimeEnvironmentError: ", "" + ) + raise RuntimeEnvironmentError(failure_msg) + raise RemoteFunctionError( + "Failed to execute remote function. " + + "Check corresponding job for details." + ) + raise serr + + raise exception + + raise TimeoutError( + "Job for remote function timed out before reaching a termination status." 
+ ) + + if job.describe()["TrainingJobStatus"] == "Completed": + return serialization.deserialize_obj_from_s3( + sagemaker_session=job_settings.sagemaker_session, + s3_uri=s3_path_join(job_settings.s3_root_uri, job.job_name, RESULTS_FOLDER), + hmac_key=job.hmac_key, + ) + + if job.describe()["TrainingJobStatus"] == "Stopped": + raise RemoteFunctionError("Job for remote function has been aborted.") + + return None + + wrapper.job_settings = job_settings + wrapper.wrapped_func = func + return wrapper + + if _func is None: + return _remote + return _remote(_func) + + +class _SubmitRequest: + """Class that holds parameters and data for creating a new job.""" + + def __init__( + self, future, job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None + ): + self.future = future + self.job_settings = job_settings + self.func = func + self.args = func_args + self.kwargs = func_kwargs + self.run_info = run_info + + +def _submit_worker(executor): + """Background worker that submits job requests.""" + + def has_work_to_do(): + return ( + len(executor._pending_request_queue) > 0 + and len(executor._running_jobs) < executor.max_parallel_jobs + ) + + try: + while True: + with executor._state_condition: + executor._state_condition.wait_for(has_work_to_do) + request = executor._pending_request_queue[0] + + if request is None: + with executor._state_condition: + # remove the anchor from the pending queue + executor._pending_request_queue.popleft() + return + + time.sleep(_API_CALL_LIMIT["SubmittingIntervalInSecs"]) + # submit a new job + job = request.future._start_and_notify( + request.job_settings, request.func, request.args, request.kwargs, request.run_info + ) + + with executor._state_condition: + if job: + executor._running_jobs[job.job_name] = job + # remove the request from the pending queue + executor._pending_request_queue.popleft() + except Exception: # pylint: disable=broad-except + logger.exception("Error occurred while submitting CreateTrainingJob 
requests.") + + +def _polling_worker(executor): + """Background worker that polls the status of the running jobs.""" + try: + while True: + with executor._state_condition: + if ( + executor._shutdown + and len(executor._running_jobs) + len(executor._pending_request_queue) == 0 + ): + return + + time.sleep( + max( + _API_CALL_LIMIT["MinBatchPollingIntervalInSecs"] + - len(executor._running_jobs) * _API_CALL_LIMIT["PollingIntervalInSecs"], + 0, + ) + ) + + # check if running jobs are terminated + for job_name in list(executor._running_jobs.keys()): + try: + time.sleep(_API_CALL_LIMIT["PollingIntervalInSecs"]) + if executor._running_jobs[job_name].describe()["TrainingJobStatus"] in [ + "Completed", + "Failed", + "Stopped", + ]: + with executor._state_condition: + del executor._running_jobs[job_name] + executor._state_condition.notify_all() + except Exception as e: # pylint: disable=broad-except + if ( + not isinstance(e, ClientError) + or e.response["Error"]["Code"] # pylint: disable=no-member + != "LimitExceededException" + ): + # Couldn't check the job status, move on + logger.exception( + "Error occurred while checking the status of job %s", job_name + ) + with executor._state_condition: + del executor._running_jobs[job_name] + executor._state_condition.notify_all() + except Exception: # pylint: disable=broad-except + logger.exception("Error occurred while monitoring the job statuses.") + + +class RemoteExecutor(object): + """Run Python functions asynchronously as SageMaker jobs""" + + def __init__( + self, + *, + dependencies: str = None, + pre_execution_commands: List[str] = None, + pre_execution_script: str = None, + environment_variables: Dict[str, str] = None, + image_uri: str = None, + include_local_workdir: bool = None, + custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, + instance_count: int = 1, + instance_type: str = None, + job_conda_env: str = None, + job_name_prefix: str = None, + keep_alive_period_in_seconds: 
int = 0, + max_parallel_jobs: int = 1, + max_retry_attempts: int = 1, + max_runtime_in_seconds: int = 24 * 60 * 60, + role: str = None, + s3_kms_key: str = None, + s3_root_uri: str = None, + sagemaker_session: Session = None, + security_group_ids: List[str] = None, + subnets: List[str] = None, + tags: List[Tuple[str, str]] = None, + volume_kms_key: str = None, + volume_size: int = 30, + encrypt_inter_container_traffic: bool = None, + spark_config: SparkConfig = None, + use_spot_instances=False, + max_wait_time_in_seconds=None, + disable_output_compression: bool = False, + use_torchrun: bool = False, + use_mpirun: bool = False, + nproc_per_node: Optional[int] = None, + ): + """Constructor for RemoteExecutor + + If a parameter value is not set, the constructor first looks up the value from the + SageMaker configuration file. If no value is specified in the configuration file or + no configuration file is found, the constructor selects the default as specified below. + For more information, see `Configuring and using defaults with the SageMaker Python SDK + `_. + + Args: + _func (Optional): A Python function to run as a SageMaker training job. + + dependencies (str): Either the path to a dependencies file or the reserved keyword + ``auto_capture``. Defaults to ``None``. + If ``dependencies`` is provided, the value must be one of the following: + + * A path to a conda environment.yml file. The following conditions apply. + + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, then + the conda environment is updated by installing dependencies from the yaml file and + the function is invoked within that conda environment. 
For this to succeed, the + conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and + ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. + * If none of the previous conditions are met, a new conda environment named + ``sagemaker-runtime-env`` is created and the function annotated with the remote + decorator is invoked in that conda environment. + + * A path to a requirements.txt file. The following conditions apply. + + * If ``job_conda_env`` is set in the remote decorator, dependencies are installed + within that conda environment and the function annotated with the remote decorator + is invoked in the same conda environment. For this to succeed, the specified + conda environment must already exist in the image. + * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + dependencies are installed within that conda environment and the function annotated + with the remote decorator is invoked in the same. For this to succeed, the + conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and + ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. + * If none of the above conditions are met, conda is not used. Dependencies are + installed at the system level, without any virtual environment, and the function + annotated with the remote decorator is invoked using the Python runtime available + in the system path. + + * The parameter dependencies is set to ``auto_capture``. SageMaker will automatically + generate an env_snapshot.yml corresponding to the current active conda environment’s + snapshot. You do not need to provide a dependencies file. The following conditions + apply: + + * You must run the remote function within an active conda environment. + * When installing the dependencies on the training job, the same conditions as when + dependencies is set to a path to a conda environment file apply. 
These conditions + are as follows: + + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + then the conda environment is updated by installing dependencies from the yaml + file and the function is invoked within that conda environment. For this to + succeed, the conda environment name must already be set in + ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist + in the image. + * If none of the previous conditions are met, a new conda environment with name + ``sagemaker-runtime-env`` is created and the function annotated with the + remote decorator is invoked in that conda environment. + + * ``None``. SageMaker will assume that there are no dependencies to install while + executing the remote annotated function in the training job. + + pre_execution_commands (List[str]): List of commands to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. + + pre_execution_script (str): Path to script file to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. + + environment_variables (Dict): The environment variables used inside the decorator + function. Defaults to ``None``. + + image_uri (str): The universal resource identifier (URI) location of a Docker image on + Amazon Elastic Container Registry (ECR). Defaults to the following based on where the + SDK is running: + + * For users who specify ``spark_config`` and want to run the function in a Spark + application, the ``image_uri`` should be ``None``. 
A SageMaker Spark image will + be used for training, otherwise a ``ValueError`` is thrown. + * For users on SageMaker Studio notebooks, the image used as the kernel image for + the notebook is used. + * For other users, it is resolved to base python image with the same python + version as the environment running the local code. + + If no compatible image is found, a ValueError is thrown. + + include_local_workdir (bool): A flag to indicate that the remote function should include + local directories. Set to ``True`` if the remote function code imports local modules + and methods that are not available via PyPI or conda. Default value is ``False``. + + custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function + that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object + that specifies the local directories and files to be included in the remote function. + If a callable is passed in, that function is passed to the ``ignore`` argument of + ``shutil.copytree``. Defaults to ``None``, which means only python + files are accepted and uploaded to S3. + + instance_count (int): The number of instances to use. Defaults to 1. + NOTE: Remote function supports instance_count > 1 for Spark jobs, torchrun and + mpirun utilities + + instance_type (str): The Amazon Elastic Compute Cloud (EC2) instance type to use to run + the SageMaker job. e.g. ml.c4.xlarge. If not provided, a ValueError is thrown. + + job_conda_env (str): The name of the conda environment to activate during job's runtime. + Defaults to ``None``. + + job_name_prefix (str): The prefix used to create the underlying SageMaker job. + + keep_alive_period_in_seconds (int): The duration in seconds to retain and reuse + provisioned infrastructure after the completion of a training job, also known as + SageMaker managed warm pools. The use of warmpools reduces the latency time spent to + provision new resources.
The default value for ``keep_alive_period_in_seconds`` is 0. + NOTE: Additional charges associated with warm pools may apply. Using this parameter + also activates a new persistent cache feature, which will further reduce job start + up latency compared to using SageMaker managed warm pools alone by caching the package + source downloaded in the previous runs. + + max_parallel_jobs (int): Maximum number of jobs that run in parallel. Defaults to 1. + + max_retry_attempts (int): The max number of times the job is retried on + ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. + + max_runtime_in_seconds (int): The upper limit in seconds to be used for training. After + this specified amount of time, SageMaker terminates the job regardless of its current + status. Defaults to 1 day or (86400 seconds). + + role (str): The IAM role (either name or full ARN) used to run your SageMaker training + job. Defaults to: + + * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or + SageMaker Studio Notebooks. + * if not above, a ValueError is thrown. + + s3_kms_key (str): The key used to encrypt the input and output data. + Defaults to ``None``. + + s3_root_uri (str): The root S3 folder to which the code archives and data are + uploaded to. Defaults to ``s3://``. + + sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to which + SageMaker service calls are delegated to (default: None). If not provided, one is + created using a default configuration chain. + + security_group_ids (List[str]): A list of security group IDs. Defaults to ``None`` and + the training job is created without VPC config. + + subnets (List[str]): A list of subnet IDs. Defaults to ``None`` and the job is + created without VPC config. + + tags (List[Tuple[str, str]]): A list of tags attached to the job. Defaults to ``None`` + and the training job is created without tags.
+ + volume_kms_key (str): An Amazon Key Management Service (KMS) key used to encrypt an + Amazon Elastic Block Storage (EBS) volume attached to the training instance. + Defaults to ``None``. + + volume_size (int): The size in GB of the storage volume for storing input and output + data during training. Defaults to ``30``. + + encrypt_inter_container_traffic (bool): A flag that specifies whether traffic between + training containers is encrypted for the training job. Defaults to ``False``. + + spark_config (SparkConfig): Configurations to the Spark application that runs on + Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri + will be used for training. Note that ``image_uri`` can not be specified at the + same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. + + use_spot_instances (bool): Specifies whether to use SageMaker Managed Spot instances for + training. If enabled then the ``max_wait_time_in_seconds`` arg should also be set. + Defaults to ``False``. + + max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. + After this amount of time Amazon SageMaker will stop waiting for managed spot training + job to complete. Defaults to ``None``. + + disable_output_compression (bool): Optional. When set to true, Model is uploaded to + Amazon S3 without compression after training finishes. + + use_torchrun (bool): Specifies whether to use torchrun for distributed training. + Defaults to ``False``. + + use_mpirun (bool): Specifies whether to use mpirun for distributed training. + Defaults to ``False``. + + nproc_per_node (int): Optional. Specifies the number of processes per node for + distributed training. Defaults to ``None``. + If not set, this is automatically configured based on the instance type.
+ """ + self.max_parallel_jobs = max_parallel_jobs + + if self.max_parallel_jobs <= 0: + raise ValueError("max_parallel_jobs must be greater than 0.") + + if instance_count > 1 and not ( + (spark_config is not None and not use_torchrun and not use_mpirun) + or (spark_config is None and use_torchrun and not use_mpirun) + or (spark_config is None and not use_torchrun and use_mpirun) + ): + raise ValueError( + "Remote function do not support training on multi instances " + + "without spark_config or use_torchrun or use_mpirun. " + + "Please provide instance_count = 1" + ) + + self.job_settings = _JobSettings( + dependencies=dependencies, + pre_execution_commands=pre_execution_commands, + pre_execution_script=pre_execution_script, + environment_variables=environment_variables, + image_uri=image_uri, + include_local_workdir=include_local_workdir, + custom_file_filter=custom_file_filter, + instance_count=instance_count, + instance_type=instance_type, + job_conda_env=job_conda_env, + job_name_prefix=job_name_prefix, + keep_alive_period_in_seconds=keep_alive_period_in_seconds, + max_retry_attempts=max_retry_attempts, + max_runtime_in_seconds=max_runtime_in_seconds, + role=role, + s3_kms_key=s3_kms_key, + s3_root_uri=s3_root_uri, + sagemaker_session=sagemaker_session, + security_group_ids=security_group_ids, + subnets=subnets, + tags=tags, + volume_kms_key=volume_kms_key, + volume_size=volume_size, + encrypt_inter_container_traffic=encrypt_inter_container_traffic, + spark_config=spark_config, + use_spot_instances=use_spot_instances, + max_wait_time_in_seconds=max_wait_time_in_seconds, + disable_output_compression=disable_output_compression, + use_torchrun=use_torchrun, + use_mpirun=use_mpirun, + nproc_per_node=nproc_per_node, + ) + + self._state_condition = threading.Condition() + self._pending_request_queue = deque() + # For thread safety, see + # https://web.archive.org/web/20201108091210/http://effbot.org/pyfaq/what-kinds-of-global-value-mutation-are-thread-safe.htm + 
self._running_jobs = dict() + self._shutdown = False + + self._workers: ThreadPoolExecutor = None + + def submit(self, func, *args, **kwargs): + """Execute the input function as a SageMaker job asynchronously. + + Args: + func: Python function to run as a SageMaker job. + *args: Positional arguments to the input function. + **kwargs: keyword arguments to the input function + """ + if self._shutdown: + raise RuntimeError("Cannot schedule new remote function executions after shutdown") + + self._validate_submit_args(func, *args, **kwargs) + + with self._state_condition: + future = Future() + + run_info = None + if _RunContext.get_current_run() is not None: + run = _RunContext.get_current_run() + run_info = _RunInfo(run.experiment_name, run.run_name) + + self._pending_request_queue.append( + _SubmitRequest(future, self.job_settings, func, args, kwargs, run_info) + ) + + if self._workers is None: + self._workers = ThreadPoolExecutor(2) + self._workers.submit(_submit_worker, self) + self._workers.submit(_polling_worker, self) + + self._state_condition.notify_all() + + return future + + def map(self, func, *iterables): + """Return an iterator that applies function to every item of iterable, yielding the results. + + If additional iterables arguments are passed, function must take that many arguments and + is applied to the items from all iterables in parallel. With multiple iterables, the + iterator stops when the shortest iterable is exhausted. + + Args: + func: Python function to run as a SageMaker job. + iterables: Arguments of the input python function. 
+ """ + + futures = map(self.submit, itertools.repeat(func), *iterables) + return [future.result() for future in futures] + + def shutdown(self): + """Prevent more function executions to be submitted to this executor.""" + with self._state_condition: + self._shutdown = True + + # give a signal to the submitting worker so that it doesn't block on empty queue forever + self._pending_request_queue.append(None) + + self._state_condition.notify_all() + + if self._workers is not None: + self._workers.shutdown(wait=True) + + def __enter__(self): + """Create an executor instance and return it""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Make sure the executor instance is shutdown.""" + self.shutdown() + return False + + @staticmethod + def _validate_submit_args(func, *args, **kwargs): + """Validates input args passed to submit method.""" + + full_arg_spec = inspect.getfullargspec(func) + + # args related validations + + is_accepting_variable_positional_args = full_arg_spec.varargs is not None + num_default_positional_args = len(full_arg_spec.defaults) if full_arg_spec.defaults else 0 + minimum_num_expected_positional_args = len(full_arg_spec.args) - num_default_positional_args + + if not is_accepting_variable_positional_args and len(args) > len(full_arg_spec.args): + raise TypeError( + f"{func.__name__}() takes {len(full_arg_spec.args)} positional " + + f"{'arguments' if len(full_arg_spec.args) > 1 else 'argument'} but {len(args)} " + + f"{'were' if len(args) > 1 else 'was'} given." 
+ ) + + if len(args) < minimum_num_expected_positional_args: + missing_positional_args = full_arg_spec.args[ + len(args) : minimum_num_expected_positional_args + ] + missing_args = list(filter(lambda arg: arg not in kwargs, missing_positional_args)) + if missing_args: + missing_args_str = ( + ", ".join(map(lambda x: f"'{x}'", missing_args[:-1])) + + f", and '{missing_args[-1]}'" + if len(missing_args) > 1 + else f"'{missing_args[0]}'" + ) + raise TypeError( + f"{func.__name__}() missing {len(missing_args)} required positional " + + f"{'arguments' if len(missing_args) > 1 else 'argument'}: {missing_args_str}" + ) + + # kwargs related validations + + for k in kwargs: + if k in full_arg_spec.args and len(args) > full_arg_spec.args.index(k): + raise TypeError(f"{func.__name__}() got multiple values for argument '{k}'") + if k not in full_arg_spec.kwonlyargs and k not in full_arg_spec.args: + raise TypeError(f"{func.__name__}() got an unexpected keyword argument '{k}'") + + missing_kwargs = [ + k + for k in full_arg_spec.kwonlyargs + if k not in full_arg_spec.kwonlydefaults and k not in kwargs + ] + if missing_kwargs: + missing_kwargs_string = ( + ", ".join(map(lambda x: f"'{x}'", missing_kwargs[:-1])) + + f", and '{missing_kwargs[-1]}'" + if len(missing_kwargs) > 1 + else f"'{missing_kwargs[0]}'" + ) + + raise TypeError( + f"{func.__name__}() missing {len(missing_kwargs)} required keyword-only " + + f"{'arguments' if len(missing_kwargs) > 1 else 'argument'}: " + + f"{missing_kwargs_string}" + ) + + +class Future(object): + """Class representing a reference to a SageMaker job result. + + Reference to the SageMaker job created as a result of the remote function run. The job may + or may not have finished running. 
+ """ + + def __init__(self): + self._condition = threading.Condition() + self._state = _PENDING + self._job = None + self._exception = None + self._return = None + + @staticmethod + def from_describe_response(describe_training_job_response, sagemaker_session): + """Construct a Future from a describe_training_job_response object.""" + future = Future() + job_exception = None + client_exception = None + job_return = None + job = _Job.from_describe_response(describe_training_job_response, sagemaker_session) + if describe_training_job_response["TrainingJobStatus"] in ["Stopping", "Stopped"]: + state = _CANCELLED + elif describe_training_job_response["TrainingJobStatus"] == "Completed": + state = _FINISHED + try: + job_return = serialization.deserialize_obj_from_s3( + sagemaker_session=sagemaker_session, + s3_uri=s3_path_join(job.s3_uri, RESULTS_FOLDER), + hmac_key=job.hmac_key, + ) + except DeserializationError as e: + client_exception = e + except ServiceError as e: + client_exception = e + elif describe_training_job_response["TrainingJobStatus"] == "Failed": + state = _FINISHED + try: + job_exception = serialization.deserialize_exception_from_s3( + sagemaker_session=sagemaker_session, + s3_uri=s3_path_join(job.s3_uri, EXCEPTION_FOLDER), + hmac_key=job.hmac_key, + ) + except ServiceError as serr: + chained_e = serr.__cause__ + if ( + isinstance(chained_e, ClientError) + and chained_e.response["Error"]["Code"] == "404" # pylint: disable=no-member + and chained_e.response["Error"]["Message"] # pylint: disable=no-member + == "Not Found" + ): + if ( + "FailureReason" in describe_training_job_response + and describe_training_job_response["FailureReason"] + and "RuntimeEnvironmentError: " + in describe_training_job_response["FailureReason"] + ): + failure_msg = describe_training_job_response["FailureReason"].replace( + "RuntimeEnvironmentError: ", "" + ) + job_exception = RuntimeEnvironmentError(failure_msg) + else: + job_exception = RemoteFunctionError( + "Failed to 
execute remote function. " + + "Check corresponding job for details." + ) + else: + job_exception = serr + except DeserializationError as e: + client_exception = e + else: + state = _RUNNING + + future._job = job + future._state = state + future._exception = job_exception or client_exception + future._return = job_return + return future + + def _start_and_notify( + self, job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None + ): + """Start and record the newly created job in the future object. + + The job is recorded if one is successfully started. Otherwise, the exception is + recorded. The state update is broadcast to other waiting threads. + """ + with self._condition: + if self._state in [_PENDING]: + + try: + self._job = _Job.start(job_settings, func, func_args, func_kwargs, run_info) + except (Exception,) as e: # pylint: disable=broad-except + self._exception = e + self._state = _FINISHED + self._condition.notify_all() + return None + + self._state = _RUNNING + self._condition.notify_all() + return self._job + return None + + def result(self, timeout: float = None) -> Any: + """Returns the SageMaker job result. + + This method waits for the SageMaker job created from the remote function execution to + complete for up to the timeout value (if specified). If timeout is ``None``, + this method will wait until the SageMaker job completes. + + Args: + timeout (float): Timeout in seconds to wait until the job is completed. ``None`` by + default. + + Returns: + The Python object returned by the remote function. 
+ """ + try: + self.wait(timeout) + except UnexpectedStatusException: + pass + + with self._condition: + if self._state == _PENDING: + raise RuntimeError() + + if self._state == _RUNNING: + if self._job.describe()["TrainingJobStatus"] == "Completed": + self._return = serialization.deserialize_obj_from_s3( + sagemaker_session=self._job.sagemaker_session, + s3_uri=s3_path_join(self._job.s3_uri, RESULTS_FOLDER), + hmac_key=self._job.hmac_key, + ) + self._state = _FINISHED + return self._return + if self._job.describe()["TrainingJobStatus"] == "Failed": + try: + self._exception = serialization.deserialize_exception_from_s3( + sagemaker_session=self._job.sagemaker_session, + s3_uri=s3_path_join(self._job.s3_uri, EXCEPTION_FOLDER), + hmac_key=self._job.hmac_key, + ) + except ServiceError as serr: + chained_e = serr.__cause__ + if ( + isinstance(chained_e, ClientError) + and chained_e.response["Error"]["Code"] # pylint: disable=no-member + == "404" + and chained_e.response["Error"]["Message"] # pylint: disable=no-member + == "Not Found" + ): + if ( + "FailureReason" in self._job.describe() + and self._job.describe()["FailureReason"] + and "RuntimeEnvironmentError: " + in self._job.describe()["FailureReason"] + ): + failure_msg = self._job.describe()["FailureReason"].replace( + "RuntimeEnvironmentError: ", "" + ) + self._exception = RuntimeEnvironmentError(failure_msg) + else: + self._exception = RemoteFunctionError( + "Failed to execute remote function. " + + "Check corresponding job for details." + ) + else: + self._exception = serr + self._state = _FINISHED + elif self._job.describe()["TrainingJobStatus"] == "Stopped": + self._state = _CANCELLED + raise RemoteFunctionError("Job for remote function has been aborted.") + else: + raise TimeoutError( + "Job for remote function timed out before reaching a termination status." 
+ ) + + if self._state == _FINISHED: + if self._exception: + raise self._exception + return self._return + + return None + + def wait( + self, + timeout: int = None, + ) -> None: + """Wait for the underlying SageMaker job to complete. + + This method waits for the SageMaker job created as a result of the remote function run + to complete for up to the timeout value (if specified). If timeout is ``None``, this method + will block until the job is completed. + + Args: + timeout (int): Timeout in seconds to wait until the job is completed before it is + stopped. Defaults to ``None``. + + Returns: + None + """ + + with self._condition: + if self._state == _PENDING: + self._condition.wait(timeout=timeout) + + if self._state == _RUNNING: + self._job.wait(timeout=timeout) + + def cancel(self) -> bool: + """Cancel the function execution. + + This method prevents the SageMaker job being created or stops the underlying SageMaker job + early if it is already in progress. + + Returns: + ``True`` if the underlying SageMaker job created as a result of the remote function + run is cancelled. + """ + with self._condition: + if self._state == _FINISHED: + return False + if self._state == _CANCELLED: + return True + + if self._job: + self._job.stop() + self._state = _CANCELLED + return True + + def running(self) -> bool: + """Check if the underlying SageMaker job is running. + + Returns: + ``True`` if the underlying SageMaker job is still running. ``False``, otherwise. + """ + with self._condition: + return self._state == _RUNNING + + def cancelled(self) -> bool: + """Check if the underlying SageMaker job was cancelled. + + Returns: + ``True`` if the underlying SageMaker job was cancelled. ``False``, otherwise. + """ + with self._condition: + return self._state == _CANCELLED + + def done(self) -> bool: + """Check if the underlying SageMaker job is finished. + + Returns: + ``True`` if the underlying SageMaker job finished running. ``False``, otherwise. 
+ """ + with self._condition: + if self._state == _RUNNING and self._job.describe()["TrainingJobStatus"] in [ + "Completed", + "Failed", + ]: + self._state = _FINISHED + return True + + if self._state == _FINISHED: + return True + + return False + + +def get_future(job_name, sagemaker_session=None) -> Future: + """Get a future object with information about a job with the given job_name. + + Args: + job_name (str): name of the underlying SageMaker job created as a result of the remote + function run. + + sagemaker_session (sagemaker.core.helper.session.Session): A session object that manages interactions + with Amazon SageMaker APIs and any other AWS services needed. + + Returns: + A `sagemaker.remote_function.client.Future` instance. + """ + if not sagemaker_session: + sagemaker_session = Session() + describe_training_job_response = sagemaker_session.sagemaker_client.describe_training_job( + TrainingJobName=job_name + ) + return Future.from_describe_response(describe_training_job_response, sagemaker_session) + + +def list_futures(job_name_prefix, sagemaker_session=None): + """Generates Future objects with information about jobs with given job_name_prefix. + + Args: + job_name_prefix (str): A prefix used to identify the SageMaker jobs associated with remote + function run. + sagemaker_session (sagemaker.core.helper.session.Session): A session object that manages interactions + with Amazon SageMaker APIs and any other AWS services needed. + + Yields: + A `sagemaker.remote_function.client.Future` instance. + """ + if not sagemaker_session: + sagemaker_session = Session() + job_name = name_from_base(job_name_prefix) + # perform the following transformation because we might have trimmed the job_name_prefix while + # creating the job. 
+ transformed_job_name_prefix = base_from_name(job_name) + next_token = None + list_training_job_kwargs = {"NameContains": transformed_job_name_prefix} + while True: + if next_token: + list_training_job_kwargs["NextToken"] = next_token + list_training_job_response = sagemaker_session.sagemaker_client.list_training_jobs( + **list_training_job_kwargs + ) + training_job_names = [ + job["TrainingJobName"] for job in list_training_job_response["TrainingJobSummaries"] + ] + for training_job_name in training_job_names: + describe_training_job_response = ( + sagemaker_session.sagemaker_client.describe_training_job( + TrainingJobName=training_job_name + ) + ) + yield Future.from_describe_response(describe_training_job_response, sagemaker_session) + if "NextToken" in list_training_job_response: + next_token = list_training_job_response["NextToken"] + else: + break diff --git a/sagemaker-train/src/sagemaker/train/remote_function/core/__init__.py b/sagemaker-train/src/sagemaker/train/remote_function/core/__init__.py index 7e9f2d30da..e69de29bb2 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/core/__init__.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/core/__init__.py @@ -1,27 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.core - -This is a backward compatibility shim. 
-""" -from __future__ import absolute_import - -import warnings - -warnings.warn( - "sagemaker.train.remote_function.core has been moved to sagemaker.core.remote_function.core. " - "Please update your imports. This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/core/_custom_dispatch_table.py b/sagemaker-train/src/sagemaker/train/remote_function/core/_custom_dispatch_table.py index 20b7a297b5..857ac40eb0 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/core/_custom_dispatch_table.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/core/_custom_dispatch_table.py @@ -1,4 +1,3 @@ - # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You @@ -24,7 +23,6 @@ ParameterBoolean, ) from sagemaker.core.workflow.execution_variables import ExecutionVariable -from sagemaker.mlops.workflow.function_step import DelayedReturn from sagemaker.core.workflow.properties import ( Properties, PropertiesMap, @@ -32,6 +30,19 @@ ) +# Lazy import to avoid circular dependency +# DelayedReturn is in MLOps package which depends on Core +def _get_delayed_return_class(): + """Lazy import of DelayedReturn to avoid circular dependency.""" + try: + from sagemaker.mlops.workflow.function_step import DelayedReturn + + return DelayedReturn + except ImportError: + # If MLOps is not installed, return None + return None + + def _pipeline_variable_reducer(pipeline_variable): """Reducer for pipeline variable.""" @@ -42,6 +53,7 @@ def _pipeline_variable_reducer(pipeline_variable): ) +# Build dispatch table with lazy loading for DelayedReturn dispatch_table = { ParameterInteger: _pipeline_variable_reducer, ParameterFloat: _pipeline_variable_reducer, @@ -52,5 +64,9 @@ def _pipeline_variable_reducer(pipeline_variable): Properties: _pipeline_variable_reducer, PropertiesMap: _pipeline_variable_reducer, 
PropertiesList: _pipeline_variable_reducer, - DelayedReturn: _pipeline_variable_reducer, } + +# Add DelayedReturn to dispatch table if MLOps is available +_delayed_return_class = _get_delayed_return_class() +if _delayed_return_class is not None: + dispatch_table[_delayed_return_class] = _pipeline_variable_reducer diff --git a/sagemaker-train/src/sagemaker/train/remote_function/core/pipeline_variables.py b/sagemaker-train/src/sagemaker/train/remote_function/core/pipeline_variables.py index 5767a07596..1acbf1ad7d 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/core/pipeline_variables.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/core/pipeline_variables.py @@ -10,21 +10,344 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.core.pipeline_variables - -This is a backward compatibility shim. -""" +"""SageMaker remote function data serializer/deserializer.""" from __future__ import absolute_import -import warnings +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass, field +from typing import Any, Union, Dict, List, Tuple + +from sagemaker.core.s3 import s3_path_join +from sagemaker.train.remote_function.core.serialization import deserialize_obj_from_s3 +from sagemaker.core.workflow.step_outputs import get_step + + +@dataclass +class Context: + """Context for an execution.""" + + step_name: str = None + execution_id: str = None + property_references: Dict[str, str] = field(default_factory=dict) + serialize_output_to_json: bool = False + func_step_s3_dir: str = None + + +@dataclass +class _Parameter: + """Parameter to a function.""" + + name: str + + +class _ParameterInteger(_Parameter): + """Integer parameter to a function.""" + + ... 
+ + +class _ParameterFloat(_Parameter): + """Float parameter to a function.""" + + ... + + +class _ParameterString(_Parameter): + """String parameter to a function.""" + + ... + + +class _ParameterBoolean(_Parameter): + """Boolean parameter to a function.""" + + ... + + +@dataclass +class _Properties: + """Properties of classic steps.""" + + path: str + + +@dataclass +class _ExecutionVariable: + """Execution variable.""" + + name: str + + +@dataclass +class _S3BaseUriIdentifier: + """Identifies that the class refers to function step s3 base uri. + + The s3_base_uri = s3_root_uri + pipeline_name. + This identifier is resolved in function step runtime by SDK. + """ + + NAME = "S3_BASE_URI" + + +@dataclass +class _DelayedReturn: + """Delayed return from a function.""" + + uri: Union[_Properties, List[Union[str, _Parameter, _ExecutionVariable]]] + reference_path: Tuple = field(default_factory=tuple) + + +class _ExecutionVariableResolver: + """Resolve execution variables.""" + + def __init__(self, context: Context): + """Resolve execution variables.""" + self._context = context + + def resolve(self, execution_variable: _ExecutionVariable): + """Resolve a single execution variable. + + Args: + execution_variable: execution variable to resolve. + Returns: + resolved value + """ + return self._context.property_references[f"Execution.{execution_variable.name}"] + + +class _ParameterResolver: + """Resolve parameters.""" + + def __init__(self, context: Context): + """Resolve parameters.""" + self._context = context + + def resolve(self, parameter: _Parameter): + """Resolve a single property reference. + + Args: + parameter: parameter to resolve. 
+ Returns: + resolved value + """ + if isinstance(parameter, _ParameterInteger): + return int(self._context.property_references[f"Parameters.{parameter.name}"]) + if isinstance(parameter, _ParameterFloat): + return float(self._context.property_references[f"Parameters.{parameter.name}"]) + if isinstance(parameter, _ParameterString): + return self._context.property_references[f"Parameters.{parameter.name}"] + + return self._context.property_references[f"Parameters.{parameter.name}"] == "true" + + +class _PropertiesResolver: + """Resolve classic step properties.""" + + def __init__(self, context: Context): + """Resolve classic step properties.""" + self._context = context + + def resolve(self, properties: _Properties): + """Resolve classic step properties. + + Args: + properties: classic step properties. + Returns: + resolved value + """ + return self._context.property_references[properties.path] + + +class _DelayedReturnResolver: + """Resolve delayed returns.""" + + def __init__( + self, + delayed_returns: List[_DelayedReturn], + hmac_key: str, + properties_resolver: _PropertiesResolver, + parameter_resolver: _ParameterResolver, + execution_variable_resolver: _ExecutionVariableResolver, + s3_base_uri: str, + **settings, + ): + """Resolve delayed return. + + Args: + delayed_returns: list of delayed returns to resolve. + hmac_key: key used to encrypt serialized and deserialized function and arguments. + properties_resolver: resolver used to resolve step properties. + parameter_resolver: resolver used to pipeline parameters. + execution_variable_resolver: resolver used to resolve execution variables. + s3_base_uri (str): the s3 base uri of the function step that + the serialized artifacts will be uploaded to. + The s3_base_uri = s3_root_uri + pipeline_name. + **settings: settings to pass to the deserialization function. 
+ """ + self._s3_base_uri = s3_base_uri + self._parameter_resolver = parameter_resolver + self._execution_variable_resolver = execution_variable_resolver + self._properties_resolver = properties_resolver + # different delayed returns can have the same uri, so we need to dedupe + uris = { + self._resolve_delayed_return_uri(delayed_return) for delayed_return in delayed_returns + } + + def deserialization_task(uri): + return uri, deserialize_obj_from_s3( + sagemaker_session=settings["sagemaker_session"], + s3_uri=uri, + hmac_key=hmac_key, + ) + + with ThreadPoolExecutor() as executor: + self._deserialized_objects = dict(executor.map(deserialization_task, uris)) + + def resolve(self, delayed_return: _DelayedReturn) -> Any: + """Resolve a single delayed return. + + Args: + delayed_return: delayed return to resolve. + Returns: + resolved delayed return. + """ + deserialized_obj = self._deserialized_objects[ + self._resolve_delayed_return_uri(delayed_return) + ] + return _retrieve_child_item(delayed_return, deserialized_obj) + + def _resolve_delayed_return_uri(self, delayed_return: _DelayedReturn): + """Resolve the s3 uri of the delayed return.""" + if isinstance(delayed_return.uri, _Properties): + return self._properties_resolver.resolve(delayed_return.uri) + + # Keep the following old resolution logics to keep backward compatible + uri = [] + for component in delayed_return.uri: + if isinstance(component, _Parameter): + uri.append(self._parameter_resolver.resolve(component)) + elif isinstance(component, _ExecutionVariable): + uri.append(self._execution_variable_resolver.resolve(component)) + elif isinstance(component, _S3BaseUriIdentifier): + uri.append(self._s3_base_uri) + else: + uri.append(component) + return s3_path_join(*uri) + + +def _retrieve_child_item(delayed_return: _DelayedReturn, deserialized_obj: Any): + """Retrieve child item from deserialized object.""" + result = deserialized_obj + for component in delayed_return.reference_path: + result = 
result[component[1]] + return result + + +def resolve_pipeline_variables( + context: Context, + func_args: Tuple, + func_kwargs: Dict, + hmac_key: str, + s3_base_uri: str, + **settings, +): + """Resolve pipeline variables. + + Args: + context: context for the execution. + func_args: function args. + func_kwargs: function kwargs. + hmac_key: key used to encrypt serialized and deserialized function and arguments. + s3_base_uri: the s3 base uri of the function step that the serialized artifacts + will be uploaded to. The s3_base_uri = s3_root_uri + pipeline_name. + **settings: settings to pass to the deserialization function. + """ + + delayed_returns = [] + + if func_args is not None: + for arg in func_args: + if isinstance(arg, _DelayedReturn): + delayed_returns.append(arg) + if func_kwargs is not None: + for arg in func_kwargs.values(): + if isinstance(arg, _DelayedReturn): + delayed_returns.append(arg) + + # build the resolvers + parameter_resolver = _ParameterResolver(context) + execution_variable_resolver = _ExecutionVariableResolver(context) + properties_resolver = _PropertiesResolver(context) + delayed_return_resolver = _DelayedReturnResolver( + delayed_returns=delayed_returns, + hmac_key=hmac_key, + properties_resolver=properties_resolver, + parameter_resolver=parameter_resolver, + execution_variable_resolver=execution_variable_resolver, + s3_base_uri=s3_base_uri, + **settings, + ) + + # resolve the pipeline variables + resolved_func_args = None + if func_args is not None: + resolved_func_args = [] + for arg in func_args: + if isinstance(arg, _Parameter): + resolved_func_args.append(parameter_resolver.resolve(arg)) + elif isinstance(arg, _ExecutionVariable): + resolved_func_args.append(execution_variable_resolver.resolve(arg)) + elif isinstance(arg, _Properties): + resolved_func_args.append(properties_resolver.resolve(arg)) + elif isinstance(arg, _DelayedReturn): + resolved_func_args.append(delayed_return_resolver.resolve(arg)) + else: + 
resolved_func_args.append(arg) + resolved_func_args = tuple(resolved_func_args) + + resolved_func_kwargs = None + if func_kwargs is not None: + resolved_func_kwargs = {} + for key, value in func_kwargs.items(): + if isinstance(value, _Parameter): + resolved_func_kwargs[key] = parameter_resolver.resolve(value) + elif isinstance(value, _ExecutionVariable): + resolved_func_kwargs[key] = execution_variable_resolver.resolve(value) + elif isinstance(value, _Properties): + resolved_func_kwargs[key] = properties_resolver.resolve(value) + elif isinstance(value, _DelayedReturn): + resolved_func_kwargs[key] = delayed_return_resolver.resolve(value) + else: + resolved_func_kwargs[key] = value + + return resolved_func_args, resolved_func_kwargs + + +def convert_pipeline_variables_to_pickleable(func_args: Tuple, func_kwargs: Dict): + """Convert pipeline variables to pickleable. + + Args: + func_args: function args. + func_kwargs: function kwargs. + """ + + from sagemaker.core.helper.pipeline_variable import PipelineVariable + + from sagemaker.mlops.workflow.function_step import DelayedReturn + + def convert(arg): + if isinstance(arg, DelayedReturn): + return _DelayedReturn( + uri=get_step(arg)._properties.OutputDataConfig.S3OutputPath._pickleable, + reference_path=arg._reference_path, + ) + + if isinstance(arg, PipelineVariable): + return arg._pickleable + + return arg -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.core.pipeline_variables import * # noqa: F401, F403 + converted_func_args = tuple(convert(arg) for arg in func_args) + converted_func_kwargs = {key: convert(arg) for key, arg in func_kwargs.items()} -warnings.warn( - "sagemaker.train.remote_function.core.pipeline_variables has been moved to sagemaker.core.remote_function.core.pipeline_variables. " - "Please update your imports. 
This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) + return converted_func_args, converted_func_kwargs diff --git a/sagemaker-train/src/sagemaker/train/remote_function/core/serialization.py b/sagemaker-train/src/sagemaker/train/remote_function/core/serialization.py index d30d1494d5..bea9d1c6db 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/core/serialization.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/core/serialization.py @@ -10,21 +10,413 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.core.serialization - -This is a backward compatibility shim. -""" +"""SageMaker remote function data serializer/deserializer.""" from __future__ import absolute_import -import warnings +import dataclasses +import json + +import io + +import sys +import hmac +import hashlib +import pickle -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.core.serialization import * # noqa: F401, F403 +from typing import Any, Callable, Union -warnings.warn( - "sagemaker.train.remote_function.core.serialization has been moved to sagemaker.core.remote_function.core.serialization. " - "Please update your imports. 
This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 +import cloudpickle +from tblib import pickling_support + +from sagemaker.train.remote_function.errors import ( + ServiceError, + SerializationError, + DeserializationError, ) +from sagemaker.core.s3 import S3Downloader, S3Uploader +from sagemaker.core.helper.session_helper import Session +from ._custom_dispatch_table import dispatch_table + +# Note: do not use os.path.join for s3 uris, fails on windows + + +def _get_python_version(): + """Returns the current python version.""" + return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + + +@dataclasses.dataclass +class _MetaData: + """Metadata about the serialized data or functions.""" + + sha256_hash: str + version: str = "2023-04-24" + python_version: str = _get_python_version() + serialization_module: str = "cloudpickle" + + def to_json(self): + """Converts metadata to json string.""" + return json.dumps(dataclasses.asdict(self)).encode() + + @staticmethod + def from_json(s): + """Converts json string to metadata object.""" + try: + obj = json.loads(s) + except json.decoder.JSONDecodeError: + raise DeserializationError("Corrupt metadata file. It is not a valid json file.") + + sha256_hash = obj.get("sha256_hash") + metadata = _MetaData(sha256_hash=sha256_hash) + metadata.version = obj.get("version") + metadata.python_version = obj.get("python_version") + metadata.serialization_module = obj.get("serialization_module") + + if not sha256_hash: + raise DeserializationError( + "Corrupt metadata file. SHA256 hash for the serialized data does not exist. " + "Please make sure to install SageMaker SDK version >= 2.156.0 on the client side " + "and try again." + ) + + if not ( + metadata.version == "2023-04-24" and metadata.serialization_module == "cloudpickle" + ): + raise DeserializationError( + f"Corrupt metadata file. Serialization approach {s} is not supported." 
+ ) + + return metadata + + +class CloudpickleSerializer: + """Serializer using cloudpickle.""" + + @staticmethod + def serialize(obj: Any) -> bytes: + """Serializes data object and uploads it to S3. + + Args: + obj: object to be serialized and persisted + Raises: + SerializationError: when fail to serialize object to bytes. + """ + try: + io_buffer = io.BytesIO() + custom_pickler = cloudpickle.CloudPickler(io_buffer) + dt = pickle.Pickler.dispatch_table.__get__(custom_pickler) # pylint: disable=no-member + new_dt = dt.new_child(dispatch_table) + pickle.Pickler.dispatch_table.__set__( # pylint: disable=no-member + custom_pickler, new_dt + ) + custom_pickler.dump(obj) + return io_buffer.getvalue() + except Exception as e: + if isinstance( + e, NotImplementedError + ) and "Instance of Run type is not allowed to be pickled." in str(e): + raise SerializationError( + """You are trying to pass a sagemaker.experiments.run.Run object to + a remote function + or are trying to access a global sagemaker.experiments.run.Run object + from within the function. This is not supported. + You must use `load_run` to load an existing Run in the remote function + or instantiate a new Run in the function.""" + ) + + raise SerializationError( + "Error when serializing object of type [{}]: {}".format(type(obj).__name__, repr(e)) + ) from e + + @staticmethod + def deserialize(s3_uri: str, bytes_to_deserialize: bytes) -> Any: + """Downloads from S3 and then deserializes data objects. + + Args: + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + bytes_to_deserialize: bytes to be deserialized. + Returns : + List of deserialized python objects. + Raises: + DeserializationError: when fail to serialize object to bytes. + """ + + try: + return cloudpickle.loads(bytes_to_deserialize) + except Exception as e: + raise DeserializationError( + "Error when deserializing bytes downloaded from {}: {}. 
" + "NOTE: this may be caused by inconsistent sagemaker python sdk versions " + "where remote function runs versus the one used on client side. " + "If the sagemaker versions do not match, a warning message would " + "be logged starting with 'Inconsistent sagemaker versions found'. " + "Please check it to validate.".format(s3_uri, repr(e)) + ) from e + + +# TODO: use dask serializer in case dask distributed is installed in users' environment. +def serialize_func_to_s3( + func: Callable, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None +): + """Serializes function and uploads it to S3. + + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying Boto3 session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + func: function to be serialized and persisted + Raises: + SerializationError: when fail to serialize function to bytes. + """ + + _upload_payload_and_metadata_to_s3( + bytes_to_upload=CloudpickleSerializer.serialize(func), + hmac_key=hmac_key, + s3_uri=s3_uri, + sagemaker_session=sagemaker_session, + s3_kms_key=s3_kms_key, + ) + + +def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Callable: + """Downloads from S3 and then deserializes data objects. + + This method downloads the serialized training job outputs to a temporary directory and + then deserializes them using dask. + + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying sagemaker session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func. + Returns : + The deserialized function. 
+ Raises: + DeserializationError: when fail to serialize function to bytes. + """ + metadata = _MetaData.from_json( + _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) + ) + + bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) + + _perform_integrity_check( + expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize + ) + + return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) + + +def serialize_obj_to_s3( + obj: Any, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None +): + """Serializes data object and uploads it to S3. + + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying Boto3 session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. + obj: object to be serialized and persisted + Raises: + SerializationError: when fail to serialize object to bytes. + """ + + _upload_payload_and_metadata_to_s3( + bytes_to_upload=CloudpickleSerializer.serialize(obj), + hmac_key=hmac_key, + s3_uri=s3_uri, + sagemaker_session=sagemaker_session, + s3_kms_key=s3_kms_key, + ) + + +def json_serialize_obj_to_s3( + obj: Any, + json_key: str, + sagemaker_session: Session, + s3_uri: str, + s3_kms_key: str = None, +): + """Json serializes data object and uploads it to S3. + + If a function step's output is data referenced by other steps via JsonGet, + its output should be json serialized and uploaded to S3. + + Args: + obj: (Any) object to be serialized and persisted. + json_key: (str) the json key pointing to function step output. + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying Boto3 session which AWS service calls are delegated to. 
+ s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + """ + json_serialized_result = {} + try: + to_dump = {json_key: obj, "Exception": None} + json_serialized_result = json.dumps(to_dump) + except TypeError as e: + if "is not JSON serializable" in str(e): + to_dump = { + json_key: None, + "Exception": f"The function return ({obj}) is not JSON serializable.", + } + json_serialized_result = json.dumps(to_dump) + + S3Uploader.upload_string_as_file_body( + body=json_serialized_result, + desired_s3_uri=s3_uri, + sagemaker_session=sagemaker_session, + kms_key=s3_kms_key, + ) + + +def deserialize_obj_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any: + """Downloads from S3 and then deserializes data objects. + + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying sagemaker session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. + Returns : + Deserialized python objects. + Raises: + DeserializationError: when fail to serialize object to bytes. + """ + + metadata = _MetaData.from_json( + _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) + ) + + bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) + + _perform_integrity_check( + expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize + ) + + return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) + + +def serialize_exception_to_s3( + exc: Exception, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None +): + """Serializes exception with traceback and uploads it to S3. 
+ + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying Boto3 session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception. + exc: Exception to be serialized and persisted + Raises: + SerializationError: when fail to serialize object to bytes. + """ + pickling_support.install() + + _upload_payload_and_metadata_to_s3( + bytes_to_upload=CloudpickleSerializer.serialize(exc), + hmac_key=hmac_key, + s3_uri=s3_uri, + sagemaker_session=sagemaker_session, + s3_kms_key=s3_kms_key, + ) + + +def _upload_payload_and_metadata_to_s3( + bytes_to_upload: Union[bytes, io.BytesIO], + hmac_key: str, + s3_uri: str, + sagemaker_session: Session, + s3_kms_key, +): + """Uploads serialized payload and metadata to s3. + + Args: + bytes_to_upload (bytes): Serialized bytes to upload. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying Boto3 session which AWS service calls are delegated to. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + """ + _upload_bytes_to_s3(bytes_to_upload, f"{s3_uri}/payload.pkl", s3_kms_key, sagemaker_session) + + sha256_hash = _compute_hash(bytes_to_upload, secret_key=hmac_key) + + _upload_bytes_to_s3( + _MetaData(sha256_hash).to_json(), + f"{s3_uri}/metadata.json", + s3_kms_key, + sagemaker_session, + ) + + +def deserialize_exception_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any: + """Downloads from S3 and then deserializes exception. 
+ + Args: + sagemaker_session (sagemaker.core.helper.session.Session): + The underlying sagemaker session which AWS service calls are delegated to. + s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded. + hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception. + Returns : + Deserialized exception with traceback. + Raises: + DeserializationError: when fail to serialize object to bytes. + """ + + metadata = _MetaData.from_json( + _read_bytes_from_s3(f"{s3_uri}/metadata.json", sagemaker_session) + ) + + bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session) + + _perform_integrity_check( + expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize + ) + + return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize) + + +def _upload_bytes_to_s3(b: Union[bytes, io.BytesIO], s3_uri, s3_kms_key, sagemaker_session): + """Wrapping s3 uploading with exception translation for remote function.""" + try: + S3Uploader.upload_bytes(b, s3_uri, kms_key=s3_kms_key, sagemaker_session=sagemaker_session) + except Exception as e: + raise ServiceError( + "Failed to upload serialized bytes to {}: {}".format(s3_uri, repr(e)) + ) from e + + +def _read_bytes_from_s3(s3_uri, sagemaker_session): + """Wrapping s3 downloading with exception translation for remote function.""" + try: + return S3Downloader.read_bytes(s3_uri, sagemaker_session=sagemaker_session) + except Exception as e: + raise ServiceError( + "Failed to read serialized bytes from {}: {}".format(s3_uri, repr(e)) + ) from e + + +def _compute_hash(buffer: bytes, secret_key: str) -> str: + """Compute the hmac-sha256 hash""" + return hmac.new(secret_key.encode(), msg=buffer, digestmod=hashlib.sha256).hexdigest() + + +def _perform_integrity_check(expected_hash_value: str, secret_key: str, buffer: bytes): + """Performs integrity checks for serialized code/arguments uploaded to s3. 
+ + Verifies whether the hash read from s3 matches the hash calculated + during remote function execution. + """ + actual_hash_value = _compute_hash(buffer=buffer, secret_key=secret_key) + if not hmac.compare_digest(expected_hash_value, actual_hash_value): + raise DeserializationError( + "Integrity check for the serialized function or data failed. " + "Please restrict access to your S3 bucket" + ) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/core/stored_function.py b/sagemaker-train/src/sagemaker/train/remote_function/core/stored_function.py index 34915a4d42..ad146d881e 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/core/stored_function.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/core/stored_function.py @@ -10,21 +10,217 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.core.stored_function - -This is a backward compatibility shim. -""" +"""SageMaker job function serializer/deserializer.""" from __future__ import absolute_import -import warnings +import os +from dataclasses import dataclass +from typing import Any -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.core.stored_function import * # noqa: F401, F403 -warnings.warn( - "sagemaker.train.remote_function.core.stored_function has been moved to sagemaker.core.remote_function.core.stored_function. " - "Please update your imports. 
This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 +from sagemaker.core.s3 import s3_path_join +from sagemaker.train.remote_function import logging_config +from sagemaker.train.remote_function.core.pipeline_variables import ( + Context, + resolve_pipeline_variables, ) + +import sagemaker.train.remote_function.core.serialization as serialization +from sagemaker.core.helper.session_helper import Session + + +logger = logging_config.get_logger() + + +FUNCTION_FOLDER = "function" +ARGUMENTS_FOLDER = "arguments" +RESULTS_FOLDER = "results" +EXCEPTION_FOLDER = "exception" +JSON_SERIALIZED_RESULT_KEY = "Result" +JSON_RESULTS_FILE = "results.json" + + +@dataclass +class _SerializedData: + """Data class to store serialized function and arguments""" + + func: bytes + args: bytes + + +class StoredFunction: + """Class representing a remote function stored in S3.""" + + def __init__( + self, + sagemaker_session: Session, + s3_base_uri: str, + hmac_key: str, + s3_kms_key: str = None, + context: Context = Context(), + ): + """Construct a StoredFunction object. + + Args: + sagemaker_session: (sagemaker.session.Session): The underlying sagemaker session which + AWS service calls are delegated to. + s3_base_uri: the base uri to which serialized artifacts will be uploaded. + s3_kms_key: KMS key used to encrypt artifacts uploaded to S3. + hmac_key: Key used to encrypt serialized and deserialized function and arguments. + context: Build or run context of a pipeline step. 
+ """ + self.sagemaker_session = sagemaker_session + self.s3_base_uri = s3_base_uri + self.s3_kms_key = s3_kms_key + self.hmac_key = hmac_key + self.context = context + + # For pipeline steps, function code is at: base/step_name/build_timestamp/ + # For results, path is: base/step_name/build_timestamp/execution_id/ + # This ensures uniqueness: build_timestamp per build, execution_id per run + if context.step_name and context.func_step_s3_dir: + # Pipeline step: include build timestamp in both paths + self.func_upload_path = s3_path_join( + s3_base_uri, context.step_name, context.func_step_s3_dir + ) + self.results_upload_path = s3_path_join( + s3_base_uri, context.step_name, context.func_step_s3_dir, context.execution_id + ) + else: + # Regular remote function: original behavior + self.func_upload_path = s3_path_join( + s3_base_uri, context.step_name, context.func_step_s3_dir + ) + self.results_upload_path = s3_path_join( + s3_base_uri, context.execution_id, context.step_name + ) + + def save(self, func, *args, **kwargs): + """Serialize and persist the function and arguments. + + Args: + func: the python function. + args: the positional arguments to func. + kwargs: the keyword arguments to func. 
+ Returns: + None + """ + + logger.info( + "Serializing function code to %s", s3_path_join(self.func_upload_path, FUNCTION_FOLDER) + ) + serialization.serialize_func_to_s3( + func=func, + sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), + s3_kms_key=self.s3_kms_key, + hmac_key=self.hmac_key, + ) + + logger.info( + "Serializing function arguments to %s", + s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + ) + + serialization.serialize_obj_to_s3( + obj=(args, kwargs), + sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + hmac_key=self.hmac_key, + s3_kms_key=self.s3_kms_key, + ) + + def save_pipeline_step_function(self, serialized_data): + """Upload serialized function and arguments to s3. + + Args: + serialized_data (_SerializedData): The serialized function + and function arguments of a function step. + """ + + logger.info( + "Uploading serialized function code to %s", + s3_path_join(self.func_upload_path, FUNCTION_FOLDER), + ) + serialization._upload_payload_and_metadata_to_s3( + bytes_to_upload=serialized_data.func, + hmac_key=self.hmac_key, + s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), + sagemaker_session=self.sagemaker_session, + s3_kms_key=self.s3_kms_key, + ) + + logger.info( + "Uploading serialized function arguments to %s", + s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + ) + serialization._upload_payload_and_metadata_to_s3( + bytes_to_upload=serialized_data.args, + hmac_key=self.hmac_key, + s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + sagemaker_session=self.sagemaker_session, + s3_kms_key=self.s3_kms_key, + ) + + def load_and_invoke(self) -> Any: + """Load and deserialize the function and the arguments and then execute it.""" + + logger.info( + "Deserializing function code from %s", + s3_path_join(self.func_upload_path, FUNCTION_FOLDER), + ) + func = serialization.deserialize_func_from_s3( + 
sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER), + hmac_key=self.hmac_key, + ) + + logger.info( + "Deserializing function arguments from %s", + s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + ) + args, kwargs = serialization.deserialize_obj_from_s3( + sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER), + hmac_key=self.hmac_key, + ) + + logger.info("Resolving pipeline variables") + resolved_args, resolved_kwargs = resolve_pipeline_variables( + self.context, + args, + kwargs, + hmac_key=self.hmac_key, + s3_base_uri=self.s3_base_uri, + sagemaker_session=self.sagemaker_session, + ) + + logger.info("Invoking the function") + result = func(*resolved_args, **resolved_kwargs) + + logger.info( + "Serializing the function return and uploading to %s", + s3_path_join(self.results_upload_path, RESULTS_FOLDER), + ) + serialization.serialize_obj_to_s3( + obj=result, + sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join(self.results_upload_path, RESULTS_FOLDER), + hmac_key=self.hmac_key, + s3_kms_key=self.s3_kms_key, + ) + + if self.context and self.context.serialize_output_to_json: + logger.info( + "JSON Serializing the function return and uploading to %s", + s3_path_join(self.results_upload_path, RESULTS_FOLDER), + ) + serialization.json_serialize_obj_to_s3( + obj=result, + json_key=JSON_SERIALIZED_RESULT_KEY, + sagemaker_session=self.sagemaker_session, + s3_uri=s3_path_join( + os.path.join(self.results_upload_path, RESULTS_FOLDER, JSON_RESULTS_FILE) + ), + s3_kms_key=self.s3_kms_key, + ) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/custom_file_filter.py b/sagemaker-train/src/sagemaker/train/remote_function/custom_file_filter.py index 9c1b1e1baa..c82cc7eee7 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/custom_file_filter.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/custom_file_filter.py @@ -125,4 
+125,4 @@ def _filter_non_python_files(path: str, names: List) -> List: _src, dst, ignore=_ignore, - ) \ No newline at end of file + ) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/errors.py b/sagemaker-train/src/sagemaker/train/remote_function/errors.py index e67fcf7d9f..edff22f916 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/errors.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/errors.py @@ -10,21 +10,95 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.errors - -This is a backward compatibility shim. -""" +"""Definitions for remote job errors and error handling""" from __future__ import absolute_import -import warnings +import os + +from tblib import pickling_support +from sagemaker.core.s3 import s3_path_join +import sagemaker.train.remote_function.core.serialization as serialization + + +DEFAULT_FAILURE_CODE = 1 +FAILURE_REASON_PATH = "/opt/ml/output/failure" + + +@pickling_support.install +class RemoteFunctionError(Exception): + """The base exception class for remote function exceptions""" + + def __init__(self, message): + self.message = message + super().__init__(self.message) + + +@pickling_support.install +class ServiceError(RemoteFunctionError): + """Raised when errors encountered during interaction with SageMaker, S3 service APIs""" + + +@pickling_support.install +class SerializationError(RemoteFunctionError): + """Raised when errors encountered during serialization of remote function objects""" + + +@pickling_support.install +class DeserializationError(RemoteFunctionError): + """Raised when errors encountered during deserialization of remote function objects""" + + +def _get_valid_failure_exit_code(exit_code) -> int: + """Normalize exit code for terminating the
process""" + try: + valid_exit_code = int(exit_code) + except (TypeError, ValueError): + valid_exit_code = DEFAULT_FAILURE_CODE + + return valid_exit_code + + +def _write_failure_reason_file(failure_msg): + """Create a file 'failure' with failure reason written if remote function execution failed. + + See: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html + Args: + failure_msg: The content of file to be written. + """ + if not os.path.exists(FAILURE_REASON_PATH): + with open(FAILURE_REASON_PATH, "w") as f: + f.write(failure_msg) + + +def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key, hmac_key) -> int: + """Handle all exceptions raised during remote function execution. + + Args: + error (Exception): The error to be handled. + sagemaker_session (sagemaker.core.helper.session.Session): The underlying Boto3 session which + AWS service calls are delegated to. + s3_base_uri (str): S3 root uri to which resulting serialized exception will be uploaded. + s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3. + hmac_key (str): Key used to calculate hmac hash of the serialized exception. + Returns : + exit_code (int): Exit code to terminate current job. + """ + + failure_reason = repr(error) + if isinstance(error, RemoteFunctionError): + exit_code = DEFAULT_FAILURE_CODE + else: + error_number = getattr(error, "errno", DEFAULT_FAILURE_CODE) + exit_code = _get_valid_failure_exit_code(error_number) + + _write_failure_reason_file(failure_reason) -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.errors import * # noqa: F401, F403 + serialization.serialize_exception_to_s3( + exc=error, + sagemaker_session=sagemaker_session, + s3_uri=s3_path_join(s3_base_uri, "exception"), + hmac_key=hmac_key, + s3_kms_key=s3_kms_key, + ) -warnings.warn( - "sagemaker.train.remote_function.errors has been moved to sagemaker.core.remote_function.errors. " - "Please update your imports. 
This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) + return exit_code diff --git a/sagemaker-train/src/sagemaker/train/remote_function/invoke_function.py b/sagemaker-train/src/sagemaker/train/remote_function/invoke_function.py index 3bafeffd5b..f07a50f706 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/invoke_function.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/invoke_function.py @@ -169,4 +169,4 @@ def main(sys_args=None): if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/job.py b/sagemaker-train/src/sagemaker/train/remote_function/job.py index 33bf62af86..8adc350a78 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/job.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/job.py @@ -10,21 +10,2131 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
+"""Helper classes that interact with SageMaker Training service.""" +from __future__ import absolute_import + +import dataclasses +import json +import os +import re +import secrets +import shutil +import sys +import time +from io import BytesIO +from typing import Callable, Dict, List, Optional, Tuple, Union, TYPE_CHECKING +from urllib.parse import urlparse + +import botocore +from botocore.exceptions import ClientError + +from sagemaker.core.config.config_schema import ( + REMOTE_FUNCTION_ENVIRONMENT_VARIABLES, + REMOTE_FUNCTION_IMAGE_URI, + REMOTE_FUNCTION_DEPENDENCIES, + REMOTE_FUNCTION_PRE_EXECUTION_COMMANDS, + REMOTE_FUNCTION_PRE_EXECUTION_SCRIPT, + REMOTE_FUNCTION_INCLUDE_LOCAL_WORKDIR, + REMOTE_FUNCTION_INSTANCE_TYPE, + REMOTE_FUNCTION_JOB_CONDA_ENV, + REMOTE_FUNCTION_ROLE_ARN, + REMOTE_FUNCTION_S3_ROOT_URI, + REMOTE_FUNCTION_S3_KMS_KEY_ID, + REMOTE_FUNCTION_VOLUME_KMS_KEY_ID, + REMOTE_FUNCTION_TAGS, + REMOTE_FUNCTION_VPC_CONFIG_SUBNETS, + REMOTE_FUNCTION_VPC_CONFIG_SECURITY_GROUP_IDS, + REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION, +) +from sagemaker.core.experiments._run_context import _RunContext +from sagemaker.core.experiments.run import Run +from sagemaker.core.image_uris import get_base_python_image_uri +from sagemaker.core import image_uris +from sagemaker.train.remote_function.checkpoint_location import CheckpointLocation +from sagemaker.core.helper.session_helper import get_execution_role, expand_role, Session +from sagemaker.core.common_utils import ( + name_from_base, + _tmpdir, + resolve_value_from_config, + format_tags, + Tags, +) +from sagemaker.core.s3 import s3_path_join, S3Uploader + +from sagemaker.train.remote_function.core.stored_function import StoredFunction, _SerializedData +from sagemaker.train.remote_function.core.pipeline_variables import Context + +from sagemaker.train.remote_function.runtime_environment.runtime_environment_manager import ( + RuntimeEnvironmentManager, + _DependencySettings, +) +from 
sagemaker.train.remote_function import logging_config +from sagemaker.train.remote_function.spark_config import SparkConfig +from sagemaker.train.remote_function.custom_file_filter import ( + CustomFileFilter, + copy_workdir, + resolve_custom_file_filter_from_config_file, +) + +# Lazy import to avoid circular dependency - DelayedReturn is in MLOps which depends on Core +# from sagemaker.mlops.workflow.function_step import DelayedReturn +from sagemaker.core.workflow.step_outputs import get_step +from sagemaker.core import exceptions +from sagemaker.core import network as vpc_utils + +from sagemaker.core import logs as sagemaker_logs + +from sagemaker.core.common_utils import ( + _wait_until, + secondary_training_status_changed, + secondary_training_status_message, +) +from sagemaker.core.config.config_utils import _append_sagemaker_config_tags + +if TYPE_CHECKING: + from sagemaker.core.helper.pipeline_variable import PipelineVariable + +# runtime script names +BOOTSTRAP_SCRIPT_NAME = "bootstrap_runtime_environment.py" +MPI_UTILS_SCRIPT_NAME = "mpi_utils_remote.py" +ENTRYPOINT_SCRIPT_NAME = "job_driver.sh" +PRE_EXECUTION_SCRIPT_NAME = "pre_exec.sh" +RUNTIME_MANAGER_SCRIPT_NAME = "runtime_environment_manager.py" +SPARK_APP_SCRIPT_NAME = "spark_app.py" + +# training channel names +RUNTIME_SCRIPTS_CHANNEL_NAME = "sagemaker_remote_function_bootstrap" +REMOTE_FUNCTION_WORKSPACE = "sm_rf_user_ws" +JOB_REMOTE_FUNCTION_WORKSPACE = "sagemaker_remote_function_workspace" +SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME = "pre_exec_script_and_dependencies" + +# Spark config channel and file name +SPARK_CONF_CHANNEL_NAME = "conf" +SPARK_CONF_FILE_NAME = "configuration.json" + +# Spark submitted files workspace names on S3 +SPARK_SUBMIT_JARS_WORKSPACE = "sm_rf_spark_jars" +SPARK_SUBMIT_PY_FILES_WORKSPACE = "sm_rf_spark_py_files" +SPARK_SUBMIT_FILES_WORKSPACE = "sm_rf_spark_data_files" +SPARK_CONF_WORKSPACE = "sm_rf_spark_conf" + +# default spark version +DEFAULT_SPARK_VERSION = "3.3" 
+DEFAULT_SPARK_CONTAINER_VERSION = "v1" + +SPARK_NAME = "spark" + +# run context dictionary keys +KEY_EXPERIMENT_NAME = "experiment_name" +KEY_RUN_NAME = "run_name" + +JOBS_CONTAINER_ENTRYPOINT = [ + "/bin/bash", + f"/opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{ENTRYPOINT_SCRIPT_NAME}", +] + +SPARK_APP_SCRIPT_PATH = f"/opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{SPARK_APP_SCRIPT_NAME}" + +ENTRYPOINT_SCRIPT = f""" +#!/bin/bash + +# Entry point for bootstrapping runtime environment and invoking remote function + +set -eu + +PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} +export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs +printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" +export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip +printf "INFO: PIP_CACHE_DIR is set to '$PIP_CACHE_DIR'\\n" + +printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" +cat /opt/ml/input/config/resourceconfig.json + +printf "INFO: Bootstraping runtime environment.\\n" +python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" +source /opt/ml/input/sm_training.env + +if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] +then + if [ -f "remote_function_conda_env.txt" ] + then + cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt + fi + printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" + cd {JOB_REMOTE_FUNCTION_WORKSPACE} +fi + +if [ -f "remote_function_conda_env.txt" ] +then + conda_env=$(cat remote_function_conda_env.txt) + + if which mamba >/dev/null; then + conda_exe="mamba" + else + conda_exe="conda" + fi + + printf "INFO: Invoking remote function inside conda environment: $conda_env.\\n" + printf "INFO: $conda_exe run -n $conda_env python -m sagemaker.train.remote_function.invoke_function \\n" + $conda_exe run -n $conda_env python -m 
sagemaker.train.remote_function.invoke_function "$@" +else + printf "INFO: No conda env provided. Invoking remote function\\n" + printf "INFO: python -m sagemaker.train.remote_function.invoke_function \\n" + python -m sagemaker.train.remote_function.invoke_function "$@" +fi +""" + +ENTRYPOINT_MPIRUN_SCRIPT = f""" +#!/bin/bash + +# Entry point for bootstrapping runtime environment and invoking remote function with mpirun + +set -eu + +PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} +export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs +printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" +export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip +printf "INFO: PIP_CACHE_DIR is set to '$PIP_CACHE_DIR'\\n" + +printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" +cat /opt/ml/input/config/resourceconfig.json + +printf "INFO: Bootstraping runtime environment.\\n" +python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" +source /opt/ml/input/sm_training.env + +if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] +then + if [ -f "remote_function_conda_env.txt" ] + then + cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt + fi + printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" + cd {JOB_REMOTE_FUNCTION_WORKSPACE} +fi + +if [ -f "remote_function_conda_env.txt" ] +then + conda_env=$(cat remote_function_conda_env.txt) + + if which mamba >/dev/null; then + conda_exe="mamba" + else + conda_exe="conda" + fi + + if [ "$SM_CURRENT_HOST" = "$SM_MASTER_ADDR" ]; then + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} + + printf "INFO: Invoking remote function with mpirun inside conda environment: $conda_env.\\n" + printf "INFO: $conda_exe run -n $conda_env mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ + --allow-run-as-root 
--display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ + -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ + -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ + -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ + + python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n" + $conda_exe run -n $conda_env mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ + --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ + -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ + -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ + -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ + $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ + python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@" + + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1 + else + printf "INFO: This is the instance $SM_CURRENT_HOST. mpirun command terminated\\n" + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} + fi +else + if [ "$SM_CURRENT_HOST" = "$SM_MASTER_ADDR" ]; then + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} + + printf "INFO: No conda env provided. 
Invoking remote function with mpirun\\n" + printf "INFO: mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ + --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ + -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ + -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ + -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ + $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ + python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n" + + mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \ + --allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \ + -mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \ + -mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \ + -x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \ + $SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \ + python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@" + + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1 + else + printf "INFO: This is the instance $SM_CURRENT_HOST.\\n" + python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} + fi +fi +""" + +ENTRYPOINT_TORCHRUN_SCRIPT = f""" +#!/bin/bash + +# Entry point for bootstrapping runtime environment and invoking remote function with torchrun + +set -eu + +PERSISTENT_CACHE_DIR=${{SAGEMAKER_MANAGED_WARMPOOL_CACHE_DIRECTORY:-/opt/ml/cache}} +export CONDA_PKGS_DIRS=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/conda/pkgs +printf "INFO: CONDA_PKGS_DIRS is set to '$CONDA_PKGS_DIRS'\\n" +export PIP_CACHE_DIR=${{PERSISTENT_CACHE_DIR}}/sm_remotefunction_user_dependencies_cache/pip +printf "INFO: PIP_CACHE_DIR is set to 
'$PIP_CACHE_DIR'\\n" + +printf "INFO: /opt/ml/input/config/resourceconfig.json:\\n" +cat /opt/ml/input/config/resourceconfig.json + +printf "INFO: Bootstraping runtime environment.\\n" +python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" +source /opt/ml/input/sm_training.env + +if [ -d {JOB_REMOTE_FUNCTION_WORKSPACE} ] +then + if [ -f "remote_function_conda_env.txt" ] + then + cp remote_function_conda_env.txt {JOB_REMOTE_FUNCTION_WORKSPACE}/remote_function_conda_env.txt + fi + printf "INFO: Changing workspace to {JOB_REMOTE_FUNCTION_WORKSPACE}.\\n" + cd {JOB_REMOTE_FUNCTION_WORKSPACE} +fi + +if [ -f "remote_function_conda_env.txt" ] +then + conda_env=$(cat remote_function_conda_env.txt) + + if which mamba >/dev/null; then + conda_exe="mamba" + else + conda_exe="conda" + fi + + printf "INFO: Invoking remote function with torchrun inside conda environment: $conda_env.\\n" + printf "INFO: $conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \ + --master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \ + -m sagemaker.train.remote_function.invoke_function \\n" + + $conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \ + --master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \ + -m sagemaker.train.remote_function.invoke_function "$@" +else + printf "INFO: No conda env provided. 
Invoking remote function with torchrun\\n" + printf "INFO: torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \ + --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function \\n" + + torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \ + --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function "$@" +fi """ -DEPRECATED: This module has been moved to sagemaker.core.remote_function.job -This is a backward compatibility shim. +SPARK_ENTRYPOINT_SCRIPT = f""" +#!/bin/bash + +# Entry point for bootstrapping runtime environment and invoking remote function for Spark + +set -eu + +printf "INFO: Bootstraping Spark runtime environment.\\n" + +python3 /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{BOOTSTRAP_SCRIPT_NAME} "$@" + +# Spark Container entry point script to initiate the spark application +smspark-submit "$@" """ -from __future__ import absolute_import -import warnings +_STATUS_CODE_TABLE = { + "COMPLETED": "Completed", + "INPROGRESS": "InProgress", + "IN_PROGRESS": "InProgress", + "FAILED": "Failed", + "STOPPED": "Stopped", + "STOPPING": "Stopping", + "STARTING": "Starting", + "PENDING": "Pending", +} -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.job import * # noqa: F401, F403 +logger = logging_config.get_logger() -warnings.warn( - "sagemaker.train.remote_function.job has been moved to sagemaker.core.remote_function.job. " - "Please update your imports. This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) + +class LogState(object): + """Placeholder docstring""" + + STARTING = 1 + WAIT_IN_PROGRESS = 2 + TAILING = 3 + JOB_COMPLETE = 4 + COMPLETE = 5 + + +class _JobSettings: + """Helper class that processes the job settings. 
+ + It validates the job settings and provides default values if necessary. + """ + + def __init__( + self, + *, + dependencies: str = None, + pre_execution_commands: List[str] = None, + pre_execution_script: str = None, + environment_variables: Dict[str, Union[str, "PipelineVariable"]] = None, + image_uri: Union[str, "PipelineVariable"] = None, + include_local_workdir: bool = None, + custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, + instance_count: Union[int, "PipelineVariable"] = 1, + instance_type: Union[str, "PipelineVariable"] = None, + job_conda_env: Union[str, "PipelineVariable"] = None, + job_name_prefix: str = None, + keep_alive_period_in_seconds: Union[int, "PipelineVariable"] = 0, + max_retry_attempts: Union[int, "PipelineVariable"] = 1, + max_runtime_in_seconds: Union[int, "PipelineVariable"] = 24 * 60 * 60, + role: str = None, + s3_kms_key: Union[str, "PipelineVariable"] = None, + s3_root_uri: str = None, + sagemaker_session: Session = None, + security_group_ids: List[Union[str, "PipelineVariable"]] = None, + subnets: List[Union[str, "PipelineVariable"]] = None, + tags: Optional[Tags] = None, + volume_kms_key: Union[str, "PipelineVariable"] = None, + volume_size: Union[int, "PipelineVariable"] = 30, + encrypt_inter_container_traffic: Union[bool, "PipelineVariable"] = None, + spark_config: SparkConfig = None, + use_spot_instances=False, + max_wait_time_in_seconds=None, + disable_output_compression: bool = False, + use_torchrun: bool = False, + use_mpirun: bool = False, + nproc_per_node: Optional[int] = None, + ): + """Initialize a _JobSettings instance which configures the remote job. + + Args: + dependencies (str): Either the path to a dependencies file or the reserved keyword + ``auto_capture``. Defaults to ``None``. + If ``dependencies`` is provided, the value must be one of the following: + + * A path to a conda environment.yml file. The following conditions apply. 
+ + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + then the conda environment is updated by installing dependencies from the + yaml file and the function is invoked within that conda environment. For + this to succeed, the conda environment name must already be set in + ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already + exist in the image. + * If none of the previous conditions are met, a new conda environment named + ``sagemaker-runtime-env`` is created and the function annotated with the remote + decorator is invoked in that conda environment. + + * A path to a requirements.txt file. The following conditions apply. + + * If ``job_conda_env`` is set in the remote decorator, dependencies are installed + within that conda environment and the function annotated with the remote decorator + is invoked in the same conda environment. For this to succeed, the specified + conda environment must already exist in the image. + * If an environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + dependencies are installed within that conda environment and the function + annotated with the remote decorator is invoked in the same. For this to succeed, + the conda environment name must already be set in ``SAGEMAKER_JOB_CONDA_ENV``, and + ``SAGEMAKER_JOB_CONDA_ENV`` must already exist in the image. + * If none of the above conditions are met, conda is not used. Dependencies are + installed at the system level, without any virtual environment, and the function + annotated with the remote decorator is invoked using the Python runtime available + in the system path. + + * The parameter dependencies is set to ``auto_capture``. 
SageMaker will automatically + generate an env_snapshot.yml corresponding to the current active conda environment’s + snapshot. You do not need to provide a dependencies file. The following conditions + apply: + + * You must run the remote function within an active conda environment. + * When installing the dependencies on the training job, the same conditions + as when dependencies is set to a path to a conda environment file apply. + These conditions are as follows: + + * If job_conda_env is set, then the conda environment is updated by installing + dependencies from the yaml file and the function is invoked within that + conda environment. For this to succeed, the specified conda environment must + already exist in the image. + * If the environment variable ``SAGEMAKER_JOB_CONDA_ENV`` is set in the image, + then the conda environment is updated by installing dependencies from the yaml + file and the function is invoked within that conda environment. For this to + succeed, the conda environment name must already be set in + ``SAGEMAKER_JOB_CONDA_ENV``, and ``SAGEMAKER_JOB_CONDA_ENV`` must already exist + in the image. + * If none of the previous conditions are met, a new conda environment with name + ``sagemaker-runtime-env`` is created and the function annotated with the + remote decorator is invoked in that conda environment. + + * ``None``. SageMaker will assume that there are no dependencies to install while + executing the remote annotated function in the training job. + + pre_execution_commands (List[str]): List of commands to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. + + pre_execution_script (str): Path to script file to be executed prior to executing + remote function. Only one of ``pre_execution_commands`` or ``pre_execution_script`` + can be specified at the same time. Defaults to None. 
+ + environment_variables (dict[str, str] or dict[str, PipelineVariable]): The environment + variables used inside the decorator function. Defaults to ``None``. + + image_uri (str, PipelineVariable): The universal resource identifier (URI) location of + a Docker image on Amazon Elastic Container Registry (ECR). Defaults to the following + based on where the SDK is running: + + * For users who specify ``spark_config`` and want to run the function in a Spark + application, the ``image_uri`` should be ``None``. A SageMaker Spark image will + be used for training, otherwise a ``ValueError`` is thrown. + * For users on SageMaker Studio notebooks, the image used as the kernel image for + the notebook is used. + * For other users, it is resolved to base python image with the same python version + as the environment running the local code. + + If no compatible image is found, a ValueError is thrown. + + include_local_workdir (bool): A flag to indicate that the remote function should include + local directories. Set to ``True`` if the remote function code imports local modules + and methods that are not available via PyPI or conda. Default value is ``False``. + + custom_file_filter (Callable[[str, List], List], CustomFileFilter): Either a function + that filters job dependencies to be uploaded to S3 or a ``CustomFileFilter`` object + that specifies the local directories and files to be included in the remote function. + If a callable is passed in, that function is passed to the ``ignore`` argument of + ``shutil.copytree``. Defaults to ``None``, which means only python + files are accepted and uploaded to S3. + + instance_count (int, PipelineVariable): The number of instances to use. Defaults to 1. + + instance_type (str, PipelineVariable): The Amazon Elastic Compute Cloud (EC2) instance + type to use to run the SageMaker job. e.g. ml.c4.xlarge. If not provided, + a ValueError is thrown. 
+ + job_conda_env (str, PipelineVariable): The name of the conda environment to activate + during job's runtime. Defaults to ``None``. + + job_name_prefix (str, PipelineVariable): The prefix used to create the underlying + SageMaker job. + + keep_alive_period_in_seconds (int, PipelineVariable): The duration in seconds to retain + and reuse provisioned infrastructure after the completion of a training job, also + known as SageMaker managed warm pools. The use of warm pools reduces the latency time + spent to provision new resources. The default value for + ``keep_alive_period_in_seconds`` is 0. + NOTE: Additional charges associated with warm pools may apply. Using this parameter + also activates a new persistent cache feature, which will further reduce job start up + latency than over using SageMaker managed warm pools alone by caching the package + source downloaded in the previous runs. + + max_retry_attempts (int, PipelineVariable): The max number of times the job is retried + on ``InternalServerFailure`` Error from SageMaker service. Defaults to 1. + + max_runtime_in_seconds (int, PipelineVariable): The upper limit in seconds to be used + for training. After this specified amount of time, SageMaker terminates the job + regardless of its current status. Defaults to 1 day or (86400 seconds). + + role (str): The IAM role (either name or full ARN) used to run your SageMaker training + job. Defaults to: + + * the SageMaker default IAM role if the SDK is running in SageMaker Notebooks or + SageMaker Studio Notebooks. + * if not above, a ValueError is thrown. + + s3_kms_key (str): The key used to encrypt the input and output data. + Default to ``None``. + + s3_root_uri (str): The root S3 folder to which the code archives and data are + uploaded to. Defaults to ``s3://``. + + sagemaker_session (sagemaker.core.helper.session.Session): The underlying SageMaker session to + which SageMaker service calls are delegated to (default: None). 
If not provided, + one is created using a default configuration chain. + + security_group_ids (List[str, PipelineVariable]): A list of security group IDs. + Defaults to ``None`` and the training job is created without VPC config. + + subnets (List[str, PipelineVariable]): A list of subnet IDs. Defaults to ``None`` + and the job is created without VPC config. + + tags (Optional[Tags]): Tags attached to the job. Defaults to ``None`` + and the training job is created without tags. + + volume_kms_key (str, PipelineVariable): An Amazon Key Management Service (KMS) key + used to encrypt an Amazon Elastic Block Storage (EBS) volume attached to the + training instance. Defaults to ``None``. + + volume_size (int, PipelineVariable): The size in GB of the storage volume for storing + input and output data during training. Defaults to ``30``. + + encrypt_inter_container_traffic (bool, PipelineVariable): A flag that specifies + whether traffic between training containers is encrypted for the training job. + Defaults to ``False``. + + spark_config (SparkConfig): Configurations to the Spark application that runs on + Spark image. If ``spark_config`` is specified, a SageMaker Spark image uri + will be used for training. Note that ``image_uri`` can not be specified at the + same time otherwise a ``ValueError`` is thrown. Defaults to ``None``. + + use_spot_instances (bool, PipelineVariable): Specifies whether to use SageMaker + Managed Spot instances for training. If enabled then the ``max_wait`` arg should + also be set. Defaults to ``False``. + + max_wait_time_in_seconds (int): Timeout in seconds waiting for spot training job. + After this amount of time Amazon SageMaker will stop waiting for managed spot + training job to complete. Defaults to ``None``. + + disable_output_compression (bool): Optional. When set to true, Model is uploaded to + Amazon S3 without compression after training finishes. + + use_torchrun (bool): Specifies whether to use torchrun for distributed training. 
+ Defaults to ``False``. + + use_mpirun (bool): Specifies whether to use mpirun for distributed training. + Defaults to ``False``. + + nproc_per_node (int): Optional. Specifies the number of processes per node for + distributed training. Defaults to ``None``. + This is automatically configured based on the instance type. + """ + self.sagemaker_session = sagemaker_session or Session() + self.environment_variables = resolve_value_from_config( + direct_input=environment_variables, + config_path=REMOTE_FUNCTION_ENVIRONMENT_VARIABLES, + default_value={}, + sagemaker_session=self.sagemaker_session, + ) + self.environment_variables.update( + {"AWS_DEFAULT_REGION": self.sagemaker_session.boto_region_name} + ) + + # The following will be overridden by the _Job.compile method. + # However, it needs to be kept here for feature store SDK. + # TODO: update the feature store SDK to set the HMAC key there. + self.environment_variables.update({"REMOTE_FUNCTION_SECRET_KEY": secrets.token_hex(32)}) + + if spark_config and image_uri: + raise ValueError("spark_config and image_uri cannot be specified at the same time!") + + if spark_config and job_conda_env: + raise ValueError("Remote Spark jobs do not support job_conda_env.") + + if spark_config and dependencies == "auto_capture": + raise ValueError( + "Remote Spark jobs do not support automatically capturing dependencies."
+ ) + + _image_uri = resolve_value_from_config( + direct_input=image_uri, + config_path=REMOTE_FUNCTION_IMAGE_URI, + sagemaker_session=self.sagemaker_session, + ) + + if spark_config: + self.image_uri = self._get_default_spark_image(self.sagemaker_session) + logger.info( + "Set the image uri as %s because value of spark_config is " + "indicating this is a remote spark job.", + self.image_uri, + ) + elif _image_uri: + self.image_uri = _image_uri + else: + self.image_uri = self._get_default_image(self.sagemaker_session) + + self.dependencies = resolve_value_from_config( + direct_input=dependencies, + config_path=REMOTE_FUNCTION_DEPENDENCIES, + sagemaker_session=self.sagemaker_session, + ) + + self.pre_execution_commands = resolve_value_from_config( + direct_input=pre_execution_commands, + config_path=REMOTE_FUNCTION_PRE_EXECUTION_COMMANDS, + sagemaker_session=self.sagemaker_session, + ) + + self.pre_execution_script = resolve_value_from_config( + direct_input=pre_execution_script, + config_path=REMOTE_FUNCTION_PRE_EXECUTION_SCRIPT, + sagemaker_session=self.sagemaker_session, + ) + + if self.pre_execution_commands is not None and self.pre_execution_script is not None: + raise ValueError( + "Only one of pre_execution_commands or pre_execution_script can be specified!" 
+ ) + + self.include_local_workdir = resolve_value_from_config( + direct_input=include_local_workdir, + config_path=REMOTE_FUNCTION_INCLUDE_LOCAL_WORKDIR, + default_value=False, + sagemaker_session=self.sagemaker_session, + ) + + self.custom_file_filter = resolve_custom_file_filter_from_config_file( + custom_file_filter, self.sagemaker_session + ) + + self.instance_type = resolve_value_from_config( + direct_input=instance_type, + config_path=REMOTE_FUNCTION_INSTANCE_TYPE, + sagemaker_session=self.sagemaker_session, + ) + if not self.instance_type: + raise ValueError("instance_type is a required parameter!") + + self.instance_count = instance_count + self.volume_size = volume_size + self.max_runtime_in_seconds = max_runtime_in_seconds + self.max_retry_attempts = max_retry_attempts + self.keep_alive_period_in_seconds = keep_alive_period_in_seconds + self.spark_config = spark_config + self.use_spot_instances = use_spot_instances + self.max_wait_time_in_seconds = max_wait_time_in_seconds + self.job_conda_env = resolve_value_from_config( + direct_input=job_conda_env, + config_path=REMOTE_FUNCTION_JOB_CONDA_ENV, + sagemaker_session=self.sagemaker_session, + ) + self.job_name_prefix = job_name_prefix + self.encrypt_inter_container_traffic = resolve_value_from_config( + direct_input=encrypt_inter_container_traffic, + config_path=REMOTE_FUNCTION_ENABLE_INTER_CONTAINER_TRAFFIC_ENCRYPTION, + default_value=False, + sagemaker_session=self.sagemaker_session, + ) + self.enable_network_isolation = False + + _role = resolve_value_from_config( + direct_input=role, + config_path=REMOTE_FUNCTION_ROLE_ARN, + sagemaker_session=self.sagemaker_session, + ) + if _role: + self.role = expand_role(self.sagemaker_session.boto_session, _role) + else: + self.role = get_execution_role(self.sagemaker_session) + + self.s3_root_uri = resolve_value_from_config( + direct_input=s3_root_uri, + config_path=REMOTE_FUNCTION_S3_ROOT_URI, + default_value=s3_path_join( + "s3://", + 
self.sagemaker_session.default_bucket(), + self.sagemaker_session.default_bucket_prefix, + ), + sagemaker_session=self.sagemaker_session, + ) + + self.s3_kms_key = resolve_value_from_config( + direct_input=s3_kms_key, + config_path=REMOTE_FUNCTION_S3_KMS_KEY_ID, + sagemaker_session=self.sagemaker_session, + ) + self.volume_kms_key = resolve_value_from_config( + direct_input=volume_kms_key, + config_path=REMOTE_FUNCTION_VOLUME_KMS_KEY_ID, + sagemaker_session=self.sagemaker_session, + ) + + _subnets = resolve_value_from_config( + direct_input=subnets, + config_path=REMOTE_FUNCTION_VPC_CONFIG_SUBNETS, + sagemaker_session=self.sagemaker_session, + ) + _security_group_ids = resolve_value_from_config( + direct_input=security_group_ids, + config_path=REMOTE_FUNCTION_VPC_CONFIG_SECURITY_GROUP_IDS, + sagemaker_session=self.sagemaker_session, + ) + vpc_config = vpc_utils.to_dict(subnets=_subnets, security_group_ids=_security_group_ids) + self.vpc_config = vpc_utils.sanitize(vpc_config) + + tags = format_tags(tags) + self.tags = _append_sagemaker_config_tags( + self.sagemaker_session, tags, REMOTE_FUNCTION_TAGS + ) + + self.disable_output_compression = disable_output_compression + self.use_torchrun = use_torchrun + self.use_mpirun = use_mpirun + self.nproc_per_node = nproc_per_node + + @staticmethod + def _get_default_image(session): + """Return Studio notebook image, if in Studio env. Else, base python. + + Args: + session (Session): Boto session. + + Returns: + Default SageMaker base python image. + """ + + if ( + "SAGEMAKER_INTERNAL_IMAGE_URI" in os.environ + and os.environ["SAGEMAKER_INTERNAL_IMAGE_URI"] + ): + return os.environ["SAGEMAKER_INTERNAL_IMAGE_URI"] + + py_version = str(sys.version_info[0]) + str(sys.version_info[1]) + + if py_version not in ["310", "38"]: + raise ValueError( + "Default image is supported only for Python versions 3.8 and 3.10. If you " + "are using any other python version, you must provide a compatible image_uri." 
+ ) + + region = session.boto_region_name + image_uri = get_base_python_image_uri(region=region, py_version=py_version) + + return image_uri + + @staticmethod + def _get_default_spark_image(session): + """Return the Spark image. + + Args: + session (Session): Boto session. + + Returns: + SageMaker Spark container image uri. + """ + + region = session.boto_region_name + + py_version = str(sys.version_info[0]) + str(sys.version_info[1]) + + if py_version not in ["39"]: + raise ValueError( + "The SageMaker Spark image for remote job only supports Python version 3.9. " + ) + + image_uri = image_uris.retrieve( + framework=SPARK_NAME, + region=region, + version=DEFAULT_SPARK_VERSION, + instance_type=None, + py_version=f"py{py_version}", + container_version=DEFAULT_SPARK_CONTAINER_VERSION, + ) + + return image_uri + + +class _Job: + """Helper class that interacts with the SageMaker training service.""" + + def __init__(self, job_name: str, s3_uri: str, sagemaker_session: Session, hmac_key: str): + """Initialize a _Job object. + + Args: + job_name (str): The training job name. + s3_uri (str): The training job output S3 uri. + sagemaker_session (Session): SageMaker boto session. + hmac_key (str): Remote function secret key. + """ + self.job_name = job_name + self.s3_uri = s3_uri + self.sagemaker_session = sagemaker_session + self.hmac_key = hmac_key + self._last_describe_response = None + + @staticmethod + def from_describe_response(describe_training_job_response, sagemaker_session): + """Construct a _Job from a describe_training_job_response object. + + Args: + describe_training_job_response (Dict): Describe training job response. + sagemaker_session (Session): SageMaker boto session. + + Returns: + the _Job object. 
+ """ + job_name = describe_training_job_response["TrainingJobName"] + s3_uri = describe_training_job_response["OutputDataConfig"]["S3OutputPath"] + hmac_key = describe_training_job_response["Environment"]["REMOTE_FUNCTION_SECRET_KEY"] + + job = _Job(job_name, s3_uri, sagemaker_session, hmac_key) + job._last_describe_response = describe_training_job_response + return job + + @staticmethod + def start(job_settings: _JobSettings, func, func_args, func_kwargs, run_info=None): + """Start a training job. + + Args: + job_settings (_JobSettings): the job settings. + func: the function to be executed. + func_args: the positional arguments to the function. + func_kwargs: the keyword arguments to the function + + Returns: + the _Job object. + """ + job_name = _Job._get_job_name(job_settings, func) + s3_base_uri = s3_path_join(job_settings.s3_root_uri, job_name) + + training_job_request = _Job.compile( + job_settings=job_settings, + job_name=job_name, + s3_base_uri=s3_base_uri, + func=func, + func_args=func_args, + func_kwargs=func_kwargs, + run_info=run_info, + ) + + logger.info("Creating job: %s", job_name) + + job_settings.sagemaker_session.sagemaker_client.create_training_job(**training_job_request) + + return _Job( + job_name, + s3_base_uri, + job_settings.sagemaker_session, + training_job_request["Environment"]["REMOTE_FUNCTION_SECRET_KEY"], + ) + + @staticmethod + def compile( + job_settings: _JobSettings, + job_name: str, + s3_base_uri: str, + func: Callable, + func_args: tuple, + func_kwargs: dict, + run_info=None, + serialized_data: _SerializedData = None, + ) -> dict: + """Build the artifacts and generate the training job request.""" + from sagemaker.core.workflow.properties import Properties + from sagemaker.core.workflow.parameters import Parameter + from sagemaker.core.workflow.functions import Join + from sagemaker.core.workflow.execution_variables import ( + ExecutionVariables, + ExecutionVariable, + ) + from sagemaker.core.workflow.utilities import 
load_step_compilation_context + + step_compilation_context = load_step_compilation_context() + + jobs_container_entrypoint = JOBS_CONTAINER_ENTRYPOINT[:] + + # generate hmac key for integrity check + if step_compilation_context is None: + hmac_key = secrets.token_hex(32) + else: + hmac_key = step_compilation_context.function_step_secret_token + + # serialize function and arguments + if step_compilation_context is None: + stored_function = StoredFunction( + sagemaker_session=job_settings.sagemaker_session, + s3_base_uri=s3_base_uri, + hmac_key=hmac_key, + s3_kms_key=job_settings.s3_kms_key, + ) + stored_function.save(func, *func_args, **func_kwargs) + else: + stored_function = StoredFunction( + sagemaker_session=job_settings.sagemaker_session, + s3_base_uri=s3_base_uri, + hmac_key=hmac_key, + s3_kms_key=job_settings.s3_kms_key, + context=Context( + step_name=step_compilation_context.step_name, + func_step_s3_dir=step_compilation_context.pipeline_build_time, + ), + ) + + stored_function.save_pipeline_step_function(serialized_data) + + stopping_condition = { + "MaxRuntimeInSeconds": job_settings.max_runtime_in_seconds, + } + if job_settings.max_wait_time_in_seconds is not None: + stopping_condition["MaxWaitTimeInSeconds"] = job_settings.max_wait_time_in_seconds + + request_dict = dict( + TrainingJobName=job_name, + RoleArn=job_settings.role, + StoppingCondition=stopping_condition, + RetryStrategy={"MaximumRetryAttempts": job_settings.max_retry_attempts}, + ) + + _update_job_request_with_checkpoint_config(func_args, func_kwargs, request_dict) + + if job_settings.tags: + request_dict["Tags"] = job_settings.tags + + # generate other build artifacts including workspace, requirements.txt + request_dict["InputDataConfig"] = _generate_input_data_config( + job_settings=job_settings, s3_base_uri=s3_base_uri + ) + + if step_compilation_context: + # Path format: base/step_name/build_timestamp/execution_id/results + # This matches the path construction in stored_function.py + 
s3_output_path = Join( + on="/", + values=[ + s3_base_uri, + step_compilation_context.step_name, + step_compilation_context.pipeline_build_time, + ExecutionVariables.PIPELINE_EXECUTION_ID, + "results", + ], + ) + output_config = {"S3OutputPath": s3_output_path} + else: + output_config = {"S3OutputPath": s3_base_uri} + if job_settings.s3_kms_key is not None: + output_config["KmsKeyId"] = job_settings.s3_kms_key + if job_settings.disable_output_compression: + output_config["CompressionType"] = "NONE" + request_dict["OutputDataConfig"] = output_config + + container_args = ["--s3_base_uri", s3_base_uri] + container_args.extend(["--region", job_settings.sagemaker_session.boto_region_name]) + container_args.extend( + ["--client_python_version", RuntimeEnvironmentManager()._current_python_version()] + ) + container_args.extend( + [ + "--client_sagemaker_pysdk_version", + RuntimeEnvironmentManager()._current_sagemaker_pysdk_version(), + ] + ) + container_args.extend( + [ + "--dependency_settings", + _DependencySettings.from_dependency_file_path( + job_settings.dependencies + ).to_string(), + ] + ) + if job_settings.use_torchrun: + container_args.extend(["--distribution", "torchrun"]) + elif job_settings.use_mpirun: + container_args.extend(["--distribution", "mpirun"]) + if job_settings.nproc_per_node is not None and int(job_settings.nproc_per_node) > 0: + container_args.extend(["--user_nproc_per_node", str(job_settings.nproc_per_node)]) + if job_settings.s3_kms_key: + container_args.extend(["--s3_kms_key", job_settings.s3_kms_key]) + + if job_settings.job_conda_env: + container_args.extend(["--job_conda_env", job_settings.job_conda_env]) + + if step_compilation_context: + # TODO: remove the duplicates in the list + container_args.extend(["--pipeline_step_name", step_compilation_context.step_name]) + container_args.extend( + ["--pipeline_execution_id", ExecutionVariables.PIPELINE_EXECUTION_ID] + ) + container_args.extend( + ["--func_step_s3_dir", 
step_compilation_context.pipeline_build_time] + ) + container_args.extend(["--property_references"]) + container_args.extend( + [ + ExecutionVariables.PIPELINE_EXECUTION_ID.expr["Get"], + ExecutionVariables.PIPELINE_EXECUTION_ID.to_string(), + ] + ) + for arg in func_args + tuple(func_kwargs.values()): + if isinstance(arg, (Parameter, ExecutionVariable, Properties)): + container_args.extend([arg.expr["Get"], arg.to_string()]) + + # Lazy import to avoid circular dependency + try: + from sagemaker.mlops.workflow.function_step import DelayedReturn + + if isinstance(arg, DelayedReturn): + # The uri is a Properties object + uri = get_step(arg)._properties.OutputDataConfig.S3OutputPath + container_args.extend([uri.expr["Get"], uri.to_string()]) + except ImportError: + # MLOps not installed, skip DelayedReturn handling + pass + + if run_info is not None: + container_args.extend(["--run_in_context", json.dumps(dataclasses.asdict(run_info))]) + elif _RunContext.get_current_run() is not None: + container_args.extend( + ["--run_in_context", _convert_run_to_json(_RunContext.get_current_run())] + ) + + algorithm_spec = dict( + TrainingImage=job_settings.image_uri, + TrainingInputMode="File", + ContainerEntrypoint=jobs_container_entrypoint, + ContainerArguments=container_args, + ) + + request_dict["AlgorithmSpecification"] = algorithm_spec + + resource_config = dict( + VolumeSizeInGB=job_settings.volume_size, + InstanceCount=job_settings.instance_count, + InstanceType=job_settings.instance_type, + ) + if job_settings.volume_kms_key is not None: + resource_config["VolumeKmsKeyId"] = job_settings.volume_kms_key + if job_settings.keep_alive_period_in_seconds is not None: + resource_config["KeepAlivePeriodInSeconds"] = job_settings.keep_alive_period_in_seconds + + request_dict["ResourceConfig"] = resource_config + + if job_settings.enable_network_isolation is not None: + request_dict["EnableNetworkIsolation"] = job_settings.enable_network_isolation + + if 
job_settings.encrypt_inter_container_traffic is not None: + request_dict["EnableInterContainerTrafficEncryption"] = ( + job_settings.encrypt_inter_container_traffic + ) + + if job_settings.vpc_config: + request_dict["VpcConfig"] = job_settings.vpc_config + + request_dict["EnableManagedSpotTraining"] = job_settings.use_spot_instances + + request_dict["Environment"] = job_settings.environment_variables + request_dict["Environment"].update({"REMOTE_FUNCTION_SECRET_KEY": hmac_key}) + + extended_request = _extend_spark_config_to_request(request_dict, job_settings, s3_base_uri) + extended_request = _extend_mpirun_to_request(extended_request, job_settings) + extended_request = _extend_torchrun_to_request(extended_request, job_settings) + + return extended_request + + def describe(self): + """Describe the underlying sagemaker training job. + + Returns: + Dict: Describe training job response. + """ + if self._last_describe_response is not None and self._last_describe_response[ + "TrainingJobStatus" + ] in ["Completed", "Failed", "Stopped"]: + return self._last_describe_response + + self._last_describe_response = ( + self.sagemaker_session.sagemaker_client.describe_training_job( + TrainingJobName=self.job_name + ) + ) + + return self._last_describe_response + + def stop(self): + """Stop the underlying sagemaker training job.""" + self.sagemaker_session.sagemaker_client.stop_training_job(TrainingJobName=self.job_name) + + def wait(self, timeout: int = None): + """Wait for the underlying sagemaker job to finish and displays its logs . + + This method blocks on the sagemaker job completing for up to the timeout value (if + specified). If timeout is ``None``, this method will block until the job is completed. + + Args: + timeout (int): Timeout in seconds to wait until the job is completed. ``None`` by + default. 
+ + Returns: None + """ + + self._last_describe_response = _logs_for_job( + sagemaker_session=self.sagemaker_session, + job_name=self.job_name, + wait=True, + timeout=timeout, + ) + + @staticmethod + def _get_job_name(job_settings, func): + """Get the underlying SageMaker job name from job_name_prefix or func. + + Args: + job_settings (_JobSettings): the job settings. + func: the function to be executed. + + Returns: + str : the training job name. + """ + from sagemaker.core.workflow.utilities import load_step_compilation_context + + step_complication_context = load_step_compilation_context() + + job_name_prefix = job_settings.job_name_prefix + if not job_name_prefix: + job_name_prefix = func.__name__ + # remove all special characters in the beginning of function name + job_name_prefix = re.sub(r"^[^a-zA-Z0-9]+", "", job_name_prefix) + # convert all remaining special characters to '-' + job_name_prefix = re.sub(r"[^a-zA-Z0-9-]", "-", job_name_prefix) + + if step_complication_context: + return job_name_prefix + return name_from_base(job_name_prefix) + + +def _prepare_and_upload_runtime_scripts( + spark_config: SparkConfig, + s3_base_uri: str, + s3_kms_key: str, + sagemaker_session: Session, + use_torchrun: bool = False, + use_mpirun: bool = False, +): + """Copy runtime scripts to a folder and upload to S3. + + In case of remote function, s3_base_uri is s3_root_uri + function_name. + In case of pipeline, s3_base_uri is s3_root_uri + pipeline_name. The runtime scripts are + uploaded only once per pipeline. + + Args: + spark_config (SparkConfig): remote Spark job configurations. + + s3_base_uri (str): S3 location that the runtime scripts will be uploaded to. + + s3_kms_key (str): kms key used to encrypt the files uploaded to S3. + + sagemaker_session (str): SageMaker boto client session. + + use_torchrun (bool): Whether to use torchrun or not. + + use_mpirun (bool): Whether to use mpirun or not. 
+ + nproc_per_node (Optional[int]): Number of processes per node + """ + + from sagemaker.core.workflow.utilities import load_step_compilation_context + + step_compilation_context = load_step_compilation_context() + + if step_compilation_context and not step_compilation_context.upload_runtime_scripts: + return s3_path_join(s3_base_uri, RUNTIME_SCRIPTS_CHANNEL_NAME) + + with _tmpdir() as bootstrap_scripts: + + # write entrypoint script to tmpdir + entrypoint_script_path = os.path.join(bootstrap_scripts, ENTRYPOINT_SCRIPT_NAME) + entry_point_script = ENTRYPOINT_SCRIPT + if spark_config: + entry_point_script = SPARK_ENTRYPOINT_SCRIPT + spark_script_path = os.path.join( + os.path.dirname(__file__), "runtime_environment", SPARK_APP_SCRIPT_NAME + ) + shutil.copy2(spark_script_path, bootstrap_scripts) + + if use_torchrun: + entry_point_script = ENTRYPOINT_TORCHRUN_SCRIPT + + if use_mpirun: + entry_point_script = ENTRYPOINT_MPIRUN_SCRIPT + + with open(entrypoint_script_path, "w", newline="\n") as file: + file.writelines(entry_point_script) + + bootstrap_script_path = os.path.join( + os.path.dirname(__file__), "runtime_environment", BOOTSTRAP_SCRIPT_NAME + ) + mpi_utils_path = os.path.join( + os.path.dirname(__file__), "runtime_environment", MPI_UTILS_SCRIPT_NAME + ) + runtime_manager_script_path = os.path.join( + os.path.dirname(__file__), "runtime_environment", RUNTIME_MANAGER_SCRIPT_NAME + ) + + # copy runtime scripts to tmpdir + shutil.copy2(bootstrap_script_path, bootstrap_scripts) + shutil.copy2(mpi_utils_path, bootstrap_scripts) + shutil.copy2(runtime_manager_script_path, bootstrap_scripts) + + upload_path = S3Uploader.upload( + bootstrap_scripts, + s3_path_join(s3_base_uri, RUNTIME_SCRIPTS_CHANNEL_NAME), + s3_kms_key, + sagemaker_session, + ) + + if step_compilation_context: + step_compilation_context.upload_runtime_scripts = False + return upload_path + + +def _generate_input_data_config(job_settings: _JobSettings, s3_base_uri: str): + """Generates input data 
config""" + from sagemaker.core.workflow.utilities import load_step_compilation_context + + step_compilation_context = load_step_compilation_context() + + bootstrap_scripts_s3uri = _prepare_and_upload_runtime_scripts( + spark_config=job_settings.spark_config, + s3_base_uri=s3_base_uri, + s3_kms_key=job_settings.s3_kms_key, + sagemaker_session=job_settings.sagemaker_session, + use_torchrun=job_settings.use_torchrun, + use_mpirun=job_settings.use_mpirun, + ) + + input_data_config = [ + dict( + ChannelName=RUNTIME_SCRIPTS_CHANNEL_NAME, + DataSource={ + "S3DataSource": { + "S3Uri": bootstrap_scripts_s3uri, + "S3DataType": "S3Prefix", + } + }, + ) + ] + + local_dependencies_path = RuntimeEnvironmentManager().snapshot(job_settings.dependencies) + + if step_compilation_context: + with _tmpdir() as tmp_dir: + script_and_dependencies_s3uri = _prepare_dependencies_and_pre_execution_scripts( + local_dependencies_path=local_dependencies_path, + pre_execution_commands=job_settings.pre_execution_commands, + pre_execution_script_local_path=job_settings.pre_execution_script, + s3_base_uri=s3_base_uri, + s3_kms_key=job_settings.s3_kms_key, + sagemaker_session=job_settings.sagemaker_session, + tmp_dir=tmp_dir, + ) + + if script_and_dependencies_s3uri: + input_data_config.append( + dict( + ChannelName=SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME, + DataSource={ + "S3DataSource": { + "S3Uri": script_and_dependencies_s3uri, + "S3DataType": "S3Prefix", + } + }, + ) + ) + + user_workspace_s3uri = _prepare_and_upload_workspace( + local_dependencies_path=local_dependencies_path, + include_local_workdir=job_settings.include_local_workdir, + pre_execution_commands=job_settings.pre_execution_commands, + pre_execution_script_local_path=job_settings.pre_execution_script, + s3_base_uri=s3_base_uri, + s3_kms_key=job_settings.s3_kms_key, + sagemaker_session=job_settings.sagemaker_session, + custom_file_filter=job_settings.custom_file_filter, + ) + + if user_workspace_s3uri: + input_data_config.append( + 
dict( + ChannelName=( + REMOTE_FUNCTION_WORKSPACE + if not step_compilation_context + else step_compilation_context.pipeline_build_time + ), + DataSource={ + "S3DataSource": { + "S3Uri": user_workspace_s3uri, + "S3DataType": "S3Prefix", + } + }, + ) + ) + + return input_data_config + + +def _prepare_dependencies_and_pre_execution_scripts( + local_dependencies_path: str, + pre_execution_commands: List[str], + pre_execution_script_local_path: str, + s3_base_uri: str, + s3_kms_key: str, + sagemaker_session: Session, + tmp_dir: str, +): + """Prepare pre-execution scripts and dependencies and upload them to s3. + + If pre execution commands are provided, a new bash file will be created + with those commands in tmp directory. + If pre execution script is provided, it copies that file from local file path + to tmp directory. + If local dependencies file is provided, it copies that file from local file path + to tmp directory. + If under pipeline context, tmp directory with copied dependencies and scripts is + uploaded to S3. 
+ """ + from sagemaker.core.workflow.utilities import load_step_compilation_context + + if not (local_dependencies_path or pre_execution_commands or pre_execution_script_local_path): + return None + + if local_dependencies_path: + dst_path = shutil.copy2(local_dependencies_path, tmp_dir) + logger.info("Copied dependencies file at '%s' to '%s'", local_dependencies_path, dst_path) + + if pre_execution_commands or pre_execution_script_local_path: + pre_execution_script = os.path.join(tmp_dir, PRE_EXECUTION_SCRIPT_NAME) + if pre_execution_commands: + with open(pre_execution_script, "w") as target_script: + commands = [cmd + "\n" for cmd in pre_execution_commands] + target_script.writelines(commands) + logger.info( + "Generated pre-execution script from commands to '%s'", pre_execution_script + ) + else: + shutil.copy2(pre_execution_script_local_path, pre_execution_script) + logger.info( + "Copied pre-execution commands from script at '%s' to '%s'", + pre_execution_script_local_path, + pre_execution_script, + ) + + step_compilation_context = load_step_compilation_context() + if step_compilation_context: + upload_path = S3Uploader.upload( + tmp_dir, + s3_path_join( + s3_base_uri, + step_compilation_context.step_name, + step_compilation_context.pipeline_build_time, + SCRIPT_AND_DEPENDENCIES_CHANNEL_NAME, + ), + s3_kms_key, + sagemaker_session, + ) + logger.info( + "Successfully uploaded dependencies and pre execution scripts to '%s'", upload_path + ) + return upload_path + return None + + +def _prepare_and_upload_workspace( + local_dependencies_path: str, + include_local_workdir: bool, + pre_execution_commands: List[str], + pre_execution_script_local_path: str, + s3_base_uri: str, + s3_kms_key: str, + sagemaker_session: Session, + custom_file_filter: Optional[Union[Callable[[str, List], List], CustomFileFilter]] = None, +) -> str: + """Prepare and upload the workspace to S3. + + Under pipeline context, only workdir is packaged in the workspace folder and uploaded to s3. 
+ Under remote function context, workdir along with pre execution scripts and dependencies + are packaged together into the workspace folder and uploaded to S3. + """ + from sagemaker.core.workflow.utilities import load_step_compilation_context + + step_compilation_context = load_step_compilation_context() + + if not ( + local_dependencies_path + or include_local_workdir + or pre_execution_commands + or pre_execution_script_local_path + ): + return None + + func_step_s3_dir = None + if step_compilation_context: + func_step_s3_dir = step_compilation_context.pipeline_build_time + if not include_local_workdir: + return None + if not step_compilation_context.upload_workspace: + return s3_path_join(s3_base_uri, REMOTE_FUNCTION_WORKSPACE, func_step_s3_dir) + + with _tmpdir() as tmp_dir: + tmp_workspace_dir = os.path.join(tmp_dir, "temp_workspace/") + os.mkdir(tmp_workspace_dir) + # TODO Remove the following hack to avoid dir_exists error in the copy_tree call below. + tmp_workspace = os.path.join(tmp_workspace_dir, JOB_REMOTE_FUNCTION_WORKSPACE) + + if include_local_workdir: + copy_workdir(tmp_workspace, custom_file_filter) + logger.info("Copied user workspace to '%s'", tmp_workspace) + + if not os.path.isdir(tmp_workspace): + # create the directory if no workdir_path was provided in the input. 
+ os.mkdir(tmp_workspace) + + if not step_compilation_context: + _prepare_dependencies_and_pre_execution_scripts( + local_dependencies_path=local_dependencies_path, + pre_execution_commands=pre_execution_commands, + pre_execution_script_local_path=pre_execution_script_local_path, + s3_base_uri=s3_base_uri, + s3_kms_key=s3_kms_key, + sagemaker_session=sagemaker_session, + tmp_dir=tmp_workspace, + ) + + workspace_archive_path = os.path.join(tmp_dir, "workspace") + workspace_archive_path = shutil.make_archive( + workspace_archive_path, "zip", tmp_workspace_dir + ) + logger.info("Successfully created workdir archive at '%s'", workspace_archive_path) + + upload_path = S3Uploader.upload( + workspace_archive_path, + s3_path_join(s3_base_uri, REMOTE_FUNCTION_WORKSPACE, func_step_s3_dir), + s3_kms_key, + sagemaker_session, + ) + logger.info("Successfully uploaded workdir to '%s'", upload_path) + if step_compilation_context: + step_compilation_context.upload_workspace = False + return upload_path + + +def _convert_run_to_json(run: Run) -> str: + """Convert current run into json string""" + run_info = _RunInfo(run.experiment_name, run.run_name) + return json.dumps(dataclasses.asdict(run_info)) + + +def _prepare_and_upload_spark_dependent_files( + spark_config: SparkConfig, + s3_base_uri: str, + s3_kms_key: str, + sagemaker_session: Session, +) -> Tuple: + """Upload the Spark dependencies to S3 if present. + + Args: + spark_config (SparkConfig): The remote Spark job configurations. + s3_base_uri (str): The S3 location that the Spark dependencies will be uploaded to. + s3_kms_key (str): The kms key used to encrypt the files uploaded to S3. + sagemaker_session (str): SageMaker boto client session. 
+ """ + if not spark_config: + return None, None, None, None + + submit_jars_s3_paths = _upload_spark_submit_deps( + spark_config.submit_jars, + SPARK_SUBMIT_JARS_WORKSPACE, + s3_base_uri, + s3_kms_key, + sagemaker_session, + ) + submit_py_files_s3_paths = _upload_spark_submit_deps( + spark_config.submit_py_files, + SPARK_SUBMIT_PY_FILES_WORKSPACE, + s3_base_uri, + s3_kms_key, + sagemaker_session, + ) + submit_files_s3_path = _upload_spark_submit_deps( + spark_config.submit_files, + SPARK_SUBMIT_FILES_WORKSPACE, + s3_base_uri, + s3_kms_key, + sagemaker_session, + ) + config_file_s3_uri = _upload_serialized_spark_configuration( + s3_base_uri, s3_kms_key, spark_config.configuration, sagemaker_session + ) + + return submit_jars_s3_paths, submit_py_files_s3_paths, submit_files_s3_path, config_file_s3_uri + + +def _upload_spark_submit_deps( + submit_deps: List[str], + workspace_name: str, + s3_base_uri: str, + s3_kms_key: str, + sagemaker_session: Session, +) -> str: + """Upload the Spark submit dependencies to S3. + + Args: + submit_deps (List[str]): A list of path which points to the Spark dependency files. + The path can be either a local path or S3 uri. For example ``/local/deps.jar`` or + ``s3:///deps.jar``. + + workspace_name (str): workspace name for Spark dependency. + s3_base_uri (str): S3 location that the Spark dependencies will be uploaded to. + s3_kms_key (str): kms key used to encrypt the files uploaded to S3. + sagemaker_session (str): SageMaker boto client session. + + Returns: + str : The concatenated path of all dependencies which will be passed to Spark. 
+ """ + spark_opt_s3_uris = [] + if not submit_deps: + return None + + if not workspace_name or not s3_base_uri: + raise ValueError("workspace_name or s3_base_uri may not be empty.") + + for dep_path in submit_deps: + dep_url = urlparse(dep_path) + + if dep_url.scheme in ["s3", "s3a"]: + spark_opt_s3_uris.append(dep_path) + elif not dep_url.scheme or dep_url.scheme == "file": + if not os.path.isfile(dep_path): + raise ValueError(f"submit_deps path {dep_path} is not a valid local file.") + + upload_path = S3Uploader.upload( + local_path=dep_path, + desired_s3_uri=s3_path_join(s3_base_uri, workspace_name), + kms_key=s3_kms_key, + sagemaker_session=sagemaker_session, + ) + + spark_opt_s3_uris.append(upload_path) + logger.info("Uploaded the local file %s to %s", dep_path, upload_path) + return str.join(",", spark_opt_s3_uris) + + +def _upload_serialized_spark_configuration( + s3_base_uri: str, s3_kms_key: str, configuration: Dict, sagemaker_session: Session +) -> str: + """Upload the Spark configuration json to S3""" + if not configuration: + return None + + serialized_configuration = BytesIO(json.dumps(configuration).encode("utf-8")) + config_file_s3_uri = s3_path_join(s3_base_uri, SPARK_CONF_WORKSPACE, SPARK_CONF_FILE_NAME) + + S3Uploader.upload_string_as_file_body( + body=serialized_configuration, + desired_s3_uri=config_file_s3_uri, + kms_key=s3_kms_key, + sagemaker_session=sagemaker_session, + ) + + logger.info("Uploaded spark configuration json %s to %s", configuration, config_file_s3_uri) + + return config_file_s3_uri + + +def _extend_mpirun_to_request( + request_dict: Dict, + job_settings: _JobSettings, +) -> Dict: + """Extend the create training job request with mpirun configuration. + + Args: + request_dict (Dict): create training job request dict. + job_settings (_JobSettings): the job settings. 
+ """ + use_mpirun = job_settings.use_mpirun + instance_count = job_settings.instance_count + + if not use_mpirun: + return request_dict + + if instance_count == 1: + return request_dict + + extended_request = request_dict.copy() + + for input_channel in extended_request["InputDataConfig"]: + s3_data_source = input_channel["DataSource"].get("S3DataSource", None) + if s3_data_source: + s3_data_source["S3DataDistributionType"] = "FullyReplicated" + + return extended_request + + +def _extend_torchrun_to_request( + request_dict: Dict, + job_settings: _JobSettings, +) -> Dict: + """Extend the create training job request with torchrun configuration. + + Args: + request_dict (Dict): create training job request dict. + job_settings (_JobSettings): the job settings. + """ + use_torchrun = job_settings.use_torchrun + instance_count = job_settings.instance_count + + if not use_torchrun: + return request_dict + + if instance_count == 1: + return request_dict + + extended_request = request_dict.copy() + + for input_channel in extended_request["InputDataConfig"]: + s3_data_source = input_channel["DataSource"].get("S3DataSource", None) + if s3_data_source: + s3_data_source["S3DataDistributionType"] = "FullyReplicated" + + return extended_request + + +def _extend_spark_config_to_request( + request_dict: Dict, + job_settings: _JobSettings, + s3_base_uri: str, +) -> Dict: + """Extend the create training job request with spark configurations. + + Args: + request_dict (Dict): create training job request dict. + job_settings (_JobSettings): the job settings. + s3_base_uri (str): S3 location that the Spark dependencies will be uploaded to. 
+ """ + spark_config = job_settings.spark_config + + if not spark_config: + return request_dict + + extended_request = request_dict.copy() + container_entrypoint = extended_request["AlgorithmSpecification"]["ContainerEntrypoint"] + + ( + submit_jars_s3_paths, + submit_py_files_s3_paths, + submit_files_s3_path, + config_file_s3_uri, + ) = _prepare_and_upload_spark_dependent_files( + spark_config=spark_config, + s3_base_uri=s3_base_uri, + s3_kms_key=job_settings.s3_kms_key, + sagemaker_session=job_settings.sagemaker_session, + ) + + input_data_config = extended_request["InputDataConfig"] + + if config_file_s3_uri: + input_data_config.append( + dict( + ChannelName=SPARK_CONF_CHANNEL_NAME, + DataSource={ + "S3DataSource": { + "S3Uri": config_file_s3_uri, + "S3DataType": "S3Prefix", + } + }, + ) + ) + + for input_channel in extended_request["InputDataConfig"]: + s3_data_source = input_channel["DataSource"].get("S3DataSource", None) + if s3_data_source: + s3_data_source["S3DataDistributionType"] = "FullyReplicated" + + if spark_config.spark_event_logs_uri: + container_entrypoint.extend( + ["--spark-event-logs-s3-uri", spark_config.spark_event_logs_uri] + ) + + if submit_jars_s3_paths: + container_entrypoint.extend(["--jars", submit_jars_s3_paths]) + + if submit_py_files_s3_paths: + container_entrypoint.extend(["--py-files", submit_py_files_s3_paths]) + + if submit_files_s3_path: + container_entrypoint.extend(["--files", submit_files_s3_path]) + + if spark_config: + container_entrypoint.extend([SPARK_APP_SCRIPT_PATH]) + + return extended_request + + +def _update_job_request_with_checkpoint_config(args, kwargs, request_dict): + """Extend job request with checkpoint config based on CheckpointLocation in function args. + + Args: + args (tuple): The positional arguments of the remote function. + kwargs (Dict): The keyword arguments of the remote function. + request_dict (Dict): create training job request dict. 
+ """ + checkpoint_location_index_in_args = None + checkpoint_location_key_in_kwargs = None + checkpoint_location_count = 0 + + for index, arg in enumerate(args): + if isinstance(arg, CheckpointLocation): + checkpoint_location_index_in_args = index + checkpoint_location_count += 1 + + for key, value in kwargs.items(): + if isinstance(value, CheckpointLocation): + checkpoint_location_key_in_kwargs = key + checkpoint_location_count += 1 + + if checkpoint_location_count < 1: + return + + if checkpoint_location_count > 1: + raise ValueError( + "Remote function cannot have more than one argument of type CheckpointLocation." + ) + + if checkpoint_location_index_in_args is not None: + checkpoint_location_arg = args[checkpoint_location_index_in_args] + else: + checkpoint_location_arg = kwargs[checkpoint_location_key_in_kwargs] + + checkpoint_s3_uri = checkpoint_location_arg._s3_uri + checkpoint_local_path = checkpoint_location_arg._local_path + + request_dict["CheckpointConfig"] = { + "LocalPath": checkpoint_local_path, + "S3Uri": checkpoint_s3_uri, + } + + +@dataclasses.dataclass +class _RunInfo: + """Data class to hold information of the run object from context.""" + + experiment_name: str + run_name: str + + +def _get_initial_job_state(description, status_key, wait): + """Placeholder docstring""" + status = description[status_key] + job_already_completed = status in ("Completed", "Failed", "Stopped") + return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE + + +def _logs_for_job( # noqa: C901 - suppress complexity warning for this method + sagemaker_session, job_name, wait=False, poll=10, log_type="All", timeout=None +): + """Display logs for a given training job, optionally tailing them until job is complete. + + If the output is a tty or a Jupyter cell, it will be color-coded + based on which instance the log entry is from. 
+ + Args: + sagemaker_session (sagemaker.core.helper.session.Session): A SageMaker Session + object, used for SageMaker interactions. + job_name (str): Name of the training job to display the logs for. + wait (bool): Whether to keep looking for new log entries until the job completes + (default: False). + poll (int): The interval in seconds between polling for new log entries and job + completion (default: 10). + log_type ([str]): A list of strings specifying which logs to print. Acceptable + strings are "All", "None", "Training", or "Rules". To maintain backwards + compatibility, boolean values are also accepted and converted to strings. + timeout (int): Timeout in seconds to wait until the job is completed. ``None`` by + default. + Returns: + Last call to sagemaker DescribeTrainingJob + Raises: + exceptions.CapacityError: If the training job fails with CapacityError. + exceptions.UnexpectedStatusException: If waiting and the training job fails. + """ + sagemaker_client = sagemaker_session.sagemaker_client + request_end_time = time.time() + timeout if timeout else None + description = _wait_until( + lambda: sagemaker_client.describe_training_job(TrainingJobName=job_name) + ) + print(secondary_training_status_message(description, None), end="") + + instance_count, stream_names, positions, client, log_group, dot, color_wrap = _logs_init( + sagemaker_session.boto_session, description, job="Training" + ) + + state = _get_initial_job_state(description, "TrainingJobStatus", wait) + + # The loop below implements a state machine that alternates between checking the job status + # and reading whatever is available in the logs at this point. Note, that if we were + # called with wait == False, we never check the job status. + # + # If wait == TRUE and job is not completed, the initial state is TAILING + # If wait == FALSE, the initial state is COMPLETE (doesn't matter if the job really is + # complete).
+ # + # The state table: + # + # STATE ACTIONS CONDITION NEW STATE + # ---------------- ---------------- ----------------- ---------------- + # TAILING Read logs, Pause, Get status Job complete JOB_COMPLETE + # Else TAILING + # JOB_COMPLETE Read logs, Pause Any COMPLETE + # COMPLETE Read logs, Exit N/A + # + # Notes: + # - The JOB_COMPLETE state forces us to do an extra pause and read any items that got to + # Cloudwatch after the job was marked complete. + last_describe_job_call = time.time() + last_description = description + last_debug_rule_statuses = None + last_profiler_rule_statuses = None + + while True: + _flush_log_streams( + stream_names, + instance_count, + client, + log_group, + job_name, + positions, + dot, + color_wrap, + ) + if timeout and time.time() > request_end_time: + print("Timeout Exceeded. {} seconds elapsed.".format(timeout)) + break + + if state == LogState.COMPLETE: + break + + time.sleep(poll) + + if state == LogState.JOB_COMPLETE: + state = LogState.COMPLETE + elif time.time() - last_describe_job_call >= 30: + description = sagemaker_client.describe_training_job(TrainingJobName=job_name) + last_describe_job_call = time.time() + + if secondary_training_status_changed(description, last_description): + print() + print(secondary_training_status_message(description, last_description), end="") + last_description = description + + status = description["TrainingJobStatus"] + + if status in ("Completed", "Failed", "Stopped"): + print() + state = LogState.JOB_COMPLETE + + # Print prettified logs related to the status of SageMaker Debugger rules. 
+ debug_rule_statuses = description.get("DebugRuleEvaluationStatuses", {}) + if ( + debug_rule_statuses + and _rule_statuses_changed(debug_rule_statuses, last_debug_rule_statuses) + and (log_type in {"All", "Rules"}) + ): + for status in debug_rule_statuses: + rule_log = ( + f"{status['RuleConfigurationName']}: {status['RuleEvaluationStatus']}" + ) + print(rule_log) + + last_debug_rule_statuses = debug_rule_statuses + + # Print prettified logs related to the status of SageMaker Profiler rules. + profiler_rule_statuses = description.get("ProfilerRuleEvaluationStatuses", {}) + if ( + profiler_rule_statuses + and _rule_statuses_changed(profiler_rule_statuses, last_profiler_rule_statuses) + and (log_type in {"All", "Rules"}) + ): + for status in profiler_rule_statuses: + rule_log = ( + f"{status['RuleConfigurationName']}: {status['RuleEvaluationStatus']}" + ) + print(rule_log) + + last_profiler_rule_statuses = profiler_rule_statuses + + if wait: + _check_job_status(job_name, description, "TrainingJobStatus") + if dot: + print() + # Customers are not billed for hardware provisioning, so billable time is less than + # total time + training_time = description.get("TrainingTimeInSeconds") + billable_time = description.get("BillableTimeInSeconds") + if training_time is not None: + print("Training seconds:", training_time * instance_count) + if billable_time is not None: + print("Billable seconds:", billable_time * instance_count) + if description.get("EnableManagedSpotTraining"): + saving = (1 - float(billable_time) / training_time) * 100 + print("Managed Spot Training savings: {:.1f}%".format(saving)) + return last_description + + +def _check_job_status(job, desc, status_key_name): + """Check to see if the job completed successfully. + + If not, construct and raise a exceptions. (UnexpectedStatusException). + + Args: + job (str): The name of the job to check. + desc (dict[str, str]): The result of ``describe_training_job()``. 
+ status_key_name (str): Status key name to check for. + + Raises: + exceptions.CapacityError: If the training job fails with CapacityError. + exceptions.UnexpectedStatusException: If the training job fails. + """ + status = desc[status_key_name] + # If the status is capital case, then convert it to Camel case + status = _STATUS_CODE_TABLE.get(status, status) + + if status == "Stopped": + logger.warning( + "Job ended with status 'Stopped' rather than 'Completed'. " + "This could mean the job timed out or stopped early for some other reason: " + "Consider checking whether it completed as you expect." + ) + elif status != "Completed": + reason = desc.get("FailureReason", "(No reason provided)") + job_type = status_key_name.replace("JobStatus", " job") + troubleshooting = ( + "https://docs.aws.amazon.com/sagemaker/latest/dg/" + "sagemaker-python-sdk-troubleshooting.html" + ) + message = ( + "Error for {job_type} {job_name}: {status}. Reason: {reason}. " + "Check troubleshooting guide for common errors: {troubleshooting}" + ).format( + job_type=job_type, + job_name=job, + status=status, + reason=reason, + troubleshooting=troubleshooting, + ) + if "CapacityError" in str(reason): + raise exceptions.CapacityError( + message=message, + allowed_statuses=["Completed", "Stopped"], + actual_status=status, + ) + raise exceptions.UnexpectedStatusException( + message=message, + allowed_statuses=["Completed", "Stopped"], + actual_status=status, + ) + + +def _flush_log_streams( + stream_names, instance_count, client, log_group, job_name, positions, dot, color_wrap +): + """Placeholder docstring""" + if len(stream_names) < instance_count: + # Log streams are created whenever a container starts writing to stdout/err, so this list + # may be dynamic until we have a stream for every instance. 
+ try: + streams = client.describe_log_streams( + logGroupName=log_group, + logStreamNamePrefix=job_name + "/", + orderBy="LogStreamName", + limit=min(instance_count, 50), + ) + stream_names = [s["logStreamName"] for s in streams["logStreams"]] + + while "nextToken" in streams: + streams = client.describe_log_streams( + logGroupName=log_group, + logStreamNamePrefix=job_name + "/", + orderBy="LogStreamName", + limit=50, + ) + + stream_names.extend([s["logStreamName"] for s in streams["logStreams"]]) + + positions.update( + [ + (s, sagemaker_logs.Position(timestamp=0, skip=0)) + for s in stream_names + if s not in positions + ] + ) + except ClientError as e: + # On the very first training job run on an account, there's no log group until + # the container starts logging, so ignore any errors thrown about that + err = e.response.get("Error", {}) + if err.get("Code", None) != "ResourceNotFoundException": + raise + + if len(stream_names) > 0: + if dot: + print("") + dot = False + for idx, event in sagemaker_logs.multi_stream_iter( + client, log_group, stream_names, positions + ): + color_wrap(idx, event["message"]) + ts, count = positions[stream_names[idx]] + if event["timestamp"] == ts: + positions[stream_names[idx]] = sagemaker_logs.Position(timestamp=ts, skip=count + 1) + else: + positions[stream_names[idx]] = sagemaker_logs.Position( + timestamp=event["timestamp"], skip=1 + ) + else: + dot = True + print(".", end="") + sys.stdout.flush() + + +def _rule_statuses_changed(current_statuses, last_statuses): + """Checks the rule evaluation statuses for SageMaker Debugger and Profiler rules.""" + if not last_statuses: + return True + + for current, last in zip(current_statuses, last_statuses): + if (current["RuleConfigurationName"] == last["RuleConfigurationName"]) and ( + current["RuleEvaluationStatus"] != last["RuleEvaluationStatus"] + ): + return True + + return False + + +def _get_initial_job_state(description, status_key, wait): + """Placeholder docstring""" + status 
= description[status_key] + job_already_completed = status in ("Completed", "Failed", "Stopped") + return LogState.TAILING if wait and not job_already_completed else LogState.COMPLETE + + +def _logs_init(boto_session, description, job): + """Placeholder docstring""" + if job == "Training": + if "InstanceGroups" in description["ResourceConfig"]: + instance_count = 0 + for instanceGroup in description["ResourceConfig"]["InstanceGroups"]: + instance_count += instanceGroup["InstanceCount"] + else: + instance_count = description["ResourceConfig"]["InstanceCount"] + elif job == "Transform": + instance_count = description["TransformResources"]["InstanceCount"] + elif job == "Processing": + instance_count = description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] + elif job == "AutoML": + instance_count = 0 + + stream_names = [] # The list of log streams + positions = {} # The current position in each stream, map of stream name -> position + + # Increase retries allowed (from default of 4), as we don't want waiting for a training job + # to be interrupted by a transient exception. 
+ config = botocore.config.Config(retries={"max_attempts": 15}) + client = boto_session.client("logs", config=config) + log_group = "/aws/sagemaker/" + job + "Jobs" + + dot = False + + from sagemaker.core.logs import ColorWrap + + color_wrap = ColorWrap() + + return instance_count, stream_names, positions, client, log_group, dot, color_wrap diff --git a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/bootstrap_runtime_environment.py b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/bootstrap_runtime_environment.py index afe0f80012..f07e860cf9 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/bootstrap_runtime_environment.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/bootstrap_runtime_environment.py @@ -167,7 +167,10 @@ def _handle_pre_exec_scripts(script_file_dir: str): """ path_to_pre_exec_script = os.path.join(script_file_dir, PRE_EXECUTION_SCRIPT_NAME) - RuntimeEnvironmentManager().run_pre_exec_script(pre_exec_script_path=path_to_pre_exec_script) + if os.path.isfile(path_to_pre_exec_script): + RuntimeEnvironmentManager().run_pre_exec_script( + pre_exec_script_path=path_to_pre_exec_script + ) def _install_dependencies( @@ -599,4 +602,4 @@ def main(sys_args=None): if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/mpi_utils_remote.py b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/mpi_utils_remote.py index 79ddd4020b..c5d9f15ee2 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/mpi_utils_remote.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/mpi_utils_remote.py @@ -249,4 +249,4 @@ def main(sys_args=None): if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) diff --git 
a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/runtime_environment_manager.py b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/runtime_environment_manager.py index f4d95f5412..5f00317c23 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/runtime_environment_manager.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/runtime_environment/runtime_environment_manager.py @@ -94,6 +94,50 @@ def from_dependency_file_path(dependency_file_path): class RuntimeEnvironmentManager: """Runtime Environment Manager class to manage runtime environment.""" + def _validate_path(self, path: str) -> str: + """Validate and sanitize file path to prevent path traversal attacks. + + Args: + path (str): The file path to validate + + Returns: + str: The validated absolute path + + Raises: + ValueError: If the path is invalid or contains suspicious patterns + """ + if not path: + raise ValueError("Path cannot be empty") + + # Get absolute path to prevent path traversal + abs_path = os.path.abspath(path) + + # Check for null bytes (common in path traversal attacks) + if '\x00' in path: + raise ValueError(f"Invalid path contains null byte: {path}") + + return abs_path + + def _validate_env_name(self, env_name: str) -> None: + """Validate conda environment name to prevent command injection. + + Args: + env_name (str): The environment name to validate + + Raises: + ValueError: If the environment name contains invalid characters + """ + if not env_name: + raise ValueError("Environment name cannot be empty") + + # Allow only alphanumeric, underscore, and hyphen + import re + if not re.match(r'^[a-zA-Z0-9_-]+$', env_name): + raise ValueError( + f"Invalid environment name '{env_name}'. " + "Only alphanumeric characters, underscores, and hyphens are allowed." 
+ ) + def snapshot(self, dependencies: str = None) -> str: """Creates snapshot of the user's environment @@ -252,42 +296,77 @@ def _is_file_exists(self, dependencies): def _install_requirements_txt(self, local_path, python_executable): """Install requirements.txt file""" - cmd = f"{python_executable} -m pip install -r {local_path} -U" - logger.info("Running command: '%s' in the dir: '%s' ", cmd, os.getcwd()) + # Validate path to prevent command injection + validated_path = self._validate_path(local_path) + cmd = [python_executable, "-m", "pip", "install", "-r", validated_path, "-U"] + logger.info("Running command: '%s' in the dir: '%s' ", " ".join(cmd), os.getcwd()) _run_shell_cmd(cmd) - logger.info("Command %s ran successfully", cmd) + logger.info("Command %s ran successfully", " ".join(cmd)) def _create_conda_env(self, env_name, local_path): """Create conda env using conda yml file""" + # Validate inputs to prevent command injection + self._validate_env_name(env_name) + validated_path = self._validate_path(local_path) - cmd = f"{self._get_conda_exe()} env create -n {env_name} --file {local_path}" - logger.info("Creating conda environment %s using: %s.", env_name, cmd) + cmd = [self._get_conda_exe(), "env", "create", "-n", env_name, "--file", validated_path] + logger.info("Creating conda environment %s using: %s.", env_name, " ".join(cmd)) _run_shell_cmd(cmd) logger.info("Conda environment %s created successfully.", env_name) def _install_req_txt_in_conda_env(self, env_name, local_path): """Install requirements.txt in the given conda environment""" + # Validate inputs to prevent command injection + self._validate_env_name(env_name) + validated_path = self._validate_path(local_path) - cmd = f"{self._get_conda_exe()} run -n {env_name} pip install -r {local_path} -U" - logger.info("Activating conda env and installing requirements: %s", cmd) + cmd = [self._get_conda_exe(), "run", "-n", env_name, "pip", "install", "-r", validated_path, "-U"] + logger.info("Activating 
conda env and installing requirements: %s", " ".join(cmd)) _run_shell_cmd(cmd) logger.info("Requirements installed successfully in conda env %s", env_name) def _update_conda_env(self, env_name, local_path): """Update conda env using conda yml file""" + # Validate inputs to prevent command injection + self._validate_env_name(env_name) + validated_path = self._validate_path(local_path) - cmd = f"{self._get_conda_exe()} env update -n {env_name} --file {local_path}" - logger.info("Updating conda env: %s", cmd) + cmd = [self._get_conda_exe(), "env", "update", "-n", env_name, "--file", validated_path] + logger.info("Updating conda env: %s", " ".join(cmd)) _run_shell_cmd(cmd) logger.info("Conda env %s updated succesfully", env_name) def _export_conda_env_from_prefix(self, prefix, local_path): """Export the conda env to a conda yml file""" - - cmd = f"{self._get_conda_exe()} env export -p {prefix} --no-builds > {local_path}" - logger.info("Exporting conda environment: %s", cmd) - _run_shell_cmd(cmd) - logger.info("Conda environment %s exported successfully", prefix) + # Validate inputs to prevent command injection + validated_prefix = self._validate_path(prefix) + validated_path = self._validate_path(local_path) + + cmd = [self._get_conda_exe(), "env", "export", "-p", validated_prefix, "--no-builds"] + logger.info("Exporting conda environment: %s", " ".join(cmd)) + + # Capture output and write to file instead of using shell redirection + try: + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False + ) + output, error_output = process.communicate() + return_code = process.wait() + + if return_code: + error_message = f"Encountered error while running command '{' '.join(cmd)}'. 
Reason: {error_output.decode('utf-8')}" + raise RuntimeEnvironmentError(error_message) + + # Write the captured output to the file + with open(validated_path, 'w') as f: + f.write(output.decode('utf-8')) + + logger.info("Conda environment %s exported successfully", validated_prefix) + except Exception as e: + raise RuntimeEnvironmentError(f"Failed to export conda environment: {str(e)}") def _write_conda_env_to_file(self, env_name): """Writes conda env to the text file""" @@ -330,6 +409,7 @@ def _current_sagemaker_pysdk_version(self): """Returns the current sagemaker python sdk version where program is running""" try: from importlib import metadata + return metadata.version("sagemaker") except Exception: return "3.0.0.dev0" # Development version fallback @@ -402,19 +482,26 @@ def _run_pre_execution_command_script(script_path: str): return return_code, error_logs -def _run_shell_cmd(cmd: str): +def _run_shell_cmd(cmd: list): """This method runs a given shell command using subprocess - Raises RuntimeEnvironmentError if the command fails + Args: + cmd (list): Command and arguments as a list (e.g., ['pip', 'install', '-r', 'requirements.txt']) + + Raises: + RuntimeEnvironmentError: If the command fails + ValueError: If cmd is not a list """ + if not isinstance(cmd, list): + raise ValueError("Command must be a list of arguments for security reasons") - process = subprocess.Popen((cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False) _log_output(process) error_logs = _log_error(process) return_code = process.wait() if return_code: - error_message = f"Encountered error while running command '{cmd}'. Reason: {error_logs}" + error_message = f"Encountered error while running command '{' '.join(cmd)}'. 
Reason: {error_logs}" raise RuntimeEnvironmentError(error_message) @@ -464,4 +551,4 @@ class RuntimeEnvironmentError(Exception): def __init__(self, message): self.message = message - super().__init__(self.message) \ No newline at end of file + super().__init__(self.message) diff --git a/sagemaker-train/src/sagemaker/train/remote_function/spark_config.py b/sagemaker-train/src/sagemaker/train/remote_function/spark_config.py index b5083b0566..6b25d5da8b 100644 --- a/sagemaker-train/src/sagemaker/train/remote_function/spark_config.py +++ b/sagemaker-train/src/sagemaker/train/remote_function/spark_config.py @@ -10,21 +10,140 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -""" -DEPRECATED: This module has been moved to sagemaker.core.remote_function.spark_config - -This is a backward compatibility shim. -""" +"""This module is used to define the Spark job config to remote function.""" from __future__ import absolute_import -import warnings +from typing import Optional, List, Dict, Union +import attr +from urllib.parse import urlparse +from sagemaker.core.workflow import is_pipeline_variable + + +def _validate_configuration(instance, attribute, configuration): + # pylint: disable=unused-argument + """This is the helper method to validate the spark configuration""" + if configuration: + SparkConfigUtils.validate_configuration(configuration=configuration) + + +def _validate_s3_uri(instance, attribute, s3_uri): + # pylint: disable=unused-argument + """This is the helper method to validate the s3 uri""" + if s3_uri: + SparkConfigUtils.validate_s3_uri(s3_uri) + + +@attr.s(frozen=True) +class SparkConfig: + """This is the class to initialize the spark configurations for remote function + + Attributes: + submit_jars (Optional[List[str]]): A list which contains paths to the jars which + are going to be submitted to 
Spark job. The location can be a valid s3 uri or + local path to the jar. Defaults to ``None``. + submit_py_files (Optional[List[str]]): A list which contains paths to the python + files which are going to be submitted to Spark job. The location can be a + valid s3 uri or local path to the python file. Defaults to ``None``. + submit_files (Optional[List[str]]): A list which contains paths to the files which + are going to be submitted to Spark job. The location can be a valid s3 uri or + local path to the python file. Defaults to ``None``. + configuration (list[dict] or dict): Configuration for Hadoop, Spark, or Hive. + List or dictionary of EMR-style classifications. + https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html + spark_event_logs_s3_uri (str): S3 path where Spark application events will + be published to. + """ + + submit_jars: Optional[List[str]] = attr.ib(default=None) + submit_py_files: Optional[List[str]] = attr.ib(default=None) + submit_files: Optional[List[str]] = attr.ib(default=None) + configuration: Optional[Union[List[Dict], Dict]] = attr.ib( + default=None, validator=_validate_configuration + ) + spark_event_logs_uri: Optional[str] = attr.ib(default=None, validator=_validate_s3_uri) + + +class SparkConfigUtils: + """Util class for spark configurations""" + + _valid_configuration_keys = ["Classification", "Properties", "Configurations"] + _valid_configuration_classifications = [ + "core-site", + "hadoop-env", + "hadoop-log4j", + "hive-env", + "hive-log4j", + "hive-exec-log4j", + "hive-site", + "spark-defaults", + "spark-env", + "spark-log4j", + "spark-hive-site", + "spark-metrics", + "yarn-env", + "yarn-site", + "export", + ] + + @staticmethod + def validate_configuration(configuration: Dict): + """Validates the user-provided Hadoop/Spark/Hive configuration. 
+ + This ensures that the list or dictionary the user provides will serialize to + JSON matching the schema of EMR's application configuration + + Args: + configuration (Dict): A dict that contains the configuration overrides to + the default values. For more information, please visit: + https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html + """ + emr_configure_apps_url = ( + "https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html" + ) + if isinstance(configuration, dict): + keys = configuration.keys() + if "Classification" not in keys or "Properties" not in keys: + raise ValueError( + f"Missing one or more required keys in configuration dictionary " + f"{configuration} Please see {emr_configure_apps_url} for more information" + ) + + for key in keys: + if key not in SparkConfigUtils._valid_configuration_keys: + raise ValueError( + f"Invalid key: {key}. " + f"Must be one of {SparkConfigUtils._valid_configuration_keys}. " + f"Please see {emr_configure_apps_url} for more information." + ) + if key == "Classification": + if ( + configuration[key] + not in SparkConfigUtils._valid_configuration_classifications + ): + raise ValueError( + f"Invalid classification: {key}. Must be one of " + f"{SparkConfigUtils._valid_configuration_classifications}" + ) + + if isinstance(configuration, list): + for item in configuration: + SparkConfigUtils.validate_configuration(item) + + # TODO (guoqioa@): method only checks urlparse scheme, need to perform deep s3 validation + @staticmethod + def validate_s3_uri(spark_output_s3_path): + """Validate whether the URI uses an S3 scheme. + + In the future, this validation will perform deeper S3 validation. -# Backward compatibility: re-export from core -from sagemaker.core.remote_function.spark_config import * # noqa: F401, F403 + Args: + spark_output_s3_path (str): The URI of the Spark output S3 Path. 
+ """ + if is_pipeline_variable(spark_output_s3_path): + return -warnings.warn( - "sagemaker.train.remote_function.spark_config has been moved to sagemaker.core.remote_function.spark_config. " - "Please update your imports. This shim will be removed in a future version.", - DeprecationWarning, - stacklevel=2 -) + if urlparse(spark_output_s3_path).scheme != "s3": + raise ValueError( + f"Invalid s3 path: {spark_output_s3_path}. Please enter something like " + "s3://bucket-name/folder-name" + ) diff --git a/sagemaker-train/tests/unit/train/remote_function/test_invoke_function.py b/sagemaker-train/tests/unit/train/remote_function/test_invoke_function.py index 6beafc3d27..c98d815189 100644 --- a/sagemaker-train/tests/unit/train/remote_function/test_invoke_function.py +++ b/sagemaker-train/tests/unit/train/remote_function/test_invoke_function.py @@ -167,7 +167,7 @@ def test_loads_context_with_empty_property_references(self): class TestExecuteRemoteFunction: """Test _execute_remote_function function.""" - @patch("sagemaker.core.remote_function.core.stored_function.StoredFunction") + @patch("sagemaker.train.remote_function.core.stored_function.StoredFunction") def test_executes_without_run_context(self, mock_stored_function_class): """Test executes stored function without run context.""" mock_stored_func = MagicMock() @@ -194,7 +194,7 @@ def test_executes_without_run_context(self, mock_stored_function_class): mock_stored_func.load_and_invoke.assert_called_once() @patch("sagemaker.train.remote_function.invoke_function._load_run_object") - @patch("sagemaker.core.remote_function.core.stored_function.StoredFunction") + @patch("sagemaker.train.remote_function.core.stored_function.StoredFunction") def test_executes_with_run_context(self, mock_stored_function_class, mock_load_run): """Test executes stored function with run context.""" mock_stored_func = MagicMock()