Skip to main content

1. Configure Sagemaker Endpoint

Configure your SageMaker endpoint in the Freeplay app and set your prompt to use it. See https://docs.freeplay.ai/model-management for details.

2. Configure your Freeplay Client

Configure your Freeplay client using your Freeplay API key and API base URL.

3. Configure your Sagemaker Client

Configure your SageMaker client using your AWS access keys.

4. Fetch and Format your Prompt

Fetch your prompt from Freeplay, formatting it with your input variables

5. Call your Sagemaker Endpoint

Call your SageMaker endpoint directly, reading the endpoint name, inference component name, and model parameters from the formatted prompt object.

6. Record to Freeplay

Record the interaction back to Freeplay!

Examples

from freeplay import Freeplay, RecordPayload, TestRunInfo, CallInfo, ResponseInfo
import boto3
import os
from dotenv import load_dotenv
import time
import json

# ---------------------------------------------------------------------------
# Settings: read the Freeplay and AWS values from the "../.env" file.
# Any variable that is not set comes back as None.
# ---------------------------------------------------------------------------
load_dotenv("../.env")
project_id = os.environ.get("FREEPLAY_PROJECT_ID")
freeplay_key = os.environ.get("FREEPLAY_KEY")
freeplay_url = os.environ.get("FREEPLAY_URL")
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_key = os.environ.get("AWS_SECRET_KEY")

# ---------------------------------------------------------------------------
# Clients: one for Freeplay, one for the SageMaker runtime API.
# ---------------------------------------------------------------------------
fpClient = Freeplay(freeplay_api_key=freeplay_key, api_base=freeplay_url)

sagemakerClient = boto3.client(
    "sagemaker-runtime",
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_key,
    region_name="us-east-1",
)

# ---------------------------------------------------------------------------
# Prompt: fetch the "album_bot" template from Freeplay and fill in the
# input variables.
# ---------------------------------------------------------------------------
prompt_vars = {"pop_star": "Taylor Swift"}
formatted_prompt = fpClient.prompts.get_formatted(
    project_id=project_id,
    template_name="album_bot",
    environment="latest",
    variables=prompt_vars,
)

# The prompt object carries the endpoint routing details and the model
# parameters, so nothing about the endpoint is hard-coded here.
prompt_info = formatted_prompt.prompt_info
provider_info = prompt_info.provider_info
endpoint_name = provider_info["endpoint_name"]
inference_component_name = provider_info["inference_component_name"]

payload_body = {
    "inputs": formatted_prompt.llm_prompt_text,
    "parameters": prompt_info.model_parameters,
}

# ---------------------------------------------------------------------------
# LLM call: invoke the endpoint, capturing wall-clock start/end timestamps
# that are later handed to CallInfo.
# ---------------------------------------------------------------------------
start = time.time()
response = sagemakerClient.invoke_endpoint(
    Body=json.dumps(payload_body),
    ContentType="application/json",
    Accept="application/json",
    EndpointName=endpoint_name,
    InferenceComponentName=inference_component_name,
)
end = time.time()

# The response body is a stream; read it, decode it as UTF-8 JSON and pick
# out the generated text.
raw_body = response["Body"].read()
response_content = json.loads(raw_body.decode("utf-8"))["generated_text"]
print("response_content: ", response_content)

# ---------------------------------------------------------------------------
# Recording: append the assistant reply to the prompt messages and send the
# whole interaction back to Freeplay under a new session.
# ---------------------------------------------------------------------------
assistant_message = {"role": "assistant", "content": response_content}
all_messages = formatted_prompt.all_messages(assistant_message)

session = fpClient.sessions.create()

record_payload = RecordPayload(
    project_id=project_id,
    all_messages=all_messages,
    inputs=prompt_vars,
    session_info=session,
    prompt_version_info=prompt_info,
    call_info=CallInfo.from_prompt_info(
        prompt_info,
        start_time=start,
        end_time=end,
    ),
    response_info=ResponseInfo(is_complete=True),
)
fpClient.recordings.create(record_payload)