import os
import time
from openai import OpenAI
from freeplay import Freeplay, RecordPayload, ResponseInfo, CallInfo
# --- Client and test-run setup ---------------------------------------------
# The Freeplay API key and project id are read with os.environ[...], so a
# missing variable raises KeyError right here.
fp_client = Freeplay(freeplay_api_key=os.environ['FREEPLAY_API_KEY'])
# The OpenAI key is read with .get(), so None is handed to the client when the
# variable is unset.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
project_id = os.environ['FREEPLAY_PROJECT_ID']

# Fetch the prompt template named 'your-prompt' from the 'latest' environment.
template_prompt = fp_client.prompts.get(
    project_id=project_id,
    template_name='your-prompt',
    environment='latest',
)

# Create the test run whose test cases are iterated further down.
# NOTE(review): the second positional argument is presumably the dataset name
# (per its placeholder text) -- confirm against the Freeplay SDK.
test_run = fp_client.test_runs.create(
    project_id,
    "Name of your dataset",
    include_outputs=True,
    name='My Example Test Run',  # plain literal: there is nothing to interpolate
    description='Run from examples',
    flavor_name=template_prompt.prompt_info.flavor_name,
)
# Run every test case of the test run through the model and record the outcome
# in Freeplay, linked back to the test run.
for test_case in test_run.test_cases:
    # Fill the template with this test case's variables and history.
    formatted_prompt = template_prompt.bind(
        test_case.variables,
        history=test_case.history,
    ).format()
    prompt_info = formatted_prompt.prompt_info

    # start/end bracket only the model call; both are handed to CallInfo below.
    start = time.time()
    completion = openai_client.chat.completions.create(
        messages=formatted_prompt.llm_prompt,
        model=prompt_info.model,
        tools=formatted_prompt.tool_schema,
        **prompt_info.model_parameters,
    )
    end = time.time()

    # A new session is created for each test case.
    session = fp_client.sessions.create()
    choice = completion.choices[0]
    all_messages = formatted_prompt.all_messages(choice.message)

    # If the model requested a tool call, handle it here and append its result
    # to all_messages before recording.
    # OpenAI recipe: https://docs.freeplay.ai/developer-resources/recipes/using-tools-with-openai
    # Anthropic recipe: https://docs.freeplay.ai/developer-resources/recipes/using-tools-with-anthropic

    fp_client.recordings.create(
        RecordPayload(
            project_id=project_id,
            all_messages=all_messages,
            tool_schema=formatted_prompt.tool_schema,
            session_info=session.session_info,
            inputs=test_case.variables,
            prompt_version_info=prompt_info,
            call_info=CallInfo.from_prompt_info(prompt_info, start, end),
            # Marked complete only when the finish reason is exactly 'stop'.
            response_info=ResponseInfo(is_complete=choice.finish_reason == 'stop'),
            # Links this recording to the current test case of the test run.
            test_run_info=test_run.get_test_run_info(test_case.id),
            # Eval results attached to the recording; the values here are
            # hard-coded literals.
            eval_results={
                'f1-score': 0.48,
                'is_non_empty': True,
            },
        )
    )