from nbdev.showdoc import show_doc
transform
common transformations for LLM data
# Retry policy: randomized exponential backoff bounded by min=1 / max=60
# between attempts, giving up after the 6th attempt.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def chat(**kwargs):
    "A wrapper around `openai.ChatCompletion` that has automatic retries."
    # The key is read from the environment on every call (including retries),
    # so a missing `OPENAI_API_KEY` raises `KeyError` here.
    client.api_key = os.environ['OPENAI_API_KEY']
    # All keyword arguments are forwarded untouched to the chat completions endpoint.
    return client.chat.completions.create(**kwargs)
# Run id used throughout the examples below.
_tst_run_id = '1863d76e-1462-489a-a8a7-e0404239fe47'

with _temp_env_var(tmp_env): #context manager that has specific environment vars for testing
    # The run is split into its input messages, its output message and its functions.
    _inp, _out, _funcs = fetch_run_componets(_tst_run_id)

print(f"""first input:
{_inp[0]}
output:
{_out}
functions:
{_funcs}""")
first input:
{'role': 'system', 'content': "You are a helpful documentation Q&A assistant, trained to answer questions from LangSmith's documentation. LangChain is a framework for building applications using large language models.\nThe current time is 2023-09-05 16:49:07.308007.\n\nRelevant documents will be retrieved in the following messages."}
output:
{'role': 'assistant', 'content': "Currently, LangSmith does not support project migration between organizations. However, you can manually imitate this process by reading and writing runs and datasets using the SDK. Here's an example of exporting runs:\n\n1. Read the runs from the source organization using the SDK.\n2. Write the runs to the destination organization using the SDK.\n\nBy following this process, you can transfer your runs from one organization to another. However, it may be faster to create a new project within your destination organization and start fresh.\n\nIf you have any further questions or need assistance, please reach out to us at support@langchain.dev."}
functions:
[]
class RunData(BaseModel):
    "Key components of a run from LangSmith"
    inputs:List[dict]  # message dicts given to the LLM; each one carries a 'role' key
    output:dict        # the single message dict produced by the LLM
    funcs:List[dict]   # emitted under the "functions" key by `to_msg_dict`
    run_id:str         # id of the run these components were fetched from

    @classmethod
    def from_run_id(cls, run_id:str):
        "Create a `RunData` object from a run id."
        inputs, output, funcs = fetch_run_componets(run_id)
        return cls(inputs=inputs, output=output, funcs=funcs, run_id=run_id)

    def to_msg_dict(self):
        "Transform the instance into a dict in the format that can be used for OpenAI fine-tuning."
        # The output message is appended after the inputs so the conversation
        # ends with the assistant's reply.
        msgs = self.inputs + [self.output]
        return {"functions": self.funcs,
                "messages": msgs}

    def to_json(self):
        "The json version of `to_msg_dict`."
        return json.dumps(self.to_msg_dict())

    @property
    def outputs(self):
        "Return outputs for langsmith Datasets compatibility."
        return self.output

    @property
    def flat_input(self):
        "The input to the LLM in markdown."
        return self._flatten_data(self.inputs)

    @property
    def flat_output(self):
        "The output of the LLM in markdown."
        # `_flatten_data` expects a list of messages, so wrap the single output.
        return self._flatten_data([self.output])

    @classmethod
    def _flatten_data(cls, data):
        "Produce a flattened view of the data as human readable Markdown."
        md_str = ""
        for item in data:
            # Heading: the role, annotated when the message is a function call
            # or the result of one.
            role = item['role']
            if role == 'assistant' and 'function_call' in item:
                role += ' - function call'
            if role == 'function':
                role += ' - results'

            md_str += f"### {role.title()}\n\n"

            # Body: prefer the text content; when it is missing or empty and
            # the message is a function call, render it as `name(arg=value, ...)`.
            content = item.get('content', '')
            if content:
                md_str += content + "\n"
            elif 'function_call' in item:
                func_name = item['function_call']['name']
                # The arguments are stored as a JSON-encoded string.
                args = json.loads(item['function_call']['arguments'])
                formatted_args = ', '.join([f"{k}={v}" for k, v in args.items()])
                md_str += f"{func_name}({formatted_args})\n"
            md_str += "\n"
        return md_str
RunData.from_run_id
RunData.from_run_id (run_id:str)
Create a `RunData` object from a run id.
with _temp_env_var(tmp_env): #context manager that has specific environment vars for testing
    rd = RunData.from_run_id(_tst_run_id)

print(f'Run {rd.run_id} has {len(rd.inputs)} inputs.')
print(f'Run {rd.run_id} output:\n{rd.output}')
Run 1863d76e-1462-489a-a8a7-e0404239fe47 has 3 inputs.
Run 1863d76e-1462-489a-a8a7-e0404239fe47 output:
{'role': 'assistant', 'content': "Currently, LangSmith does not support project migration between organizations. However, you can manually imitate this process by reading and writing runs and datasets using the SDK. Here's an example of exporting runs:\n\n1. Read the runs from the source organization using the SDK.\n2. Write the runs to the destination organization using the SDK.\n\nBy following this process, you can transfer your runs from one organization to another. However, it may be faster to create a new project within your destination organization and start fresh.\n\nIf you have any further questions or need assistance, please reach out to us at support@langchain.dev."}
RunData.to_msg_dict
RunData.to_msg_dict ()
Transform the instance into a dict in the format that can be used for OpenAI fine-tuning.
# Show only the last two messages of the fine-tuning payload.
rd.to_msg_dict()['messages'][-2:]
[{'role': 'user',
'content': 'How do I move my project between organizations?'},
{'role': 'assistant',
'content': "Currently, LangSmith does not support project migration between organizations. However, you can manually imitate this process by reading and writing runs and datasets using the SDK. Here's an example of exporting runs:\n\n1. Read the runs from the source organization using the SDK.\n2. Write the runs to the destination organization using the SDK.\n\nBy following this process, you can transfer your runs from one organization to another. However, it may be faster to create a new project within your destination organization and start fresh.\n\nIf you have any further questions or need assistance, please reach out to us at support@langchain.dev."}]
RunData.to_json
RunData.to_json ()
The JSON version of `to_msg_dict`.
# Show only the first 100 characters of the JSON string.
rd.to_json()[:100]
'{"functions": [], "messages": [{"role": "system", "content": "You are a helpful documentation Q&A as'
The properties `flat_input` and `flat_output` allow you to view the input to the LLM and its output in a human-readable format (markdown):
RunData.flat_input
RunData.flat_input ()
The input to the LLM in markdown.
# Print only the first 400 characters of the markdown rendering of the inputs.
print(rd.flat_input[:400])
### System
You are a helpful documentation Q&A assistant, trained to answer questions from LangSmith's documentation. LangChain is a framework for building applications using large language models.
The current time is 2023-09-05 16:49:07.308007.
Relevant documents will be retrieved in the following messages.
### System
Skip to main content
**🦜️🛠️ LangSmith Docs**Python DocsJS/TS Docs
Sear
RunData.flat_output
RunData.flat_output ()
The output of the LLM in markdown.
# Print the full markdown rendering of the output message.
print(rd.flat_output)
### Assistant
Currently, LangSmith does not support project migration between organizations. However, you can manually imitate this process by reading and writing runs and datasets using the SDK. Here's an example of exporting runs:
1. Read the runs from the source organization using the SDK.
2. Write the runs to the destination organization using the SDK.
By following this process, you can transfer your runs from one organization to another. However, it may be faster to create a new project within your destination organization and start fresh.
If you have any further questions or need assistance, please reach out to us at support@langchain.dev.
Preparing `.jsonl` files
OpenAI fine-tuning takes `.jsonl` files.
_rids = ['59080971-8786-4849-be88-898d3ffc2b45', '8cd7deed-9547-4a07-ac01-55e9513ca1cd']
# Build one `RunData` per run id, then write them all to a single .jsonl file.
_tsfm_runs = [RunData.from_run_id(rid) for rid in _rids]
write_to_jsonl(_tsfm_runs, '_data/test_data.jsonl');
It can save you time to validate jsonl files prior to uploading them.
# Validate the file written above before uploading it.
validate_jsonl('_data/test_data.jsonl')
Num examples: 2
No errors found