Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions models/ref/sdk-coding-cheat-sheet.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ Select a card to view code examples in that category.
<Card title="Runs" href="/models/ref/sdk-coding-cheat-sheet/runs">
Code examples to initialize, manage, and fork W&B runs
</Card>
<Card title="Sweeps" href="/models/ref/sdk-coding-cheat-sheet/sweeps">
Code examples to configure, create, and run hyperparameter sweeps with W&B
</Card>
<Card title="Logging" href="/models/ref/sdk-coding-cheat-sheet/logging">
Code examples to log metrics, hyperparameters, tables, and custom data to W&B
</Card>
Expand Down
1 change: 1 addition & 0 deletions models/ref/sdk-coding-cheat-sheet/artifacts.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,4 @@ Create, update, download, and manage W&B Artifacts for data versioning.
## Given an existing artifact, update its description, metadata, and aliases without creating a new run

<ArtifactUpdateExisting />

1 change: 1 addition & 0 deletions models/ref/sdk-coding-cheat-sheet/logging.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ Log metrics, hyperparameters, tables, and custom data to W&B.
## Log a table

<LogTable />

1 change: 1 addition & 0 deletions models/ref/sdk-coding-cheat-sheet/registry.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ Work with W&B Model Registry to organize and manage model versions.
## Remove a tag from a collection in a registry

<RegistryCollectionTagsRemove />

1 change: 1 addition & 0 deletions models/ref/sdk-coding-cheat-sheet/runs.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ Initialize and manage W&B runs to organize your experiments and track your work.
## Add one or more tags to previously saved runs

<RunsUpdateTagPublicApi />

19 changes: 16 additions & 3 deletions snippets/CodeSnippet.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
* AUTO-GENERATED: Do not edit manually. Run sync_code_examples.sh to regenerate.
*/


// Import all MDX-wrapped code examples
import ArtifactAddAlias from '/snippets/_includes/code-examples/artifact_add_alias.mdx';
import ArtifactAddAliasExisting from '/snippets/_includes/code-examples/artifact_add_alias_existing.mdx';
Expand Down Expand Up @@ -61,6 +60,13 @@ import SweepConfig from '/snippets/_includes/code-examples/sweep_config.mdx';
import SweepCreate from '/snippets/_includes/code-examples/sweep_create.mdx';
import SweepInitialize from '/snippets/_includes/code-examples/sweep_initialize.mdx';
import SweepStart from '/snippets/_includes/code-examples/sweep_start.mdx';
import WeaveEvalBasic from '/snippets/_includes/code-examples/weave_eval_basic.mdx';
import WeaveExportMetrics from '/snippets/_includes/code-examples/weave_export_metrics.mdx';
import WeavePublishDataset from '/snippets/_includes/code-examples/weave_publish_dataset.mdx';
import WeaveScoringFunction from '/snippets/_includes/code-examples/weave_scoring_function.mdx';
import WeaveTraceCall from '/snippets/_includes/code-examples/weave_trace_call.mdx';
import WeaveTraceImages from '/snippets/_includes/code-examples/weave_trace_images.mdx';
import WeaveTraceOp from '/snippets/_includes/code-examples/weave_trace_op.mdx';

// Map filenames to imported content
const snippets = {
Expand Down Expand Up @@ -111,19 +117,26 @@ const snippets = {
'sweep_create.py': SweepCreate,
'sweep_initialize.py': SweepInitialize,
'sweep_start.py': SweepStart,
'weave_eval_basic.py': WeaveEvalBasic,
'weave_export_metrics.py': WeaveExportMetrics,
'weave_publish_dataset.py': WeavePublishDataset,
'weave_scoring_function.py': WeaveScoringFunction,
'weave_trace_call.py': WeaveTraceCall,
'weave_trace_images.py': WeaveTraceImages,
'weave_trace_op.py': WeaveTraceOp,
};

export const CodeSnippet = ({ file }) => {
const Component = snippets[file];

if (!Component) {
return (
<div style={{ padding: '1rem', background: '#fee', border: '1px solid #fcc', borderRadius: '4px' }}>
<p style={{ margin: 0, color: '#c00' }}>Code snippet not found: {file}</p>
</div>
);
}

return <Component />;
};

Expand Down
71 changes: 71 additions & 0 deletions snippets/_includes/code-examples/weave_eval_basic.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
```python
'''
Create a basic Weave evaluation pipeline for scoring responses from a model.
'''
import json
import asyncio
import openai
import weave
from weave.scorers import MultiTaskBinaryClassificationF1

# Connect to the Weave project (only needs to be done once)
weave.init('your-team-name/your-project-name')


# Model under evaluation
class ExtractFruitsModel(weave.Model):
    model_name: str
    prompt_template: str

    @weave.op()
    async def predict(self, sentence: str) -> dict:
        client = openai.AsyncClient()
        # Fill the prompt template with the sentence and send it as a single user message
        prompt = self.prompt_template.format(sentence=sentence)
        completion = await client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
        )
        content = completion.choices[0].message.content
        if content is None:
            raise ValueError("No response from model")
        # The reply text is parsed as JSON and returned as the prediction
        return json.loads(content)


# Build the model instance
model = ExtractFruitsModel(
    model_name='gpt-3.5-turbo-1106',
    prompt_template='Extract fields ("fruit": <str>, "color": <str>, "flavor": <str>) from the following text, as json: {sentence}'
)

# Input sentences and the fields expected to be extracted from each one
sentences = [
    "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.",
    "Pounits are a bright green color and are more savory than sweet.",
    "Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.",
]
labels = [
    {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'},
    {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'},
    {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'},
]

# Pair each sentence with its label; the id is the row position as a string
examples = [
    {'id': str(index), 'sentence': sentence, 'target': label}
    for index, (sentence, label) in enumerate(zip(sentences, labels))
]

# Create the dataset and publish it
dataset = weave.Dataset(name='fruits', rows=examples)
weave.publish(dataset)


# Custom scorer: checks whether the extracted fruit name equals the target's
@weave.op()
def fruit_name_score(target: dict, output: dict) -> dict:
    is_correct = target['fruit'] == output['fruit']
    return {'correct': is_correct}


# Set up the evaluation with the built-in F1 scorer and the custom scorer
evaluation = weave.Evaluation(
    name='fruit_eval',
    dataset=dataset,
    scorers=[
        MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]),
        fruit_name_score,
    ],
)

# Run the evaluation against the model and print what it returns
print(asyncio.run(evaluation.evaluate(model)))
```
69 changes: 69 additions & 0 deletions snippets/_includes/code-examples/weave_eval_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
'''
Create a basic Weave evaluation pipeline for scoring responses from a model.
'''
import json
import asyncio
import openai
import weave
from weave.scorers import MultiTaskBinaryClassificationF1

# Connect to the Weave project (only needs to be done once)
weave.init('your-team-name/your-project-name')


# Model under evaluation
class ExtractFruitsModel(weave.Model):
    model_name: str
    prompt_template: str

    @weave.op()
    async def predict(self, sentence: str) -> dict:
        client = openai.AsyncClient()
        # Fill the prompt template with the sentence and send it as a single user message
        prompt = self.prompt_template.format(sentence=sentence)
        completion = await client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
        )
        content = completion.choices[0].message.content
        if content is None:
            raise ValueError("No response from model")
        # The reply text is parsed as JSON and returned as the prediction
        return json.loads(content)


# Build the model instance
model = ExtractFruitsModel(
    model_name='gpt-3.5-turbo-1106',
    prompt_template='Extract fields ("fruit": <str>, "color": <str>, "flavor": <str>) from the following text, as json: {sentence}'
)

# Input sentences and the fields expected to be extracted from each one
sentences = [
    "There are many fruits that were found on the recently discovered planet Goocrux. There are neoskizzles that grow there, which are purple and taste like candy.",
    "Pounits are a bright green color and are more savory than sweet.",
    "Finally, there are fruits called glowls, which have a very sour and bitter taste which is acidic and caustic, and a pale orange tinge to them.",
]
labels = [
    {'fruit': 'neoskizzles', 'color': 'purple', 'flavor': 'candy'},
    {'fruit': 'pounits', 'color': 'bright green', 'flavor': 'savory'},
    {'fruit': 'glowls', 'color': 'pale orange', 'flavor': 'sour and bitter'},
]

# Pair each sentence with its label; the id is the row position as a string
examples = [
    {'id': str(index), 'sentence': sentence, 'target': label}
    for index, (sentence, label) in enumerate(zip(sentences, labels))
]

# Create the dataset and publish it
dataset = weave.Dataset(name='fruits', rows=examples)
weave.publish(dataset)


# Custom scorer: checks whether the extracted fruit name equals the target's
@weave.op()
def fruit_name_score(target: dict, output: dict) -> dict:
    is_correct = target['fruit'] == output['fruit']
    return {'correct': is_correct}


# Set up the evaluation with the built-in F1 scorer and the custom scorer
evaluation = weave.Evaluation(
    name='fruit_eval',
    dataset=dataset,
    scorers=[
        MultiTaskBinaryClassificationF1(class_names=["fruit", "color", "flavor"]),
        fruit_name_score,
    ],
)

# Run the evaluation against the model and print what it returns
print(asyncio.run(evaluation.evaluate(model)))
42 changes: 42 additions & 0 deletions snippets/_includes/code-examples/weave_export_metrics.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
```python
'''
Retrieve metrics about your calls
'''

import requests
import json
import os

# Weave API URL
url = "https://trace.wandb.ai/calls/stats"

# Configure the types of metrics to retrieve for a specified time range
payload = {
    "project_id": "<your-team-name/your-project-name>",
    # Specify time range
    "start": "2026-03-01T00:00:00Z",
    "end": "2026-03-10T00:00:00Z",
    # Specify the size of the buckets, in seconds.
    "granularity": 86400,
    "filter": {
        "trace_roots_only": True,
        "op_names": ["web_app"]
    },
    # Specify metrics and their aggregate function
    "usage_metrics": [
        {"metric": "total_tokens", "aggregations": ["sum"]},
        {"metric": "total_cost", "aggregations": ["sum"]}
    ],
    "call_metrics": [
        {"metric": "call_count", "aggregations": ["sum"]},
        {"metric": "error_count", "aggregations": ["sum"]},
        {"metric": "latency_ms", "aggregations": ["avg", "min", "max"], "percentiles": [50, 95, 99]}
    ]
}

# Read the API key from the environment. Stop early with a clear message when
# it is missing, rather than sending a request that cannot authenticate.
API_KEY = os.getenv("WANDB_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the WANDB_API_KEY environment variable before running this example.")

# The timeout keeps the script from waiting indefinitely on a stalled connection
response = requests.post(url, json=payload, auth=("api", API_KEY), timeout=30)

# Raise on HTTP error statuses before trying to parse the body as JSON
response.raise_for_status()

print(json.dumps(response.json(), indent=2))
```
40 changes: 40 additions & 0 deletions snippets/_includes/code-examples/weave_export_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
'''
Retrieve metrics about your calls
'''

import requests
import json
import os

# Weave API URL
url = "https://trace.wandb.ai/calls/stats"

# Configure the types of metrics to retrieve for a specified time range
payload = {
    "project_id": "<your-team-name/your-project-name>",
    # Specify time range
    "start": "2026-03-01T00:00:00Z",
    "end": "2026-03-10T00:00:00Z",
    # Specify the size of the buckets, in seconds.
    "granularity": 86400,
    "filter": {
        "trace_roots_only": True,
        "op_names": ["web_app"]
    },
    # Specify metrics and their aggregate function
    "usage_metrics": [
        {"metric": "total_tokens", "aggregations": ["sum"]},
        {"metric": "total_cost", "aggregations": ["sum"]}
    ],
    "call_metrics": [
        {"metric": "call_count", "aggregations": ["sum"]},
        {"metric": "error_count", "aggregations": ["sum"]},
        {"metric": "latency_ms", "aggregations": ["avg", "min", "max"], "percentiles": [50, 95, 99]}
    ]
}

# Read the API key from the environment. Stop early with a clear message when
# it is missing, rather than sending a request that cannot authenticate.
API_KEY = os.getenv("WANDB_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the WANDB_API_KEY environment variable before running this example.")

# The timeout keeps the script from waiting indefinitely on a stalled connection
response = requests.post(url, json=payload, auth=("api", API_KEY), timeout=30)

# Raise on HTTP error statuses before trying to parse the body as JSON
response.raise_for_status()

print(json.dumps(response.json(), indent=2))
29 changes: 29 additions & 0 deletions snippets/_includes/code-examples/weave_publish_dataset.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
```python
'''
Publish a dataset to Weave
'''

import weave
from weave import Dataset

# Connect to the target Weave project
weave.init('<your-team-name>/<your-project-name>')

# Each row pairs a sentence with its corrected form
rows = [
    {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
    {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
    {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."},
]

# Build the dataset from the rows
dataset = Dataset(name='grammar', rows=rows)

# Publish it to Weave
weave.publish(dataset)

# Fetch the published dataset back by name
dataset_ref = weave.ref('grammar').get()

# Read the 'sentence' field of the third row
example_label = dataset_ref.rows[2]['sentence']
```
27 changes: 27 additions & 0 deletions snippets/_includes/code-examples/weave_publish_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
'''
Publish a dataset to Weave
'''

import weave
from weave import Dataset

# Connect to the target Weave project
weave.init('<your-team-name>/<your-project-name>')

# Each row pairs a sentence with its corrected form
rows = [
    {'id': '0', 'sentence': "He no likes ice cream.", 'correction': "He doesn't like ice cream."},
    {'id': '1', 'sentence': "She goed to the store.", 'correction': "She went to the store."},
    {'id': '2', 'sentence': "They plays video games all day.", 'correction': "They play video games all day."},
]

# Build the dataset from the rows
dataset = Dataset(name='grammar', rows=rows)

# Publish it to Weave
weave.publish(dataset)

# Fetch the published dataset back by name
dataset_ref = weave.ref('grammar').get()

# Read the 'sentence' field of the third row
example_label = dataset_ref.rows[2]['sentence']
20 changes: 20 additions & 0 deletions snippets/_includes/code-examples/weave_scoring_function.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
```python
'''
Define custom scoring function
'''

import weave

# Example rows: each has a question and the answer expected for it
examples = [
    {"question": "What is the capital of France?", "expected": "Paris"},
    {"question": "Who wrote 'To Kill a Mockingbird'?", "expected": "Harper Lee"},
    {"question": "What is the square root of 64?", "expected": "8"},
]


# Custom scoring function, wrapped as a Weave op
@weave.op()
def match_score1(expected: str, output: dict) -> dict:
    # Exact equality between the expected answer and the output's generated text
    is_match = expected == output['generated_text']
    return {'match': is_match}
```
Loading
Loading