Advanced Claude API Cheat Sheet

Advanced Claude API cheat sheet covering function calling, streaming responses, prompt caching, vision capabilities, and best practices for building with Anthropic's Claude AI. Updated December 2025.

Last Updated: December 24, 2025

Basic API Setup

pip install anthropic
Install Python SDK
npm install @anthropic-ai/sdk
Install Node.js SDK
export ANTHROPIC_API_KEY='your_key_here'
Set API key as environment variable
import anthropic

# With no api_key argument the client reads ANTHROPIC_API_KEY from the
# environment (exported above), which keeps the secret out of source code.
client = anthropic.Anthropic()

message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello, Claude"}
    ]
)
# message.content is a list of content blocks; print the first block's text
# rather than the repr of the whole list.
print(message.content[0].text)

Available Models

Model ID Best For
Claude Opus 4.5 claude-opus-4-5-20251101 Complex tasks, maximum intelligence
Claude Sonnet 4.5 claude-sonnet-4-5-20250929 Balanced performance, most tasks
Claude Haiku 3.5 claude-3-5-haiku-20241022 Fast responses, simple tasks

Function Calling (Tool Use)

# JSON Schema describing the arguments Claude may pass to get_weather.
# Only "location" is mandatory; "unit" is restricted to the two listed values.
weather_input_schema = {
    "type": "object",
    "properties": {
        "location": {"type": "string", "description": "City name"},
        "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "Temperature unit",
        },
    },
    "required": ["location"],
}

# Tool definitions passed to the API through the `tools` parameter.
tools = [
    {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "input_schema": weather_input_schema,
    },
]

# Keep the conversation in one list so the follow-up request replays exactly
# the user message Claude saw when it asked for the tool.
messages = [
    {"role": "user", "content": "What's the weather in San Francisco?"}
]

message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    tools=tools,
    messages=messages
)

# Handle tool use response
if message.stop_reason == "tool_use":
    # Collect one tool_result per tool_use block. Claude may request several
    # tools in a single turn, and every request must be answered in the same
    # follow-up user message.
    tool_results = []
    for content in message.content:
        if content.type == "tool_use":
            # Execute your function here (dispatch on content.name when you
            # register more than one tool)
            result = get_weather(**content.input)
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": content.id,
                "content": str(result)
            })

    # Send all results back to Claude in a single request
    response = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        tools=tools,
        messages=messages + [
            {"role": "assistant", "content": message.content},
            {"role": "user", "content": tool_results}
        ]
    )

Streaming Responses

# Simplest form: iterate text_stream and print each text chunk as it arrives.
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story"}
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

# With event handling
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello"}]
) as stream:
    for event in stream:
        if event.type == "content_block_start":
            print("Content started")
        elif event.type == "content_block_delta":
            # Not every delta carries text (tool-input JSON and thinking
            # deltas do not), so check the delta type before reading .text.
            if event.delta.type == "text_delta":
                print(event.delta.text, end="", flush=True)
        elif event.type == "message_stop":
            print("\nDone")

Prompt Caching

cache_control: {"type": "ephemeral"}
Cache content blocks for 5 minutes (the default lifetime, refreshed each time the cached content is used)
# System prompt block, tagged with cache_control.
cached_system = [
    {
        "type": "text",
        "text": "You are an AI assistant with expertise in Python.",
        "cache_control": {"type": "ephemeral"},
    }
]

# The large context comes first and carries the cache marker; the question
# that follows it has none.
user_content = [
    {
        "type": "text",
        "text": large_code_base,
        "cache_control": {"type": "ephemeral"},
    },
    {"type": "text", "text": "Explain this code"},
]

message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    system=cached_system,
    messages=[{"role": "user", "content": user_content}],
)

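# Cache reads are billed at about 10% of the normal input price (a 90% saving) and cut latency significantly; cache writes cost about 25% more than normal input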

Vision Capabilities

import base64

# Read the image bytes, then base64-encode them into a string for the request.
with open("image.jpg", "rb") as image_file:
    raw_bytes = image_file.read()
image_data = base64.standard_b64encode(raw_bytes).decode("utf-8")

# Image block: inline base64 data plus its media type.
image_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": image_data,
    },
}
# Text block: the question asked about the image.
prompt_block = {"type": "text", "text": "Describe this image"}

message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": [image_block, prompt_block]}],
)

# Supported formats: JPEG, PNG, GIF, WebP
# Max size: 5MB per image

System Prompts & Context

# The system prompt is given as a list of text blocks; only the last block
# carries a cache_control marker.
system_blocks = [
    {"type": "text", "text": "You are a helpful AI coding assistant."},
    {
        "type": "text",
        "text": "Always provide code examples with explanations.",
        "cache_control": {"type": "ephemeral"},
    },
]

user_turn = {"role": "user", "content": "Help me with Python"}

message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    system=system_blocks,
    messages=[user_turn],
)

# System prompts strongly influence Claude's behavior

Message History & Multi-turn Conversations

# Prior turns as (role, text) pairs, oldest first.
turns = [
    ("user", "What's 2+2?"),
    ("assistant", "2+2 equals 4."),
    ("user", "What about 3+3?"),
]
conversation = [{"role": role, "content": text} for role, text in turns]

# The full history is sent with the request.
message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=conversation,
)

# Add response to conversation so the next request includes it
reply_text = message.content[0].text
conversation.append({"role": "assistant", "content": reply_text})

Temperature & Sampling Parameters

Parameter Range Description
temperature 0.0 - 1.0 Randomness (0 = focused, 1 = creative)
top_p 0.0 - 1.0 Nucleus sampling threshold (adjust temperature or top_p, not both)
top_k integer ≥ 0 Limit token selection to the top K options (advanced use only)
message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    # Tune temperature OR top_p, not both: the API reference advises altering
    # only one of them per request. To use nucleus sampling instead, replace
    # this line with top_p=0.9.
    temperature=0.7,
    messages=[{"role": "user", "content": "Be creative"}]
)

Error Handling

from anthropic import (
    APIError,
    RateLimitError,
    APIConnectionError,
    AuthenticationError
)

request_args = {
    "model": "claude-sonnet-4-20250514",
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "Hello"}],
}

# Specific exception classes are listed before the general APIError so each
# failure gets its own message; APIError catches whatever remains.
try:
    message = client.messages.create(**request_args)
except RateLimitError:
    print("Rate limit exceeded, wait before retrying")
except AuthenticationError:
    print("Invalid API key")
except APIConnectionError:
    print("Network connection failed")
except APIError as e:
    print(f"API error: {e}")

Best Practices

Use cache_control for repeated context
Reduce costs by 90% on cached content
Stream for long responses
Improve user experience with real-time output
Set max_tokens appropriately
Avoid unnecessary costs and timeouts
Use system prompts for consistency
Define behavior once, apply to all messages
Implement exponential backoff
Handle rate limits gracefully
Use temperature=0 for the most consistent output
Get near-identical results for the same inputs (output is not guaranteed to be fully deterministic)

Pricing Optimization

Strategy Savings
Prompt caching (cache hits) 90% on cached tokens
Use Haiku for simple tasks Several times cheaper than Opus (check the current pricing page for exact ratios)
Reduce max_tokens Pay only for what you need
Batch requests 50% discount with the Message Batches API (asynchronous processing)
💡 Pro Tip: Use prompt caching for large codebases, documentation, or any repeated context. Cache the system prompt and large context blocks, then vary only the user questions. This can reduce costs by 90% and improve response times significantly.
← Back to Data Science & ML | Browse all categories | View all cheat sheets