Auto Continue Example

Continue cut-off responses automatically:

  1#!/usr/bin/env python
  2"""
  341 Auto Continue - Handle Long Responses
  4
  5Learn how to automatically continue responses that get cut off
  6due to max_tokens limits.
  7
  8Level: Expert Topic
  9"""
 10
 11from config_loader import get_chat_config, parse_args
 12
 13from lexilux import Chat
 14
 15
 16def main():
 17    """Demonstrate auto-continue functionality."""
 18    args = parse_args()
 19    try:
 20        config = get_chat_config(config_path=args.config)
 21    except (FileNotFoundError, KeyError) as e:
 22        print(f"Configuration error: {e}")
 23        print("\nUsing placeholder values. Please configure test_endpoints.json")
 24        config = {
 25            "base_url": "https://api.example.com/v1",
 26            "api_key": "your-api-key",
 27            "model": "gpt-4",
 28        }
 29
 30    chat = Chat(**config)
 31
 32    # Example 1: Simple auto continue
 33    print("=" * 50)
 34    print("Example 1: Auto Continue Truncated Response")
 35    print("=" * 50)
 36
 37    # Set a very low max_tokens to trigger truncation
 38    result = chat(
 39        "Write a detailed explanation of machine learning. "
 40        "Include at least 10 paragraphs.",
 41        max_tokens=50,  # Very small to force truncation
 42        auto_continue=True,
 43        max_continues=3,
 44    )
 45
 46    print("Response with auto-continue:")
 47    print(result.text)
 48    print(f"\nFinish reason: {result.finish_reason}")
 49    print(f"Total tokens: {result.usage.total_tokens}")
 50    print(f"Continues used: {result.metadata.get('continue_count', 0)}\n")
 51
 52    # Example 2: Manual continue
 53    print("=" * 50)
 54    print("Example 2: Manual Continue (more control)")
 55    print("=" * 50)
 56
 57    # First request
 58    result = chat(
 59        "Explain quantum computing in detail",
 60        max_tokens=50,
 61    )
 62
 63    print("First response:")
 64    print(result.text)
 65    print(f"Finish reason: {result.finish_reason}\n")
 66
 67    # If truncated, continue
 68    if result.finish_reason == "length":
 69        print("Response was truncated. Continuing...\n")
 70
 71        continued_result = chat.continue_chat(
 72            result,
 73            max_continues=2,
 74            max_tokens=50,
 75        )
 76
 77        print("Continued response:")
 78        print(continued_result.text)
 79        print(f"\nFinal finish reason: {continued_result.finish_reason}")
 80        print(f"Total tokens: {continued_result.usage.total_tokens}\n")
 81
 82    # Example 3: Streaming with continue
 83    print("=" * 50)
 84    print("Example 3: Streaming with Auto Continue")
 85    print("=" * 50)
 86
 87    print("Streaming response with auto-continue:\n")
 88
 89    for chunk in chat.stream(
 90        "Tell me everything about the history of computing",
 91        max_tokens=30,
 92        auto_continue=True,
 93        max_continues=2,
 94    ):
 95        if chunk.delta:
 96            print(chunk.delta, end="", flush=True)
 97
 98        if chunk.done:
 99            print(f"\n\nTotal tokens: {chunk.usage.total_tokens}")
100            print(f"Finish reason: {chunk.finish_reason}")
101
102            # Check if continues were used
103            continue_count = chunk.metadata.get("continue_count", 0)
104            if continue_count > 0:
105                print(f"Continues used: {continue_count}")
106    print()
107
108    # Example 4: Custom continue prompt
109    print("=" * 50)
110    print("Example 4: Custom Continue Prompt")
111    print("=" * 50)
112
113    result = chat(
114        "List 20 programming languages",
115        max_tokens=30,
116        auto_continue=True,
117        continue_prompt="Continue the list from where you stopped",
118        max_continues=3,
119    )
120
121    print("Response with custom continue prompt:")
122    print(result.text)
123    print(f"\nTokens: {result.usage.total_tokens}\n")
124
125    # Example 5: Ensure complete response
126    print("=" * 50)
127    print("Example 5: Ensure Complete Response")
128    print("=" * 50)
129
130    print("Ensuring response completes (raises error if still truncated):\n")
131
132    try:
133        result = chat(
134            "Write a very long essay about artificial intelligence",
135            max_tokens=50,
136            auto_continue=True,
137            max_continues=2,
138            ensure_complete=True,
139        )
140        print("Response completed successfully!")
141        print(f"Tokens: {result.usage.total_tokens}")
142    except Exception as e:
143        print(f"Could not complete response: {e}")
144        print("Try increasing max_continues or max_tokens")
145
146
147if __name__ == "__main__":
148    main()