Streaming Example

Streaming chat completion:

 1#!/usr/bin/env python
 2"""
 310 Streaming - Real-time Response Generation
 4
 5Learn how to receive responses in real-time as they're generated.
 6Streaming makes your application feel faster and more responsive.
 7
 8Level: Core Feature
 9"""
10
11from config_loader import get_chat_config, parse_args
12
13from lexilux import Chat
14
15
def main():
    """Demonstrate streaming chat completions.

    Parses the command-line arguments, loads the chat configuration (falling
    back to placeholder values when the config file or a required key is
    missing), builds a ``Chat`` client from it, and then runs three streaming
    examples. Each example prints the text of every chunk as soon as it is
    received and reports token usage once the final chunk arrives.
    """
    args = parse_args()
    try:
        config = get_chat_config(config_path=args.config)
    except (FileNotFoundError, KeyError) as e:
        # Report the problem and continue with placeholder settings so the
        # user can see which values need to be configured.
        print(f"Configuration error: {e}")
        print("\nUsing placeholder values. Please configure test_endpoints.json")
        config = {
            "base_url": "https://api.example.com/v1",
            "api_key": "your-api-key",
            "model": "gpt-4",
        }

    chat = Chat(**config)

    # Example 1: Basic streaming
    _print_banner("Example 1: Basic Streaming")
    print("Response: ", end="", flush=True)

    for chunk in chat.stream("Tell me a short joke about programming"):
        # Guard on the delta (as the later examples do) so a chunk that
        # carries no text prints nothing instead of an empty/None value.
        if chunk.delta:
            # flush=True pushes each piece to the terminal immediately.
            print(chunk.delta, end="", flush=True)
        if chunk.done:
            print(f"\n\nTokens used: {chunk.usage.total_tokens}")
    print()

    # Example 2: Streaming with more control
    _print_banner("Example 2: Streaming with Full Control")

    # Collect the pieces in a list and join once at the end rather than
    # growing a string with repeated concatenation.
    pieces = []
    for chunk in chat.stream("Count from 1 to 5"):
        if chunk.delta:
            pieces.append(chunk.delta)
            print(chunk.delta, end="", flush=True)

        # Check if we're done
        if chunk.done:
            full_response = "".join(pieces)
            print(f"\n\nComplete response: {full_response}")
            print(f"Input tokens: {chunk.usage.input_tokens}")
            print(f"Output tokens: {chunk.usage.output_tokens}")
            print(f"Total tokens: {chunk.usage.total_tokens}")
            print(f"Finish reason: {chunk.finish_reason}")
    print()

    # Example 3: Streaming for long content
    _print_banner("Example 3: Streaming Long Content")
    print("Ask a question that requires a longer answer...\n")

    question = "What are the main differences between Python and JavaScript?"
    for chunk in chat.stream(question):
        if chunk.delta:
            print(chunk.delta, end="", flush=True)
        if chunk.done:
            print(f"\n\n(Total tokens: {chunk.usage.total_tokens})")


def _print_banner(title):
    """Print *title* framed above and below by a 50-character ``=`` rule."""
    rule = "=" * 50
    print(rule)
    print(title)
    print(rule)
76
77
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()