Multimodal Example
Vision capabilities with image inputs:
#!/usr/bin/env python
"""
31 Multimodal - Text and Images Together

Learn how to process images along with text.
This enables vision capabilities like analyzing images, reading charts, etc.

Level: Advanced Feature
"""
10
import base64
import mimetypes
from pathlib import Path

from config_loader import get_chat_config, parse_args

from lexilux import Chat, ImageContentBlock, TextContentBlock
17
18
def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    The file is opened in binary mode, read in full, base64-encoded, and the
    encoded bytes are decoded to ``str`` so the result can be embedded directly
    in a ``data:`` URL.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes; an empty string for an
        empty file.

    Raises:
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError`` when the path does not exist).
    """
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")
23
24
def _print_banner(title: str) -> None:
    """Print *title* framed above and below by a 50-character ``=`` rule."""
    rule = "=" * 50
    print(rule)
    print(title)
    print(rule)


def _ask(chat, messages, *, label: str = "Response", failure_note: str = "") -> None:
    """Call *chat* with *messages* and print either the reply or the error.

    Args:
        chat: Callable invoked as ``chat(messages)``; its result must expose
            a ``text`` attribute.
        messages: The message list passed to *chat* unchanged.
        label: Text printed before the reply, followed by ``": "``.
        failure_note: Optional hint printed before the error line when the
            call fails; nothing extra is printed when it is empty.
    """
    try:
        result = chat(messages)
        print(f"{label}: {result.text}\n")
    except Exception as e:
        # Deliberately broad: this is a demo script, so any failure is
        # reported and the remaining examples still get to run.
        if failure_note:
            print(failure_note)
        print(f"Error: {e}\n")


def main():
    """Demonstrate multimodal capabilities.

    Loads the chat configuration (falling back to placeholder values when it
    is missing), builds a ``Chat`` client, and runs five examples in order:
    an image referenced by URL, a local image sent as a base64 ``data:`` URL,
    two images in one message, an explicit image detail level, and reading
    text from an image. A failure in one example is printed and does not stop
    the following ones.
    """
    args = parse_args()
    try:
        config = get_chat_config(config_path=args.config)
    except (FileNotFoundError, KeyError) as e:
        print(f"Configuration error: {e}")
        print("\nUsing placeholder values. Please configure test_endpoints.json")
        config = {
            "base_url": "https://api.example.com/v1",
            "api_key": "your-api-key",
            "model": "gpt-4-vision-preview",
        }

    chat = Chat(**config)

    # Example 1: Image from URL
    _print_banner("Example 1: Analyze Image from URL")

    # NOTE(review): this URL points at an SVG file; some vision endpoints only
    # accept raster formats (PNG/JPEG/WEBP/GIF) - confirm it works with the
    # configured model.
    messages = [
        {
            "role": "user",
            "content": [
                TextContentBlock(text="What's in this image? Describe it briefly."),
                ImageContentBlock(
                    image_url={
                        "url": "https://raw.githubusercontent.com/python/python-logo/main/Python-logo-notext.svg"
                    }
                ),
            ],
        }
    ]
    _ask(
        chat,
        messages,
        failure_note="Note: This example requires a vision-enabled model.",
    )

    # Example 2: Local image (base64 encoded)
    _print_banner("Example 2: Analyze Local Image")

    # Candidate locations for a sample image; the first one that exists wins.
    sample_images = [
        "examples/sample_image.png",
        "examples/sample_image.jpg",
        "/tmp/sample_image.png",
    ]
    image_path = next((p for p in sample_images if Path(p).exists()), None)

    if image_path:
        base64_image = encode_image(image_path)

        # Derive the media type from the file name so a .jpg sample is not
        # mislabeled as PNG; fall back to PNG when the type is unknown or is
        # not an image type.
        mime_type, _ = mimetypes.guess_type(image_path)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"

        messages = [
            {
                "role": "user",
                "content": [
                    TextContentBlock(text="Describe this image in detail."),
                    ImageContentBlock(
                        image_url={"url": f"data:{mime_type};base64,{base64_image}"}
                    ),
                ],
            }
        ]
        _ask(chat, messages)
    else:
        print("No sample image found. To test local images:")
        print(" 1. Place an image at examples/sample_image.png")
        print(" 2. Or update the image_path variable\n")

    # Example 3: Multiple images
    _print_banner("Example 3: Compare Multiple Images")

    messages = [
        {
            "role": "user",
            "content": [
                TextContentBlock(
                    text="What are the differences between these two images?"
                ),
                ImageContentBlock(
                    image_url={"url": "https://placehold.co/100x100/red/red"}
                ),
                ImageContentBlock(
                    image_url={"url": "https://placehold.co/100x100/blue/blue"}
                ),
            ],
        }
    ]
    _ask(
        chat,
        messages,
        failure_note="Note: This example requires network access and vision model.",
    )

    # Example 4: Image detail levels
    _print_banner("Example 4: Image Detail Levels")

    print("Different detail levels for different use cases:\n")

    print("Low detail - faster, less detailed analysis:")
    print(" ImageUrlDetail.LOW\n")

    print("High detail - slower, more detailed analysis:")
    print(" ImageUrlDetail.HIGH\n")

    print("Auto detail - lets the model decide:")
    print(" ImageUrlDetail.AUTO\n")

    # Example usage; imported here, next to its only use in this script.
    from lexilux import ImageUrlDetail

    messages = [
        {
            "role": "user",
            "content": [
                TextContentBlock(text="Quick: what color is this?"),
                ImageContentBlock(
                    image_url={
                        "url": "https://placehold.co/100x100/green/green",
                        "detail": ImageUrlDetail.LOW,
                    }
                ),
            ],
        }
    ]
    _ask(chat, messages, label="\nWith LOW detail")

    # Example 5: Reading text from images (OCR)
    _print_banner("Example 5: OCR - Reading Text from Images")

    messages = [
        {
            "role": "user",
            "content": [
                TextContentBlock(text="Read and transcribe any text in this image."),
                ImageContentBlock(
                    image_url={
                        "url": "https://placehold.co/300x100/000000/FFF?text=Hello+World"
                    }
                ),
            ],
        }
    ]
    _ask(
        chat,
        messages,
        label="Transcribed text",
        failure_note="Note: This requires a vision-enabled model.",
    )
204
205
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()