Embedding Example
Text embedding example:
1#!/usr/bin/env python
2"""
320 Text Embedding - Convert Text to Numbers
4
5Learn how to convert text into vector embeddings.
6Embeddings are useful for semantic search, similarity, and more.
7
8Level: Other APIs
9"""
10
11from config_loader import get_chat_config, parse_args
12
13from lexilux import Embed
14
15
def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors.

    Args:
        a: First vector, a sized sequence of numbers.
        b: Second vector; must have the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean magnitudes. Returns ``0.0`` when either vector has zero
        magnitude (including empty vectors), because the cosine is undefined
        there and a division by zero would otherwise be raised.

    Raises:
        ValueError: If ``a`` and ``b`` differ in length. ``zip`` would
            otherwise silently drop the extra components and produce a
            misleading score.
    """
    import math

    if len(a) != len(b):
        raise ValueError(
            f"Vectors must have the same length, got {len(a)} and {len(b)}"
        )

    dot_product = sum(x * y for x, y in zip(a, b))
    magnitude_a = math.sqrt(sum(x * x for x in a))
    magnitude_b = math.sqrt(sum(y * y for y in b))

    # Also covers the case where the product underflows to zero.
    denominator = magnitude_a * magnitude_b
    if denominator == 0:
        return 0.0
    return dot_product / denominator
24
25
def main():
    """Demonstrate text embedding.

    Runs three examples against the configured endpoint: embedding a single
    string, embedding a batch of strings, and printing the upper triangle of
    the pairwise cosine-similarity matrix for that batch. All output goes to
    stdout; nothing is returned.
    """
    args = parse_args()

    # The chat endpoint settings are loaded and passed to Embed unchanged;
    # the printed note tells the user that this is a stand-in for a proper
    # embedding configuration.
    try:
        config = get_chat_config(config_path=args.config)
    except (FileNotFoundError, KeyError) as e:
        print(f"Configuration error: {e}")
        print("\nUsing placeholder values. Please configure test_endpoints.json")
        config = {
            "base_url": "https://api.example.com/v1",
            "api_key": "your-api-key",
            "model": "text-embedding-ada-002",
        }
    else:
        print(
            "Note: Using chat config for embedding. "
            "Configure 'embedding' section for better results."
        )

    embed = Embed(**config)

    # Example 1: Single text embedding
    print("=" * 50)
    print("Example 1: Single Text Embedding")
    print("=" * 50)

    text = "Hello, world!"
    result = embed(text)

    # For a single string, result.vectors is treated as one flat vector
    # (its length is reported as the dimension).
    print(f"Text: {text}")
    print(f"Vector dimension: {len(result.vectors)}")
    print(f"First 5 values: {result.vectors[:5]}")
    print(f"Tokens used: {result.usage.total_tokens}\n")

    # Example 2: Batch embeddings
    print("=" * 50)
    print("Example 2: Batch Embeddings")
    print("=" * 50)

    texts = [
        "The cat sits on the mat",
        "The dog plays in the park",
        "A feline is resting on a rug",
        "I love programming in Python",
    ]

    # For a list input, result.vectors is indexed per text below.
    batch_result = embed(texts)

    print(f"Embedded {len(texts)} texts")
    print(f"Each vector has {len(batch_result.vectors[0])} dimensions")
    print(f"Total tokens used: {batch_result.usage.total_tokens}\n")

    # Example 3: Semantic similarity
    print("=" * 50)
    print("Example 3: Semantic Similarity")
    print("=" * 50)

    # Reuse the batch embeddings from Example 2 instead of sending an
    # identical second request for the same texts.
    embeddings = batch_result.vectors

    # Calculate similarities. The matrix is symmetric, so only the upper
    # triangle (i <= j) is computed; the lower triangle is padded with blanks
    # to keep the columns aligned.
    print("Similarity matrix:")
    print("-" * 50)
    for i in range(len(texts)):
        for j in range(len(texts)):
            if i <= j:
                similarity = cosine_similarity(embeddings[i], embeddings[j])
                print(f"{similarity:.2f}".rjust(6), end=" ")
            else:
                print(" ".rjust(6), end=" ")
        print()

    print("\nText labels:")
    max_label_len = 40
    for i, label in enumerate(texts):
        # Only mark a label as truncated when it was actually cut.
        suffix = "..." if len(label) > max_label_len else ""
        print(f" {i}: {label[:max_label_len]}{suffix}")

    print("\nHigher values = more similar meaning!")
    print("Notice how 'cat' and 'feline' are most similar.\n")
106
107
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()