Embedding Example

Text embedding example:

  1#!/usr/bin/env python
  2"""
  320 Text Embedding - Convert Text to Numbers
  4
  5Learn how to convert text into vector embeddings.
  6Embeddings are useful for semantic search, similarity, and more.
  7
  8Level: Other APIs
  9"""
 10
 11from config_loader import get_chat_config, parse_args
 12
 13from lexilux import Embed
 14
 15
 16def cosine_similarity(a, b):
 17    """Calculate cosine similarity between two vectors."""
 18    import math
 19
 20    dot_product = sum(x * y for x, y in zip(a, b))
 21    magnitude_a = math.sqrt(sum(x * x for x in a))
 22    magnitude_b = math.sqrt(sum(y * y for y in b))
 23    return dot_product / (magnitude_a * magnitude_b)
 24
 25
 26def main():
 27    """Demonstrate text embedding."""
 28    args = parse_args()
 29
 30    # For embedding, we need to use the embed config
 31    try:
 32        config = get_chat_config(config_path=args.config)
 33        print(
 34            "Note: Using chat config for embedding. "
 35            "Configure 'embedding' section for better results."
 36        )
 37    except (FileNotFoundError, KeyError) as e:
 38        print(f"Configuration error: {e}")
 39        print("\nUsing placeholder values. Please configure test_endpoints.json")
 40        config = {
 41            "base_url": "https://api.example.com/v1",
 42            "api_key": "your-api-key",
 43            "model": "text-embedding-ada-002",
 44        }
 45
 46    embed = Embed(**config)
 47
 48    # Example 1: Single text embedding
 49    print("=" * 50)
 50    print("Example 1: Single Text Embedding")
 51    print("=" * 50)
 52
 53    text = "Hello, world!"
 54    result = embed(text)
 55
 56    print(f"Text: {text}")
 57    print(f"Vector dimension: {len(result.vectors)}")
 58    print(f"First 5 values: {result.vectors[:5]}")
 59    print(f"Tokens used: {result.usage.total_tokens}\n")
 60
 61    # Example 2: Batch embeddings
 62    print("=" * 50)
 63    print("Example 2: Batch Embeddings")
 64    print("=" * 50)
 65
 66    texts = [
 67        "The cat sits on the mat",
 68        "The dog plays in the park",
 69        "A feline is resting on a rug",
 70        "I love programming in Python",
 71    ]
 72
 73    result = embed(texts)
 74
 75    print(f"Embedded {len(texts)} texts")
 76    print(f"Each vector has {len(result.vectors[0])} dimensions")
 77    print(f"Total tokens used: {result.usage.total_tokens}\n")
 78
 79    # Example 3: Semantic similarity
 80    print("=" * 50)
 81    print("Example 3: Semantic Similarity")
 82    print("=" * 50)
 83
 84    # Get embeddings
 85    result = embed(texts)
 86    embeddings = result.vectors
 87
 88    # Calculate similarities
 89    print("Similarity matrix:")
 90    print("-" * 50)
 91    for i, text_i in enumerate(texts):
 92        for j, text_j in enumerate(texts):
 93            if i <= j:
 94                similarity = cosine_similarity(embeddings[i], embeddings[j])
 95                print(f"{similarity:.2f}".rjust(6), end=" ")
 96            else:
 97                print("     ".rjust(6), end=" ")
 98        print()
 99
100    print("\nText labels:")
101    for i, text in enumerate(texts):
102        print(f"  {i}: {text[:40]}...")
103
104    print("\nHigher values = more similar meaning!")
105    print("Notice how 'cat' and 'feline' are most similar.\n")
106
107
108if __name__ == "__main__":
109    main()