You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

23 lines
717 B
Python

import numpy as np
# 假设我们已经学习到了简化的二维词向量
embeddings = {
"king": np.array([0.9, 0.8]),
"queen": np.array([0.9, 0.2]),
"man": np.array([0.7, 0.9]),
"woman": np.array([0.7, 0.3])
}
def cosine_similarity(vec1, vec2):
dot_product = np.dot(vec1, vec2)
norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
return dot_product / norm_product
# king - man + woman
result_vec = embeddings["king"] - embeddings["man"] + embeddings["woman"]
# 计算结果向量与 "queen" 的相似度
sim = cosine_similarity(result_vec, embeddings["queen"])
print(f"king - man + woman 的结果向量: {result_vec}")
print(f"该结果与 'queen' 的相似度: {sim:.4f}")