You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
717 B
Python
23 lines
717 B
Python
|
6 months ago
|
import numpy as np
|
||
|
|
|
||
|
|
# Toy example: pretend these simplified 2-D word vectors were already learned.
embeddings = {
    "king": np.array([0.9, 0.8]),
    "queen": np.array([0.9, 0.2]),
    "man": np.array([0.7, 0.9]),
    "woman": np.array([0.7, 0.3]),
}
|
||
|
|
|
||
|
|
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    The result is ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``, which is the
    cosine of the angle between the two vectors.

    Args:
        vec1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine similarity as a float. If either vector has zero norm the
        angle is undefined and ``0.0`` is returned.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Dividing by a zero norm would produce nan and a RuntimeWarning;
        # report "no similarity" for a zero-length vector instead.
        return 0.0
    return np.dot(vec1, vec2) / norm_product
|
||
|
|
|
||
|
|
# Word-analogy arithmetic: king - man + woman
result_vec = embeddings["king"] - embeddings["man"] + embeddings["woman"]

# Measure how similar the resulting vector is to the vector for "queen".
sim = cosine_similarity(result_vec, embeddings["queen"])

print(f"king - man + woman 的结果向量: {result_vec}")
print(f"该结果与 'queen' 的相似度: {sim:.4f}")
|