# Filename: embeddings.py
# pip install langchain langchain-huggingface sentence-transformers
import sys
import json
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
# Sentence-transformers model used to embed each element's "text" field.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"


def _validate_elements(elements):
    """Check that *elements* is a list of dicts that each have a "text" key.

    Raises:
        ValueError: If the top-level value is not a list, or if any element
            is not a dict containing a "text" key.
    """
    if not isinstance(elements, list):
        raise ValueError("Expected a JSON array of objects.")
    for index, element in enumerate(elements):
        if not isinstance(element, dict) or "text" not in element:
            raise ValueError(
                f"Element {index} is not a JSON object with a 'text' field."
            )


def add_embeddings(elements, embedder):
    """Add an "embeddings" field to every element, in place.

    Args:
        elements: List of dicts, each with a "text" key.
        embedder: Any object with an ``embed_query(text)`` method; its return
            value is stored under the element's "embeddings" key.

    Raises:
        ValueError: If *elements* fails validation. Validation runs before
            any embedding, so the list is never left partially updated
            because of a malformed element.
    """
    _validate_elements(elements)
    for element in elements:
        element["embeddings"] = embedder.embed_query(element["text"])


def main(argv):
    """Embed the "text" field of each element of the JSON file in argv[1].

    The updated JSON is written back to the same file.

    Args:
        argv: Command-line arguments, with the script name at index 0.

    Returns:
        0 on success, 1 on any reported error.
    """
    # Get the JSON file's path.
    if len(argv) < 2:
        print("Error: Specify the path to the input JSON file.")
        print("For example, 'python embeddings.py myfile.json'")
        return 1
    file_path = argv[1]

    # Get the JSON file's contents. Only the file access and parsing are
    # inside the try, so failures elsewhere are not misreported as file errors.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            file_elements = json.load(file)
        # Validate before constructing the model so bad input fails early.
        _validate_elements(file_elements)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return 1
    except OSError:
        print(f"Error: Unable to access file '{file_path}'.")
        return 1
    except ValueError as error:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Error: File '{file_path}' does not contain valid input: {error}")
        return 1

    # Construct the model only once the input is known to be usable.
    embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

    # Generate the embeddings and add them to each element.
    add_embeddings(file_elements, embeddings)

    # Serialize before opening the file for writing: opening with "w"
    # truncates it, so a serialization failure must happen before that point.
    payload = json.dumps(file_elements, indent=2)

    # Save the updated JSON back into the original file.
    try:
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(payload)
    except OSError:
        print(f"Error: Unable to access file '{file_path}'.")
        return 1

    print(f"Done! Updated JSON saved to '{file_path}'.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))