"""Populate a MongoDB collection and an in-memory FAISS index with random vectors.

Each generated vector is stored twice under the same integer ``faiss_id``:
once in the FAISS ``IndexIDMap`` and once as a document in the
``mat_vectors`` collection.
"""

import time

import faiss
import numpy as np
import pymongo
from bson.objectid import ObjectId

# MongoDB connection settings.
# NOTE(review): credentials are hard-coded in the URI; consider loading them
# from the environment or a secrets store.
client = pymongo.MongoClient("mongodb://root:faiss_image_search@localhost:27017/")
db = client["faiss_index"]
collection = db["mat_vectors"]
# Both identifiers must be unique per document.
collection.create_index([("product_id", 1)], unique=True)
collection.create_index([("faiss_id", 1)], unique=True)

# FAISS settings: exact L2 index wrapped in an ID map so vectors can be
# added under caller-chosen integer IDs.
# NOTE(review): the index is only held in memory and is never written to
# disk in this script -- confirm whether it should be persisted.
dimension = 2048
base_index = faiss.IndexFlatL2(dimension)
index = faiss.IndexIDMap(base_index)


def generate_random_vector(dimension: int) -> np.ndarray:
    """Return a random float32 vector of length *dimension* with values in [0, 1)."""
    return np.random.random(dimension).astype("float32")


def insert_million_records(total_records: int = 200000, batch_size: int = 10000) -> None:
    """Insert random vectors into the FAISS index and MongoDB in batches.

    Despite the function name, the default number of records is 200,000.

    Args:
        total_records: Total number of vectors to generate and insert.
            FAISS IDs run from 0 to ``total_records - 1``.
        batch_size: Maximum number of vectors generated, indexed and
            written to MongoDB per round trip.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    start_time = time.time()

    for start in range(0, total_records, batch_size):
        # The final batch may be shorter when total_records is not a
        # multiple of batch_size.
        count = min(batch_size, total_records - start)

        # Generate the whole batch as one (count, dimension) float32 matrix
        # and add it to FAISS in a single call instead of one call per vector.
        vectors = np.random.random((count, dimension)).astype("float32")
        # Explicit int64: the platform default integer type is not
        # guaranteed to be 64-bit.
        ids = np.arange(start, start + count, dtype="int64")
        index.add_with_ids(vectors, ids)

        # ids.tolist() yields plain Python ints, which BSON can encode.
        records = [
            {
                "_id": ObjectId(),
                "product_id": ObjectId(),
                "faiss_id": faiss_id,
                "vector": vector.tolist(),
            }
            for faiss_id, vector in zip(ids.tolist(), vectors)
        ]
        collection.insert_many(records)
        print(f"Inserted {start + count} records")

    end_time = time.time()
    print(f"Total time taken: {end_time - start_time} seconds")


if __name__ == "__main__":
    insert_million_records()