欢迎来到尧图网

客户服务 关于我们

您的位置:首页 > 健康 > 养生 > 使用milvus数据库实现文本相似比较

使用milvus数据库实现文本相似比较

2024/10/23 23:29:52 来源:https://blog.csdn.net/u011564831/article/details/143075163  浏览:    关键词:使用milvus数据库实现文本相似比较

先部署 Milvus,使用单机(standalone)模式

milvus-install.yaml 

---
# PersistentVolume backing the Milvus standalone data PVC.
# NOTE(review): this was pasted from a live-cluster export; server-populated
# read-only fields (managedFields, finalizers, claimRef resourceVersion/uid)
# were stripped because they must not appear in an install manifest.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: my-release-milvus-pv
  annotations:
    pv.kubernetes.io/bound-by-controller: "yes"
spec:
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 50Gi
  # Pre-bind this PV to the Milvus PVC defined later in this file.
  claimRef:
    apiVersion: v1
    kind: PersistentVolumeClaim
    name: my-release-milvus
    namespace: milvus
  hostPath:
    path: /opt/pv/my-release-milvus-pv
    type: DirectoryOrCreate
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
---
# PersistentVolume backing the MinIO PVC.
# NOTE(review): server-populated read-only fields (managedFields, timestamps,
# finalizers, claimRef resourceVersion/uid) from the cluster export were
# stripped — they must not appear in an install manifest.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: my-release-minio-pv
  annotations:
    pv.kubernetes.io/bound-by-controller: "yes"
spec:
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 500Gi
  # Pre-bind this PV to the MinIO PVC defined later in this file.
  claimRef:
    apiVersion: v1
    kind: PersistentVolumeClaim
    name: my-release-minio
    namespace: milvus
  hostPath:
    path: /opt/pv/my-release-minio-pv
    type: DirectoryOrCreate
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
---
# Source: milvus/charts/minio/templates/serviceaccount.yaml
# ServiceAccount used by the MinIO deployment pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: "my-release-minio"
  namespace: "milvus"
  labels:
    app: minio
    chart: minio-8.0.17
    release: "my-release"
---
# Source: milvus/charts/minio/templates/secrets.yaml
# MinIO root credentials consumed by both the MinIO deployment (env) and
# the Milvus config (accessKeyID/secretAccessKey).
apiVersion: v1
kind: Secret
metadata:
  name: my-release-minio
  namespace: "milvus"
  labels:
    app: minio
    chart: minio-8.0.17
    release: my-release
    heritage: Helm
type: Opaque
data:
  # Both values are base64("minioadmin") — change for any non-demo install.
  accesskey: "bWluaW9hZG1pbg=="
  secretkey: "bWluaW9hZG1pbg=="
---
# Source: milvus/charts/minio/templates/configmap.yaml
# Bucket-bootstrap script for MinIO (standard minio chart "initialize" hook).
# NOTE(review): reconstructed from a newline-stripped paste; verify against
# the minio-8.0.17 chart template.
apiVersion: v1
kind: ConfigMap
metadata:
  name: my-release-minio
  namespace: "milvus"
  labels:
    app: minio
    chart: minio-8.0.17
    release: my-release
    heritage: Helm
data:
  initialize: |-
    #!/bin/sh
    set -e ; # Have script exit in the event of a failed command.
    MC_CONFIG_DIR="/etc/minio/mc/"
    MC="/usr/bin/mc --insecure --config-dir ${MC_CONFIG_DIR}"

    # connectToMinio
    # Use a check-sleep-check loop to wait for Minio service to be available
    connectToMinio() {
      SCHEME=$1
      ATTEMPTS=0 ; LIMIT=29 ; # Allow 30 attempts
      set -e ; # fail if we can't read the keys.
      ACCESS=$(cat /config/accesskey) ; SECRET=$(cat /config/secretkey) ;
      set +e ; # The connections to minio are allowed to fail.
      echo "Connecting to Minio server: $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT" ;
      MC_COMMAND="${MC} config host add myminio $SCHEME://$MINIO_ENDPOINT:$MINIO_PORT $ACCESS $SECRET" ;
      $MC_COMMAND ;
      STATUS=$? ;
      until [ $STATUS = 0 ]
      do
        ATTEMPTS=`expr $ATTEMPTS + 1` ;
        echo \"Failed attempts: $ATTEMPTS\" ;
        if [ $ATTEMPTS -gt $LIMIT ]; then
          exit 1 ;
        fi ;
        sleep 2 ; # 1 second intervals between attempts
        $MC_COMMAND ;
        STATUS=$? ;
      done ;
      set -e ; # reset `e` as active
      return 0
    }

    # checkBucketExists ($bucket)
    # Check if the bucket exists, by using the exit code of `mc ls`
    checkBucketExists() {
      BUCKET=$1
      CMD=$(${MC} ls myminio/$BUCKET > /dev/null 2>&1)
      return $?
    }

    # createBucket ($bucket, $policy, $purge)
    # Ensure bucket exists, purging if asked to
    createBucket() {
      BUCKET=$1
      POLICY=$2
      PURGE=$3
      VERSIONING=$4

      # Purge the bucket, if set & exists
      # Since PURGE is user input, check explicitly for `true`
      if [ $PURGE = true ]; then
        if checkBucketExists $BUCKET ; then
          echo "Purging bucket '$BUCKET'."
          set +e ; # don't exit if this fails
          ${MC} rm -r --force myminio/$BUCKET
          set -e ; # reset `e` as active
        else
          echo "Bucket '$BUCKET' does not exist, skipping purge."
        fi
      fi

      # Create the bucket if it does not exist
      if ! checkBucketExists $BUCKET ; then
        echo "Creating bucket '$BUCKET'"
        ${MC} mb myminio/$BUCKET
      else
        echo "Bucket '$BUCKET' already exists."
      fi

      # set versioning for bucket
      if [ ! -z $VERSIONING ] ; then
        if [ $VERSIONING = true ] ; then
          echo "Enabling versioning for '$BUCKET'"
          ${MC} version enable myminio/$BUCKET
        elif [ $VERSIONING = false ] ; then
          echo "Suspending versioning for '$BUCKET'"
          ${MC} version suspend myminio/$BUCKET
        fi
      else
        echo "Bucket '$BUCKET' versioning unchanged."
      fi

      # At this point, the bucket should exist, skip checking for existence
      # Set policy on the bucket
      echo "Setting policy of bucket '$BUCKET' to '$POLICY'."
      ${MC} policy set $POLICY myminio/$BUCKET
    }

    # Try connecting to Minio instance
    scheme=http
    connectToMinio $scheme
---
# Source: milvus/templates/configmap.yaml
# Milvus standalone configuration: default.yaml wires Milvus to the in-cluster
# etcd and MinIO; user.yaml is an (empty) override hook.
apiVersion: v1
kind: ConfigMap
metadata:
  name: my-release-milvus
  namespace: "milvus"
data:
  default.yaml: |+
    # Copyright (C) 2019-2021 Zilliz. All rights reserved.
    #
    # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
    # with the License. You may obtain a copy of the License at
    #
    # http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software distributed under the License
    # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
    # or implied. See the License for the specific language governing permissions and limitations under the License.
    etcd:
      endpoints:
        - my-release-etcd.milvus.svc.cluster.local:2379
    metastore:
      type: etcd
    minio:
      address: my-release-minio
      port: 9000
      accessKeyID: minioadmin
      secretAccessKey: minioadmin
      useSSL: false
      bucketName: milvus-bucket
      rootPath: file
      useIAM: false
      iamEndpoint:
      region:
      useVirtualHost: false
    mq:
      type: rocksmq
    messageQueue: rocksmq
    rootCoord:
      address: localhost
      port: 53100
      enableActiveStandby: false  # Enable rootcoord active-standby
    proxy:
      port: 19530
      internalPort: 19529
    queryCoord:
      address: localhost
      port: 19531
      enableActiveStandby: false  # Enable querycoord active-standby
    queryNode:
      port: 21123
      enableDisk: true # Enable querynode load disk index, and search on disk index
    indexCoord:
      address: localhost
      port: 31000
      enableActiveStandby: false  # Enable indexcoord active-standby
    indexNode:
      port: 21121
      enableDisk: true # Enable index node build disk vector index
    dataCoord:
      address: localhost
      port: 13333
      enableActiveStandby: false  # Enable datacoord active-standby
    dataNode:
      port: 21124
    log:
      level: info
      file:
        rootPath: ""
        maxSize: 300
        maxAge: 10
        maxBackups: 20
      format: text
  user.yaml: |-
    #    For example enable rest http for milvus proxy
    #    proxy:
    #      http:
    #        enabled: true
---
# Source: milvus/charts/minio/templates/pvc.yaml
# Claim bound to my-release-minio-pv (500Gi hostPath) defined above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: my-release-minio
  namespace: "milvus"
  annotations:
    # Keep the PVC (and its data) when the Helm release is deleted.
    helm.sh/resource-policy: keep
  labels:
    app: minio
    chart: minio-8.0.17
    release: my-release
    heritage: Helm
spec:
  accessModes:
    - "ReadWriteOnce"
  resources:
    requests:
      storage: "500Gi"
---
# Source: milvus/templates/pvc.yaml
# Claim bound to my-release-milvus-pv (50Gi hostPath) defined above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: my-release-milvus
  namespace: "milvus"
  annotations:
    # Keep the PVC (and its data) when the Helm release is deleted.
    helm.sh/resource-policy: keep
  labels:
    helm.sh/chart: milvus-4.1.8
    app.kubernetes.io/name: milvus
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/version: "2.3.2"
    app.kubernetes.io/managed-by: Helm
spec:
  accessModes:
    - "ReadWriteOnce"
  resources:
    requests:
      storage: 50Gi
---
# Source: milvus/charts/etcd/templates/svc-headless.yaml
# Headless service for etcd peer discovery (StatefulSet serviceName).
apiVersion: v1
kind: Service
metadata:
  name: my-release-etcd-headless
  namespace: milvus
  labels:
    app.kubernetes.io/name: etcd
    helm.sh/chart: etcd-6.3.3
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/managed-by: Helm
  annotations:
    service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
spec:
  type: ClusterIP
  clusterIP: None
  # Expose pods before they pass readiness so peers can bootstrap.
  publishNotReadyAddresses: true
  ports:
    - name: "client"
      port: 2379
      targetPort: client
    - name: "peer"
      port: 2380
      targetPort: peer
  selector:
    app.kubernetes.io/name: etcd
    app.kubernetes.io/instance: my-release
---
# Source: milvus/charts/etcd/templates/svc.yaml
# Client-facing ClusterIP service for etcd (used by Milvus at :2379).
apiVersion: v1
kind: Service
metadata:
  name: my-release-etcd
  namespace: milvus
  labels:
    app.kubernetes.io/name: etcd
    helm.sh/chart: etcd-6.3.3
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - name: "client"
      port: 2379
      targetPort: 2379
    - name: "peer"
      port: 2380
      targetPort: 2380
  selector:
    app.kubernetes.io/name: etcd
    app.kubernetes.io/instance: my-release
---
# Source: milvus/charts/minio/templates/service.yaml
# ClusterIP service for the MinIO S3 API (Milvus config points here :9000).
apiVersion: v1
kind: Service
metadata:
  name: my-release-minio
  namespace: milvus
  labels:
    app: minio
    chart: minio-8.0.17
    release: my-release
    heritage: Helm
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 9000
      protocol: TCP
      targetPort: 9000
  selector:
    app: minio
    release: my-release
---
# Source: milvus/templates/service.yaml
# ClusterIP service exposing the Milvus gRPC port (19530) and metrics (9091).
apiVersion: v1
kind: Service
metadata:
  name: my-release-milvus
  namespace: milvus
  labels:
    helm.sh/chart: milvus-4.1.8
    app.kubernetes.io/name: milvus
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/version: "2.3.2"
    app.kubernetes.io/managed-by: Helm
    component: "standalone"
spec:
  type: ClusterIP
  ports:
    - name: milvus
      port: 19530
      protocol: TCP
      targetPort: milvus
    - name: metrics
      protocol: TCP
      port: 9091
      targetPort: metrics
  selector:
    app.kubernetes.io/name: milvus
    app.kubernetes.io/instance: my-release
    component: "standalone"
---
# Source: milvus/charts/minio/templates/deployment.yaml
# Single-node MinIO serving /export from the my-release-minio PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-release-minio
  namespace: milvus
  labels:
    app: minio
    chart: minio-8.0.17
    release: my-release
    heritage: Helm
spec:
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 100%
      maxUnavailable: 0
  selector:
    matchLabels:
      app: minio
      release: my-release
  template:
    metadata:
      name: my-release-minio
      labels:
        app: minio
        release: my-release
      annotations:
        checksum/secrets: aa3f4f64eb45653d3fee45079a6e55a9869ce76297baa50df3bc33192434d05e
        checksum/config: ed4d4467dd70f3e0ed89e5d2bc3c4414b02a52fcf7db7f1b66b675b9016664a7
    spec:
      serviceAccountName: "my-release-minio"
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: minio
          image: "minio/minio:RELEASE.2023-03-20T20-16-18Z"
          imagePullPolicy: IfNotPresent
          command:
            - "/bin/sh"
            - "-ce"
            - "/usr/bin/docker-entrypoint.sh minio -S /etc/minio/certs/ server /export"
          volumeMounts:
            - name: export
              mountPath: /export
          ports:
            - name: http
              containerPort: 9000
          livenessProbe:
            httpGet:
              path: /minio/health/live
              port: http
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 5
          readinessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 1
            successThreshold: 1
            failureThreshold: 5
          startupProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 0
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 60
          env:
            # Credentials come from the my-release-minio Secret above.
            - name: MINIO_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: my-release-minio
                  key: accesskey
            - name: MINIO_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: my-release-minio
                  key: secretkey
          resources:
            requests:
              memory: 2Gi
      volumes:
        - name: export
          persistentVolumeClaim:
            claimName: my-release-minio
        - name: minio-user
          secret:
            secretName: my-release-minio
---
# Source: milvus/templates/standalone-deployment.yaml
# Milvus standalone pod: an init container copies the run-helm/merge tools,
# the main container runs `milvus run standalone` with config from the
# my-release-milvus ConfigMap and data on the my-release-milvus PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-release-milvus-standalone
  namespace: milvus
  labels:
    helm.sh/chart: milvus-4.1.8
    app.kubernetes.io/name: milvus
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/version: "2.3.2"
    app.kubernetes.io/managed-by: Helm
    component: "standalone"
spec:
  replicas: 1
  # Recreate: the PVC is ReadWriteOnce, so old and new pods cannot coexist.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: milvus
      app.kubernetes.io/instance: my-release
      component: "standalone"
  template:
    metadata:
      labels:
        app.kubernetes.io/name: milvus
        app.kubernetes.io/instance: my-release
        component: "standalone"
      annotations:
        checksum/config: bc6a7e82027efa3ad0df3b00d07c3ad1dcc40d7665f314e6d13a000fdd26aec2
    spec:
      serviceAccountName: default
      initContainers:
        - name: config
          command:
            - /cp
            - /run-helm.sh,/merge
            - /milvus/tools/run-helm.sh,/milvus/tools/merge
          image: "milvusdb/milvus-config-tool:v0.1.1"
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /milvus/tools
              name: tools
      containers:
        - name: standalone
          image: "milvusdb/milvus:v2.3.21"
          imagePullPolicy: IfNotPresent
          args: [ "/milvus/tools/run-helm.sh", "milvus", "run", "standalone" ]
          ports:
            - name: milvus
              containerPort: 19530
              protocol: TCP
            - name: metrics
              containerPort: 9091
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 90
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 5
          readinessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 90
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 5
          resources:
            {}
          volumeMounts:
            - mountPath: /milvus/tools
              name: tools
            - name: milvus-config
              mountPath: /milvus/configs/default.yaml
              subPath: default.yaml
              readOnly: true
            - name: milvus-config
              mountPath: /milvus/configs/user.yaml
              subPath: user.yaml
              readOnly: true
            - name: milvus-data-disk
              mountPath: "/var/lib/milvus"
            - mountPath: /var/lib/milvus/data
              name: disk
      volumes:
        - emptyDir: {}
          name: tools
        - name: milvus-config
          configMap:
            name: my-release-milvus
        - name: milvus-data-disk
          persistentVolumeClaim:
            claimName: my-release-milvus
        - name: disk
          emptyDir: {}
---
# Source: milvus/charts/etcd/templates/statefulset.yaml
# Single-member etcd (Bitnami-style image) providing Milvus metadata storage.
# NOTE(review): reconstructed from a newline-stripped paste; verify against
# the etcd-6.3.3 chart output.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: my-release-etcd
  namespace: milvus
  labels:
    app.kubernetes.io/name: etcd
    helm.sh/chart: etcd-6.3.3
    app.kubernetes.io/instance: my-release
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: etcd
      app.kubernetes.io/instance: my-release
  serviceName: my-release-etcd-headless
  podManagementPolicy: Parallel
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: etcd
        helm.sh/chart: etcd-6.3.3
        app.kubernetes.io/instance: my-release
        app.kubernetes.io/managed-by: Helm
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/name: etcd
                    app.kubernetes.io/instance: my-release
                namespaces:
                  - "default"
                topologyKey: kubernetes.io/hostname
              weight: 1
      securityContext:
        fsGroup: 1001
      serviceAccountName: "default"
      containers:
        - name: etcd
          image: docker.io/milvusdb/etcd:3.5.5-r4
          imagePullPolicy: "IfNotPresent"
          securityContext:
            runAsNonRoot: true
            runAsUser: 1001
          env:
            - name: BITNAMI_DEBUG
              value: "false"
            - name: MY_POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: ETCDCTL_API
              value: "3"
            - name: ETCD_ON_K8S
              value: "yes"
            - name: ETCD_START_FROM_SNAPSHOT
              value: "no"
            - name: ETCD_DISASTER_RECOVERY
              value: "no"
            - name: ETCD_NAME
              value: "$(MY_POD_NAME)"
            - name: ETCD_DATA_DIR
              value: "/bitnami/etcd/data"
            - name: ETCD_LOG_LEVEL
              value: "info"
            - name: ALLOW_NONE_AUTHENTICATION
              value: "yes"
            - name: ETCD_ADVERTISE_CLIENT_URLS
              value: "http://$(MY_POD_NAME).my-release-etcd-headless.milvus.svc.cluster.local:2379"
            - name: ETCD_LISTEN_CLIENT_URLS
              value: "http://0.0.0.0:2379"
            - name: ETCD_INITIAL_ADVERTISE_PEER_URLS
              value: "http://$(MY_POD_NAME).my-release-etcd-headless.milvus.svc.cluster.local:2380"
            - name: ETCD_LISTEN_PEER_URLS
              value: "http://0.0.0.0:2380"
            - name: ETCD_AUTO_COMPACTION_MODE
              value: "revision"
            - name: ETCD_AUTO_COMPACTION_RETENTION
              value: "1000"
            - name: ETCD_QUOTA_BACKEND_BYTES
              value: "4294967296"
            - name: ETCD_HEARTBEAT_INTERVAL
              value: "500"
            - name: ETCD_ELECTION_TIMEOUT
              value: "2500"
          ports:
            - name: client
              containerPort: 2379
              protocol: TCP
            - name: peer
              containerPort: 2380
              protocol: TCP
          livenessProbe:
            exec:
              command:
                - /opt/bitnami/scripts/etcd/healthcheck.sh
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 5
          readinessProbe:
            exec:
              command:
                - /opt/bitnami/scripts/etcd/healthcheck.sh
            initialDelaySeconds: 60
            periodSeconds: 20
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 5
          resources:
            limits: {}
            requests: {}
          volumeMounts:
            - name: data
              mountPath: /bitnami/etcd
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: "10Gi"

创建图形管理界面工具attu

attu-deployment.yaml

# Attu web UI for Milvus, exposed on NodePort 31333.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: milvus-attu
  namespace: milvus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: milvus-attu
  template:
    metadata:
      labels:
        app: milvus-attu
    spec:
      containers:
        - name: milvus-attu
          image: zilliz/attu:latest
          ports:
            - containerPort: 3000
          env:
            # NOTE(review): a node IP is hard-coded here; inside the cluster
            # the service DNS name my-release-milvus.milvus would be more robust.
            - name: MILVUS_HOST
              value: "192.168.110.125"
            - name: MILVUS_PORT
              value: "19530"
---
apiVersion: v1
kind: Service
metadata:
  name: milvus-attu
  namespace: milvus
spec:
  type: NodePort
  ports:
    - port: 3000
      targetPort: 3000
      nodePort: 31333
  selector:
    app: milvus-attu

先录入一些测试数据,然后调用查询接口查看相似度。

import time

import numpy as np
from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    MilvusException,
    connections,
)

# Connect to the Milvus service.
host = '192.168.110.125'
port = 19530
connections.connect(alias='default', host=host, port=port)

# Define the schema: an INT64 primary key plus a 128-dim float vector field.
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="vec", dtype=DataType.FLOAT_VECTOR, dim=128),
]
schema = CollectionSchema(fields=fields, description="test collection")
collection = Collection(name="test", schema=schema)

# Generate some test data and insert it.
vectors = [[np.random.random() for _ in range(128)] for _ in range(10)]
ids = list(range(10))
collection.insert(data=[ids, vectors])
collection.flush()  # seal the inserted segment so it becomes searchable
print(collection.describe())

# BUG FIX: the original never created an index or loaded the collection,
# so every search raised MilvusException and the retries could not succeed.
# Milvus requires an index and a load() before search.
collection.create_index(
    field_name="vec",
    index_params={
        "index_type": "IVF_FLAT",
        "metric_type": "COSINE",  # must match the search metric below
        "params": {"nlist": 128},
    },
)
collection.load()

query_vector = np.random.random(128).astype(np.float32)  # fixed typo: query_vetor
search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}


def _search():
    """Return the top-5 COSINE neighbours of query_vector (with id and vec)."""
    return collection.search(
        data=[query_vector],
        anns_field="vec",
        param=search_params,
        limit=5,
        output_fields=["id", "vec"],
    )


results = None
try:
    results = _search()
except MilvusException as e:
    print(f"Search error: {e}")
    # Retry a few times with a fixed delay before giving up.
    retries = 3
    delay = 1
    for attempt in range(retries):
        try:
            results = _search()
            break
        except MilvusException as e:
            print(f"Retry attempt {attempt+1}/{retries}: {e}")
            time.sleep(delay)

# Print the search results (ids and similarity distances).
if results:
    for res in results[0]:
        print(f"id: {res.id}, distance: {res.distance}")
else:
    print("no result found")  # fixed typo: was "no foud result"

# Close the connection.
connections.disconnect(alias='default')

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com