milvus.yaml 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067
  1. # Licensed to the LF AI & Data foundation under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # Related configuration of etcd, used to store Milvus metadata & service discovery.
  17. etcd:
  18. # Endpoints used to access etcd service. You can change this parameter as the endpoints of your own etcd cluster.
  19. # Environment variable: ETCD_ENDPOINTS
  20. # etcd preferentially acquires valid address from environment variable ETCD_ENDPOINTS when Milvus is started.
  21. endpoints: localhost:2379
  22. # Root prefix of the key to where Milvus stores data in etcd.
  23. # It is recommended to change this parameter before starting Milvus for the first time.
  24. # To share an etcd instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them.
  25. # Set an easy-to-identify root path for Milvus if etcd service already exists.
  26. # Changing this for an already running Milvus instance may result in failures to read legacy data.
  27. rootPath: by-dev
  28. # Sub-prefix of the key to where Milvus stores metadata-related information in etcd.
  29. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  30. # It is recommended to change this parameter before starting Milvus for the first time.
  31. metaSubPath: meta
  32. # Sub-prefix of the key to where Milvus stores timestamps in etcd.
  33. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  34. # It is recommended not to change this parameter if there is no specific reason.
  35. kvSubPath: kv
  36. log:
  37. level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  38. # path is one of:
  39. # - "default" as os.Stderr,
  40. # - "stderr" as os.Stderr,
  41. # - "stdout" as os.Stdout,
  42. # - file path to append server logs to.
  43. # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log
  44. path: stdout
  45. ssl:
  46. enabled: false # Whether to support ETCD secure connection mode
  47. tlsCert: /path/to/etcd-client.pem # path to your cert file
  48. tlsKey: /path/to/etcd-client-key.pem # path to your key file
  49. tlsCACert: /path/to/ca.pem # path to your CACert file
  50. # TLS min version
  51. # Optional values: 1.0, 1.1, 1.2, 1.3.
  52. # We recommend using version 1.2 and above.
  53. tlsMinVersion: 1.3
  54. requestTimeout: 10000 # Etcd operation timeout in milliseconds
  55. use:
  56. embed: false # Whether to enable embedded Etcd (an in-process EtcdServer).
  57. data:
  58. dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/
  59. auth:
  60. enabled: false # Whether to enable authentication
  61. userName: # username for etcd authentication
  62. password: # password for etcd authentication
  63. metastore:
  64. type: etcd # Default value: etcd, Valid values: [etcd, tikv]
  65. snapshot:
  66. ttl: 86400 # snapshot ttl in seconds
  67. reserveTime: 3600 # snapshot reserve time in seconds
  68. # Related configuration of tikv, used to store Milvus metadata.
  69. # Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery.
  70. # TiKV is a good option when the metadata size requires better horizontal scalability.
  71. tikv:
  72. endpoints: 127.0.0.1:2389 # Note that the default pd port of tikv is 2379, which conflicts with etcd.
  73. rootPath: by-dev # The root path where data is stored in tikv
  74. metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath
  75. kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath
  76. requestTimeout: 10000 # ms, tikv request timeout
  77. snapshotScanSize: 256 # batch size of tikv snapshot scan
  78. ssl:
  79. enabled: false # Whether to support TiKV secure connection mode
  80. tlsCert: # path to your cert file
  81. tlsKey: # path to your key file
  82. tlsCACert: # path to your CACert file
  83. localStorage:
  84. # Local path to where vector data are stored during a search or a query to avoid repetitive access to MinIO or S3 service.
  85. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  86. # It is recommended to change this parameter before starting Milvus for the first time.
  87. path: /var/lib/milvus/data/
  88. # Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus.
  89. # We refer to the storage service as MinIO/S3 in the following description for simplicity.
  90. minio:
  91. # IP address of MinIO or S3 service.
  92. # Environment variable: MINIO_ADDRESS
  93. # minio.address and minio.port together generate the valid access to MinIO or S3 service.
  94. # MinIO preferentially acquires the valid IP address from the environment variable MINIO_ADDRESS when Milvus is started.
  95. # Default value applies when MinIO or S3 is running on the same network with Milvus.
  96. address: localhost
  97. port: 9000 # Port of MinIO or S3 service.
  98. # Access key ID that MinIO or S3 issues to user for authorized access.
  99. # Environment variable: MINIO_ACCESS_KEY_ID or minio.accessKeyID
  100. # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service.
  101. # This configuration must be set identical to the environment variable MINIO_ACCESS_KEY_ID, which is necessary for starting MinIO or S3.
  102. # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file.
  103. accessKeyID: minioadmin
  104. # Secret key used to encrypt the signature string and verify the signature string on server. It must be kept strictly confidential and accessible only to the MinIO or S3 server and users.
  105. # Environment variable: MINIO_SECRET_ACCESS_KEY or minio.secretAccessKey
  106. # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service.
  107. # This configuration must be set identical to the environment variable MINIO_SECRET_ACCESS_KEY, which is necessary for starting MinIO or S3.
  108. # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file.
  109. secretAccessKey: minioadmin
  110. useSSL: false # Switch value to control if to access the MinIO or S3 service through SSL.
  111. ssl:
  112. tlsCACert: /path/to/public.crt # path to your CACert file
  113. # Name of the bucket where Milvus stores data in MinIO or S3.
  114. # Milvus 2.0.0 does not support storing data in multiple buckets.
  115. # Bucket with this name will be created if it does not exist. If the bucket already exists and is accessible, it will be used directly. Otherwise, there will be an error.
  116. # To share a MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs.
  117. # The data will be stored in the local Docker if Docker is used to start the MinIO service locally. Ensure that there is sufficient storage space.
  118. # A bucket name is globally unique in one MinIO or S3 instance.
  119. bucketName: a-bucket
  120. # Root prefix of the key to where Milvus stores data in MinIO or S3.
  121. # It is recommended to change this parameter before starting Milvus for the first time.
  122. # To share a MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs.
  123. # Set an easy-to-identify root key prefix for Milvus if etcd service already exists.
  124. # Changing this for an already running Milvus instance may result in failures to read legacy data.
  125. rootPath: files
  126. # Whether to use an IAM role to access S3/GCS instead of access/secret keys
  127. # For more information, refer to
  128. # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html
  129. # gcp: https://cloud.google.com/storage/docs/access-control/iam
  130. # aliyun (ack): https://www.alibabacloud.com/help/en/container-service-for-kubernetes/latest/use-rrsa-to-enforce-access-control
  131. # aliyun (ecs): https://www.alibabacloud.com/help/en/elastic-compute-service/latest/attach-an-instance-ram-role
  132. useIAM: false
  133. # Cloud Provider of S3. Supports: "aws", "gcp", "aliyun".
  134. # Cloud Provider of Google Cloud Storage. Supports: "gcpnative".
  135. # You can use "aws" for other cloud providers that support the S3 API with signature v4, e.g.: minio
  136. # You can use "gcp" for other cloud providers that support the S3 API with signature v2
  137. # You can use "aliyun" for other cloud providers that use virtual host style buckets
  138. # You can use "gcpnative" for the Google Cloud Platform provider. Uses service account credentials
  139. # for authentication.
  140. # When useIAM is enabled, only "aws", "gcp", "aliyun" are supported for now
  141. cloudProvider: aws
  142. # The JSON content contains the gcs service account credentials.
  143. # Used only for the "gcpnative" cloud provider.
  144. gcpCredentialJSON:
  145. # Custom endpoint for fetching IAM role credentials when useIAM is true & cloudProvider is "aws".
  146. # Leave it empty if you want to use AWS default endpoint
  147. iamEndpoint:
  148. logLevel: fatal # Log level for aws sdk log. Supported level: off, fatal, error, warn, info, debug, trace
  149. region: # Specify minio storage system location region
  150. useVirtualHost: false # Whether use virtual host mode for bucket
  151. requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
  152. # The maximum number of objects requested per batch in minio ListObjects rpc.
  153. # 0 means using the oss client default; decrease this configuration if ListObjects times out.
  154. listObjectsMaxKeys: 0
  155. # Milvus supports four MQs: rocksmq(based on RocksDB), natsmq(embedded nats-server), Pulsar and Kafka.
  156. # You can change your mq by setting mq.type field.
  157. # If you leave the mq.type field as default, note the following enabling priority when multiple mq are configured in this file.
  158. # 1. standalone(local) mode: rocksmq(default) > natsmq > Pulsar > Kafka
  159. # 2. cluster mode: Pulsar(default) > Kafka (rocksmq and natsmq is unsupported in cluster mode)
  160. mq:
  161. # Default value: "default"
  162. # Valid values: [default, pulsar, kafka, rocksmq, natsmq]
  163. type: default
  164. enablePursuitMode: true # Default value: "true"
  165. pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds
  166. pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes
  167. pursuitBufferTime: 60 # pursuit mode buffer time in seconds
  168. mqBufSize: 16 # MQ client consumer buffer length
  169. dispatcher:
  170. mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge
  171. targetBufSize: 16 # the length of channel buffer for target
  172. maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack
  173. # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
  174. pulsar:
  175. # IP address of Pulsar service.
  176. # Environment variable: PULSAR_ADDRESS
  177. # pulsar.address and pulsar.port together generate the valid access to Pulsar.
  178. # Pulsar preferentially acquires the valid IP address from the environment variable PULSAR_ADDRESS when Milvus is started.
  179. # Default value applies when Pulsar is running on the same network with Milvus.
  180. address: localhost
  181. port: 6650 # Port of Pulsar service.
  182. webport: 80 # Web port of Pulsar service. If you connect directly without proxy, you should use 8080.
  183. # The maximum size of each message in Pulsar. Unit: Byte.
  184. # By default, Pulsar can transmit at most 2MB of data in a single message. When the size of inserted data is greater than this value, proxy fragments the data into multiple messages to ensure that they can be transmitted correctly.
  185. # If the corresponding parameter in Pulsar remains unchanged, increasing this configuration will cause Milvus to fail, and reducing it produces no advantage.
  186. maxMessageSize: 2097152
  187. # Pulsar can be provisioned for specific tenants with appropriate capacity allocated to the tenant.
  188. # To share a Pulsar instance among multiple Milvus instances, you can change this to a Pulsar tenant rather than the default one for each Milvus instance before you start them. However, if you do not want Pulsar multi-tenancy, you are advised to change msgChannel.chanNamePrefix.cluster to a different value.
  189. tenant: public
  190. namespace: default # A Pulsar namespace is the administrative unit nomenclature within a tenant.
  191. requestTimeout: 60 # pulsar client global request timeout in seconds
  192. enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path.
  193. # If you want to enable kafka, you need to comment out the pulsar configs
  194. # kafka:
  195. # brokerList:
  196. # saslUsername:
  197. # saslPassword:
  198. # saslMechanisms:
  199. # securityProtocol:
  200. # ssl:
  201. # enabled: false # whether to enable ssl mode
  202. # tlsCert: # path to client's public key (PEM) used for authentication
  203. # tlsKey: # path to client's private key (PEM) used for authentication
  204. # tlsCaCert: # file or directory path to CA certificate(s) for verifying the broker's key
  205. # tlsKeyPassword: # private key passphrase for use with ssl.key.location and set_ssl_cert(), if any
  206. # readTimeout: 10
  207. rocksmq:
  208. # Prefix of the key to where Milvus stores data in RocksMQ.
  209. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  210. # It is recommended to change this parameter before starting Milvus for the first time.
  211. # Set an easy-to-identify root key prefix for Milvus if etcd service already exists.
  212. path: /var/lib/milvus/rdb_data
  213. lrucacheratio: 0.06 # rocksdb cache memory ratio
  214. rocksmqPageSize: 67108864 # The maximum size of messages in each page in RocksMQ. Messages in RocksMQ are checked and cleared (when expired) in batch based on this parameter. Unit: Byte.
  215. retentionTimeInMinutes: 4320 # The maximum retention time of acked messages in RocksMQ. Acked messages in RocksMQ are retained for the specified period of time and then cleared. Unit: Minute.
  216. retentionSizeInMB: 8192 # The maximum retention size of acked messages of each topic in RocksMQ. Acked messages in each topic are cleared if their size exceeds this parameter. Unit: MB.
  217. compactionInterval: 86400 # Time interval to trigger rocksdb compaction to remove deleted data. Unit: Second
  218. compressionTypes: 0,0,7,7,7 # compaction compression type, only support use 0,7. 0 means not compress, 7 will use zstd. Length of types means num of rocksdb level.
  219. # natsmq configuration.
  220. # more detail: https://docs.nats.io/running-a-nats-service/configuration
  221. natsmq:
  222. server:
  223. port: 4222 # Listening port of the NATS server.
  224. storeDir: /var/lib/milvus/nats # Directory to use for JetStream storage of nats
  225. maxFileStore: 17179869184 # Maximum size of the 'file' storage
  226. maxPayload: 8388608 # Maximum number of bytes in a message payload
  227. maxPending: 67108864 # Maximum number of bytes buffered for a connection. Applies to client connections
  228. initializeTimeout: 4000 # waiting for initialization of natsmq finished
  229. monitor:
  230. trace: false # If true enable protocol trace log messages
  231. debug: false # If true enable debug log messages
  232. logTime: true # If set to false, log without timestamps.
  233. logFile: /tmp/milvus/logs/nats.log # Log file path relative to .. of milvus binary if use relative path
  234. logSizeLimit: 536870912 # Size in bytes after the log file rolls over to a new one
  235. retention:
  236. maxAge: 4320 # Maximum age of any message in the P-channel
  237. maxBytes: # How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size
  238. maxMsgs: # How many messages the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit
  239. # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
  240. rootCoord:
  241. dmlChannelNum: 16 # The number of DML-Channels to create at the root coord startup.
  242. # The maximum number of partitions in each collection.
  243. # New partitions cannot be created if this parameter is set as 0 or 1.
  244. # Range: [0, INT64MAX]
  245. maxPartitionNum: 1024
  246. # The minimum row count of a segment required for creating index.
  247. # Segments with smaller size than this parameter will not be indexed, and will be searched with brute force.
  248. minSegmentSizeToEnableIndex: 1024
  249. enableActiveStandby: false
  250. maxDatabaseNum: 64 # Maximum number of database
  251. maxGeneralCapacity: 65536 # upper limit for the sum of the product of partitionNumber and shardNumber
  252. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  253. ip: # TCP/IP address of rootCoord. If not specified, use the first unicastable address
  254. port: 53100 # TCP port of rootCoord
  255. grpc:
  256. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the rootCoord can send, unit: byte
  257. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the rootCoord can receive, unit: byte
  258. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on rootCoord can send, unit: byte
  259. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on rootCoord can receive, unit: byte
  260. # Related configuration of proxy, used to validate client requests and reduce the returned results.
  261. proxy:
  262. timeTickInterval: 200 # The interval at which proxy synchronizes the time tick, unit: ms.
  263. healthCheckTimeout: 3000 # ms, the interval that to do component healthy check
  264. msgStream:
  265. timeTick:
  266. bufSize: 512 # The maximum number of messages can be buffered in the timeTick message stream of the proxy when producing messages.
  267. maxNameLength: 255 # The maximum length of the name or alias that can be created in Milvus, including the collection name, collection alias, partition name, and field name.
  268. maxFieldNum: 64 # The maximum number of fields that can be created in a collection. It is strongly DISCOURAGED to set maxFieldNum >= 64.
  269. maxVectorFieldNum: 4 # The maximum number of vector fields that can be specified in a collection. Value range: [1, 10].
  270. maxShardNum: 16 # The maximum number of shards that can be created in a collection.
  271. maxDimension: 32768 # The maximum number of dimensions a vector can have when created in a collection.
  272. # Whether to produce gin logs.
  273. # please adjust in embedded Milvus: false
  274. ginLogging: true
  275. ginLogSkipPaths: / # skip url path for gin log
  276. maxTaskNum: 1024 # The maximum number of tasks in the task queue of the proxy.
  277. mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection
  278. accessLog:
  279. enable: false # Whether to enable the access log feature.
  280. minioEnable: false # Whether to upload local access log files to MinIO. This parameter can be specified when proxy.accessLog.filename is not empty.
  281. localPath: /tmp/milvus_access # The local folder path where the access log file is stored. This parameter can be specified when proxy.accessLog.filename is not empty.
  282. filename: # The name of the access log file. If you leave this parameter empty, access logs will be printed to stdout.
  283. maxSize: 64 # The maximum size allowed for a single access log file. If the log file size reaches this limit, a rotation process will be triggered. This process seals the current access log file, creates a new log file, and clears the contents of the original log file. Unit: MB.
  284. rotatedTime: 0 # The maximum time interval allowed for rotating a single access log file. Upon reaching the specified time interval, a rotation process is triggered, resulting in the creation of a new access log file and sealing of the previous one. Unit: seconds
  285. remotePath: access_log/ # The path of the object storage for uploading access log files.
  286. remoteMaxTime: 0 # The time interval allowed for uploading access log files. If the upload time of a log file exceeds this interval, the file will be deleted. Setting the value to 0 disables this feature.
  287. formatters:
  288. base:
  289. format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost]"
  290. query:
  291. format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost] [database: $database_name] [collection: $collection_name] [partitions: $partition_name] [expr: $method_expr]"
  292. methods: "Query,Search,Delete"
  293. cacheSize: 0 # Size of log of write cache, in byte. (Close write cache if size was 0)
  294. cacheFlushInterval: 3 # time interval of auto flush write cache, in seconds. (Close auto flush if interval was 0)
  295. connectionCheckIntervalSeconds: 120 # the interval time(in seconds) for connection manager to scan inactive client info
  296. connectionClientInfoTTLSeconds: 86400 # inactive client info TTL duration, in seconds
  297. maxConnectionNum: 10000 # the max client info numbers that proxy should manage, avoid too many client infos
  298. gracefulStopTimeout: 30 # seconds. force stop node without graceful stop
  299. slowQuerySpanInSeconds: 5 # query whose executed time exceeds the `slowQuerySpanInSeconds` can be considered slow, in seconds.
  300. queryNodePooling:
  301. size: 10 # the size for shardleader(querynode) client pool
  302. http:
  303. enabled: true # Whether to enable the http server
  304. debug_mode: false # Whether to enable http server debug mode
  305. port: # high-level restful api
  306. acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64
  307. enablePprof: true # Whether to enable pprof middleware on the metrics port
  308. ip: # TCP/IP address of proxy. If not specified, use the first unicastable address
  309. port: 19530 # TCP port of proxy
  310. internalPort: 19529
  311. grpc:
  312. serverMaxSendSize: 268435456 # The maximum size of each RPC request that the proxy can send, unit: byte
  313. serverMaxRecvSize: 67108864 # The maximum size of each RPC request that the proxy can receive, unit: byte
  314. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on proxy can send, unit: byte
  315. clientMaxRecvSize: 67108864 # The maximum size of each RPC request that the clients on proxy can receive, unit: byte
  316. # Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments.
  317. queryCoord:
  318. taskMergeCap: 1
  319. taskExecutionCap: 256
  320. # Switch value to control if to automatically replace a growing segment with the corresponding indexed sealed segment when the growing segment reaches the sealing threshold.
  321. # If this parameter is set false, Milvus simply searches the growing segments with brute force.
  322. autoHandoff: true
  323. autoBalance: true # Switch value to control if to automatically balance the memory usage among query nodes by distributing segment loading and releasing operations evenly.
  324. autoBalanceChannel: true # Enable auto balance channel
  325. balancer: ScoreBasedBalancer # auto balancer used for segments on queryNodes
  326. globalRowCountFactor: 0.1 # the weight used when balancing segments among queryNodes
  327. scoreUnbalanceTolerationFactor: 0.05 # the least value for unbalanced extent between from and to nodes when doing balance
  328. reverseUnBalanceTolerationFactor: 1.3 # the largest value for unbalanced extent between from and to nodes after doing balance
  329. overloadedMemoryThresholdPercentage: 90 # The threshold of memory usage (in percentage) in a query node to trigger the sealed segment balancing.
  330. balanceIntervalSeconds: 60 # The interval at which query coord balances the memory usage among query nodes.
  331. memoryUsageMaxDifferencePercentage: 30 # The threshold of memory usage difference (in percentage) between any two query nodes to trigger the sealed segment balancing.
  332. rowCountFactor: 0.4 # the row count weight used when balancing segments among queryNodes
  333. segmentCountFactor: 0.4 # the segment count weight used when balancing segments among queryNodes
  334. globalSegmentCountFactor: 0.1 # the segment count weight used when balancing segments among queryNodes
  335. segmentCountMaxSteps: 50 # segment count based plan generator max steps
  336. rowCountMaxSteps: 50 # row count based plan generator max steps
  337. randomMaxSteps: 10 # random plan generator max steps
  338. growingRowCountWeight: 4 # the memory weight of growing segment row count
  339. delegatorMemoryOverloadFactor: 0.1 # the factor of delegator overloaded memory
  340. balanceCostThreshold: 0.001 # the threshold of balance cost, if the difference of cluster's cost after executing the balance plan is less than this value, the plan will not be executed
  341. checkSegmentInterval: 1000
  342. checkChannelInterval: 1000
  343. checkBalanceInterval: 10000
  344. checkIndexInterval: 10000
  345. channelTaskTimeout: 60000 # 1 minute
  346. segmentTaskTimeout: 120000 # 2 minute
  347. distPullInterval: 500
  348. heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available
  349. loadTimeoutSeconds: 600
  350. distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds
  351. heatbeatWarningLag: 5000 # the lag value for querycoord report warning when last heartbeat is too old, in milliseconds
  352. checkHandoffInterval: 5000
  353. enableActiveStandby: false
  354. checkInterval: 1000
  355. checkHealthInterval: 3000 # 3s, the interval at which query coord tries to check health of query node
  356. checkHealthRPCTimeout: 2000 # 2000ms, the timeout of check health rpc to query node
  357. brokerTimeout: 5000 # 5000ms, querycoord broker rpc timeout
  358. collectionRecoverTimes: 3 # if collection recover times reach the limit during loading state, release it
  359. observerTaskParallel: 16 # the parallel observer dispatcher task number
  360. checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config
  361. checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
  362. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  363. enableStoppingBalance: true # whether enable stopping balance
  364. channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode
  365. collectionObserverInterval: 200 # the interval of collection observer
  366. checkExecutedFlagInterval: 100 # the interval of check executed flag to force to pull dist
  367. updateCollectionLoadStatusInterval: 5 # 5m, max interval of updating collection loaded status for check health
  368. cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds
  369. ip: # TCP/IP address of queryCoord. If not specified, use the first unicastable address
  370. port: 19531 # TCP port of queryCoord
  371. grpc:
  372. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryCoord can send, unit: byte
  373. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryCoord can receive, unit: byte
  374. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryCoord can send, unit: byte
  375. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryCoord can receive, unit: byte
  376. # Related configuration of queryNode, used to run hybrid search between vector and scalar data.
  377. queryNode:
  378. stats:
  379. publishInterval: 1000 # The interval that query node publishes the node statistics information, including segment status, cpu usage, memory usage, health status, etc. Unit: ms.
  380. segcore:
  381. knowhereThreadPoolNumRatio: 4 # The number of threads in knowhere's thread pool. If disk is enabled, the pool size will multiply with knowhereThreadPoolNumRatio([1, 32]).
  382. chunkRows: 128 # Row count by which Segcore divides a segment into chunks.
  383. interimIndex:
  384. # Whether to create a temporary index for growing segments and sealed segments not yet indexed, improving search performance.
  385. # Milvus will eventually seal and index all segments, but enabling this optimizes search performance for immediate queries following data insertion.
  386. # This defaults to true, indicating that Milvus creates temporary index for growing segments and the sealed segments that are not indexed upon searches.
  387. enableIndex: true
  388. nlist: 128 # temp index nlist, recommend to set sqrt(chunkRows), must be smaller than chunkRows/8
  389. nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must be smaller than nlist
  390. memExpansionRate: 1.15 # extra memory needed by building interim index
  391. buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
  392. multipleChunkedEnable: false # Enable multiple chunked search
  393. knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
  394. loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
  395. enableDisk: false # enable querynode load disk index, and search on disk index
  396. maxDiskUsagePercentage: 95
  397. cache:
  398. memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024
  399. readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
  400. # options: async, sync, disable.
  401. # Specifies the necessity for warming up the chunk cache.
  402. # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the
  403. # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency
  404. # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage;
  405. # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query.
  406. warmup: disable
  407. mmap:
  408. vectorField: false # Enable mmap for loading vector data
  409. vectorIndex: false # Enable mmap for loading vector index
  410. scalarField: false # Enable mmap for loading scalar data
  411. scalarIndex: false # Enable mmap for loading scalar index
  412. chunkCache: true # Enable mmap for chunk cache (raw vector retrieving).
  413. # Enable memory mapping (mmap) to optimize the handling of growing raw data.
  414. # By activating this feature, the memory overhead associated with newly added or modified data will be significantly minimized.
  415. # However, this optimization may come at the cost of a slight increase in query latency for the affected data segments.
  416. growingMmapEnabled: false
  417. fixedFileSizeForMmapAlloc: 1 # tmp file size for mmap chunk manager
  418. maxDiskUsagePercentageForMmapAlloc: 50 # disk percentage used in mmap chunk manager
  419. lazyload:
  420. enabled: false # Enable lazyload for loading data
  421. waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve
  422. requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default
  423. requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
  424. maxRetryTimes: 1 # max retry times for lazy load, 1 by default
  425. maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
  426. indexOffsetCacheEnabled: false # enable index offset cache for some scalar indexes, now is just for bitmap index, enable this param can improve performance for retrieving raw data from index
  427. grouping:
  428. enabled: true
  429. maxNQ: 1000
  430. topKMergeRatio: 20
  431. scheduler:
  432. receiveChanSize: 10240
  433. unsolvedQueueSize: 10240
  434. # maxReadConcurrentRatio is the concurrency ratio of read task (search task and query task).
  435. # Max read concurrency would be the value of hardware.GetCPUNum * maxReadConcurrentRatio.
  436. # It defaults to 1.0, which means max read concurrency would be the value of hardware.GetCPUNum * 1.
  437. # Max read concurrency must be greater than or equal to 1, and less than or equal to hardware.GetCPUNum * 100.
  438. # (0, 100]
  439. maxReadConcurrentRatio: 1
  440. cpuRatio: 10 # ratio used to estimate read task cpu usage.
  441. maxTimestampLag: 86400
  442. scheduleReadPolicy:
  443. # fifo: A FIFO queue supports the schedule.
  444. # user-task-polling:
  445. # The user's tasks will be polled one by one and scheduled.
  446. # Scheduling is fair on task granularity.
  447. # The policy is based on the username for authentication.
  448. # And an empty username is considered the same user.
  449. # When there are no multi-users, the policy decays into FIFO.
  450. name: fifo
  451. taskQueueExpire: 60 # Control how long (many seconds) that queue retains since queue is empty
  452. enableCrossUserGrouping: false # Enable Cross user grouping when using user-task-polling policy. (Disable it if user's task can not merge each other)
  453. maxPendingTaskPerUser: 1024 # Max pending task per user in scheduler
  454. levelZeroForwardPolicy: FilterByBF # delegator level zero deletion forward policy, possible option["FilterByBF", "RemoteLoad"]
  455. streamingDeltaForwardPolicy: FilterByBF # delegator streaming deletion forward policy, possible option["FilterByBF", "Direct"]
  456. dataSync:
  457. flowGraph:
  458. maxQueueLength: 16 # The maximum size of task queue cache in flow graph in query node.
  459. maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
  460. enableSegmentPrune: false # use partition stats to prune data in search/query on shard delegator
  461. queryStreamBatchSize: 4194304 # return min batch size of stream query
  462. queryStreamMaxBatchSize: 134217728 # return max batch size of stream query
  463. bloomFilterApplyParallelFactor: 4 # parallel factor when to apply pk to bloom filter, default to 4*CPU_CORE_NUM
  464. workerPooling:
  465. size: 10 # the size for worker querynode client pool
  466. ip: # TCP/IP address of queryNode. If not specified, use the first unicastable address
  467. port: 21123 # TCP port of queryNode
  468. grpc:
  469. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryNode can send, unit: byte
  470. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryNode can receive, unit: byte
  471. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryNode can send, unit: byte
  472. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryNode can receive, unit: byte
  473. indexCoord:
  474. bindIndexNodeMode:
  475. enable: false
  476. address: localhost:22930
  477. withCred: false
  478. nodeID: 0
  479. segment:
  480. minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed
  481. indexNode:
  482. scheduler:
  483. buildParallel: 1
  484. enableDisk: true # enable index node build disk vector index
  485. maxDiskUsagePercentage: 95
  486. ip: # TCP/IP address of indexNode. If not specified, use the first unicastable address
  487. port: 21121 # TCP port of indexNode
  488. grpc:
  489. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the indexNode can send, unit: byte
  490. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the indexNode can receive, unit: byte
  491. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on indexNode can send, unit: byte
  492. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on indexNode can receive, unit: byte
  493. dataCoord:
  494. channel:
  495. watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
  496. legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels
  497. balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing
  498. balanceInterval: 360 # The interval with which the channel manager check dml channel balance status
  499. checkInterval: 1 # The interval in seconds with which the channel manager advances channel states
  500. notifyChannelOperationTimeout: 5 # Timeout for notifying channel operations (in seconds).
  501. segment:
  502. maxSize: 1024 # The maximum size of a segment, unit: MB. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed.
  503. diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index
  504. sealProportion: 0.12 # The minimum proportion to datacoord.segment.maxSize to seal a segment. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed.
  505. sealProportionJitter: 0.1 # segment seal proportion jitter ratio, default value 0.1(10%), if seal proportion is 12%, with jitter=0.1, the actual applied ratio will be 10.8~12%
  506. assignmentExpiration: 2000 # Expiration time of the segment assignment, unit: ms
  507. allocLatestExpireAttempt: 200 # The time attempting to alloc latest lastExpire from rootCoord after restart
  508. maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60
  509. # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than
  510. # minSizeFromIdleToSealed, Milvus will automatically seal it.
  511. # The max idle time of segment in seconds, 10*60.
  512. maxIdleTime: 600
  513. minSizeFromIdleToSealed: 16 # The min size in MB of an idle segment for it to be sealed.
  514. # The max number of binlog file for one segment, the segment will be sealed if
  515. # the number of binlog file reaches to max value.
  516. maxBinlogFileNumber: 32
  517. smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than
  518. # (smallProportion * segment max # of rows).
  519. # A compaction will happen on small segments if the segment after compaction will have
  520. compactableProportion: 0.85
  521. # over (compactableProportion * segment max # of rows) rows.
  522. # MUST BE GREATER THAN OR EQUAL TO <smallProportion>!!!
  523. # During compaction, the size of segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%.
  524. expansionRate: 1.25
  525. sealPolicy:
  526. channel:
  527. # The size threshold in MB, if the total size of growing segments of each shard
  528. # exceeds this threshold, the largest growing segment will be sealed.
  529. growingSegmentsMemSize: 4096
  530. autoUpgradeSegmentIndex: false # whether auto upgrade segment index to index engine's version
  531. segmentFlushInterval: 2 # the minimal interval duration(unit: Seconds) between flushing operation on same segment
  532. # Switch value to control if to enable segment compaction.
  533. # Compaction merges small-size segments into a large segment, and clears the entities deleted beyond the retention duration of Time Travel.
  534. enableCompaction: true
  535. compaction:
  536. # Switch value to control if to enable automatic segment compaction during which data coord locates and merges compactable segments in the background.
  537. # This configuration takes effect only when dataCoord.enableCompaction is set as true.
  538. enableAutoCompaction: true
  539. indexBasedCompaction: true
  540. # compaction task prioritizer, options: [default, level, mix].
  541. # default is FIFO.
  542. # level is prioritized by level: L0 compactions first, then mix compactions, then clustering compactions.
  543. # mix is prioritized by level: mix compactions first, then L0 compactions, then clustering compactions.
  544. taskPrioritizer: default
  545. rpcTimeout: 10
  546. maxParallelTaskNum: 10
  547. workerMaxParallelTaskNum: 2
  548. dropTolerance: 86400 # Compaction task will be cleaned after finish longer than this time(in seconds)
  549. gcInterval: 1800 # The time interval in seconds for compaction gc
  550. clustering:
  551. enable: true # Enable clustering compaction
  552. autoEnable: false # Enable auto clustering compaction
  553. triggerInterval: 600 # clustering compaction trigger interval in seconds
  554. minInterval: 3600 # The minimum interval between clustering compaction executions of one collection, to avoid redundant compaction
  555. maxInterval: 259200 # If a collection hasn't been clustering compacted for longer than maxInterval, force compact
  556. newDataSizeThreshold: 512m # If new data size is larger than newDataSizeThreshold, execute clustering compaction
  557. preferSegmentSizeRatio: 0.8
  558. maxSegmentSizeRatio: 1
  559. maxTrainSizeRatio: 0.8 # max data size ratio in Kmeans train, if larger than it, will down sampling to meet this limit
  560. maxCentroidsNum: 10240 # maximum centroids number in Kmeans train
  561. minCentroidsNum: 16 # minimum centroids number in Kmeans train
  562. minClusterSizeRatio: 0.01 # minimum cluster size / avg size in Kmeans train
  563. maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train
  564. maxClusterSize: 5g # maximum cluster size in Kmeans train
  565. levelzero:
  566. forceTrigger:
  567. minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, default as 8MB
  568. maxSize: 67108864 # The maximum size in bytes to force trigger a LevelZero Compaction, default as 64MB
  569. deltalogMinNum: 10 # The minimum number of deltalog files to force trigger a LevelZero Compaction
  570. deltalogMaxNum: 30 # The maximum number of deltalog files to force trigger a LevelZero Compaction, default as 30
  571. syncSegmentsInterval: 300 # The time interval for regularly syncing segments
  572. enableGarbageCollection: true # Switch value to control if to enable garbage collection to clear the discarded data in MinIO or S3 service.
  573. gc:
  574. interval: 3600 # The interval at which data coord performs garbage collection, unit: second.
  575. missingTolerance: 86400 # The retention duration of the unrecorded binary log (binlog) files. Setting a reasonably large value for this parameter avoids erroneously deleting the newly created binlog files that lack metadata. Unit: second.
  576. dropTolerance: 10800 # The retention duration of the binlog files of the deleted segments before they are cleared, unit: second.
  577. removeConcurrent: 32 # number of concurrent goroutines to remove dropped s3 objects
  578. scanInterval: 168 # orphan file (file on oss but has not been registered on meta) on object storage garbage collection scanning interval in hours
  579. enableActiveStandby: false
  580. brokerTimeout: 5000 # 5000ms, dataCoord broker rpc timeout
  581. autoBalance: true # Enable auto balance
  582. checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config
  583. import:
  584. filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task.
  585. taskRetention: 10800 # The retention period in seconds for tasks in the Completed or Failed state.
  586. maxSizeInMBPerImportTask: 6144 # To prevent generating of small segments, we will re-group imported files. This parameter represents the sum of file sizes in each group (each ImportTask).
  587. scheduleInterval: 2 # The interval for scheduling import, measured in seconds.
  588. checkIntervalHigh: 2 # The interval for checking import, measured in seconds, is set to a high frequency for the import checker.
  589. checkIntervalLow: 120 # The interval for checking import, measured in seconds, is set to a low frequency for the import checker.
  590. maxImportFileNumPerReq: 1024 # The maximum number of files allowed per single import request.
  591. waitForIndex: true # Indicates whether the import operation waits for the completion of index building.
  592. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  593. slot:
  594. clusteringCompactionUsage: 16 # slot usage of clustering compaction job.
  595. mixCompactionUsage: 8 # slot usage of mix compaction job.
  596. l0DeleteCompactionUsage: 8 # slot usage of l0 compaction job.
  597. ip: # TCP/IP address of dataCoord. If not specified, use the first unicastable address
  598. port: 13333 # TCP port of dataCoord
  599. grpc:
  600. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataCoord can send, unit: byte
  601. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataCoord can receive, unit: byte
  602. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataCoord can send, unit: byte
  603. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataCoord can receive, unit: byte
  604. dataNode:
  605. dataSync:
  606. flowGraph:
  607. maxQueueLength: 16 # Maximum length of task queue in flowgraph
  608. maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
  609. maxParallelSyncMgrTasks: 256 # The max concurrent sync task number of datanode sync mgr globally
  610. skipMode:
  611. enable: true # Support skip some timetick message to reduce CPU usage
  612. skipNum: 4 # Consume one for every n records skipped
  613. coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds
  614. segment:
  615. # The maximum size of each binlog file in a segment buffered in memory. Binlog files whose size exceeds this value are then flushed to MinIO or S3 service.
  616. # Unit: Byte
  617. # Setting this parameter too small causes the system to store a small amount of data too frequently. Setting it too large increases the system's demand for memory.
  618. insertBufSize: 16777216
  619. deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB
  620. syncPeriod: 600 # The period to sync segments if buffer is not empty.
  621. memory:
  622. forceSyncEnable: true # Set true to force sync if memory usage is too high
  623. forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced.
  624. checkInterval: 3000 # the interval to check datanode memory usage, in milliseconds
  625. forceSyncWatermark: 0.5 # memory watermark for standalone, upon reaching this watermark, segments will be synced.
  626. timetick:
  627. interval: 500
  628. channel:
  629. # specify the size of global work pool of all channels
  630. # if this parameter <= 0, will set it as the maximum number of CPUs that can be executing
  631. # suggest to set it bigger on large collection numbers to avoid blocking
  632. workPoolSize: -1
  633. # specify the size of global work pool for channel checkpoint updating
  634. # if this parameter <= 0, will set it as 10
  635. updateChannelCheckpointMaxParallel: 10
  636. updateChannelCheckpointInterval: 60 # the interval duration(in seconds) for datanode to update channel checkpoint of each channel
  637. updateChannelCheckpointRPCTimeout: 20 # timeout in seconds for UpdateChannelCheckpoint RPC call
  638. maxChannelCheckpointsPerPRC: 128 # The maximum number of channel checkpoints per UpdateChannelCheckpoint RPC.
  639. channelCheckpointUpdateTickInSeconds: 10 # The frequency, in seconds, at which the channel checkpoint updater executes updates.
  640. import:
  641. maxConcurrentTaskNum: 16 # The maximum number of import/pre-import tasks allowed to run concurrently on a datanode.
  642. maxImportFileSizeInGB: 16 # The maximum file size (in GB) for an import file, where an import file refers to either a Row-Based file or a set of Column-Based files.
  643. readBufferSizeInMB: 16 # The data block size (in MB) read from chunk manager by the datanode during import.
  644. maxTaskSlotNum: 16 # The maximum number of slots occupied by each import/pre-import task.
  645. compaction:
  646. levelZeroBatchMemoryRatio: 0.5 # The minimal memory ratio of free memory for level zero compaction executing in batch mode
  647. levelZeroMaxBatchSize: -1 # Max batch size refers to the max number of L1/L2 segments in a batch when executing L0 compaction. Default to -1, any value that is less than 1 means no limit. Valid range: >= 1.
  648. gracefulStopTimeout: 1800 # seconds. force stop node without graceful stop
  649. slot:
  650. slotCap: 16 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode
  651. clusteringCompaction:
  652. memoryBufferRatio: 0.1 # The ratio of memory buffer of clustering compaction. Data larger than threshold will be flushed to storage.
  653. workPoolSize: 8 # worker pool size for one clustering compaction job.
  654. bloomFilterApplyParallelFactor: 4 # parallel factor when to apply pk to bloom filter, default to 4*CPU_CORE_NUM
  655. storage:
  656. deltalog: json # deltalog format, options: [json, parquet]
  657. ip: # TCP/IP address of dataNode. If not specified, use the first unicastable address
  658. port: 21124 # TCP port of dataNode
  659. grpc:
  660. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataNode can send, unit: byte
  661. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataNode can receive, unit: byte
  662. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataNode can send, unit: byte
  663. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataNode can receive, unit: byte
  664. # This topic introduces the message channel-related configurations of Milvus.
  665. msgChannel:
  666. chanNamePrefix:
  667. # Root name prefix of the channel when a message channel is created.
  668. # It is recommended to change this parameter before starting Milvus for the first time.
  669. # To share a Pulsar instance among multiple Milvus instances, consider changing this to a name rather than the default one for each Milvus instance before you start them.
  670. cluster: by-dev
  671. # Sub-name prefix of the message channel where the root coord publishes time tick messages.
  672. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordTimeTick}
  673. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  674. # It is recommended to change this parameter before starting Milvus for the first time.
  675. rootCoordTimeTick: rootcoord-timetick
  676. # Sub-name prefix of the message channel where the root coord publishes its own statistics messages.
  677. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordStatistics}
  678. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  679. # It is recommended to change this parameter before starting Milvus for the first time.
  680. rootCoordStatistics: rootcoord-statistics
  681. # Sub-name prefix of the message channel where the root coord publishes Data Manipulation Language (DML) messages.
  682. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordDml}
  683. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  684. # It is recommended to change this parameter before starting Milvus for the first time.
  685. rootCoordDml: rootcoord-dml
  686. replicateMsg: replicate-msg
  687. # Sub-name prefix of the message channel where the query node publishes time tick messages.
  688. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.queryTimeTick}
  689. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  690. # It is recommended to change this parameter before starting Milvus for the first time.
  691. queryTimeTick: queryTimeTick
  692. # Sub-name prefix of the message channel where the data coord publishes time tick messages.
  693. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordTimeTick}
  694. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  695. # It is recommended to change this parameter before starting Milvus for the first time.
  696. dataCoordTimeTick: datacoord-timetick-channel
  697. # Sub-name prefix of the message channel where the data coord publishes segment information messages.
  698. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordSegmentInfo}
  699. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  700. # It is recommended to change this parameter before starting Milvus for the first time.
  701. dataCoordSegmentInfo: segment-info-channel
  702. subNamePrefix:
  703. # Subscription name prefix of the data coord.
  704. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  705. # It is recommended to change this parameter before starting Milvus for the first time.
  706. dataCoordSubNamePrefix: dataCoord
  707. # Subscription name prefix of the data node.
  708. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  709. # It is recommended to change this parameter before starting Milvus for the first time.
  710. dataNodeSubNamePrefix: dataNode
  711. # Configures the system log output.
  712. log:
  713. # Milvus log level. Option: debug, info, warn, error, panic, and fatal.
  714. # It is recommended to use debug level under test and development environments, and info level in production environment.
  715. level: info
  716. file:
  717. # Root path to the log files.
  718. # The default value is set empty, indicating to output log files to standard output (stdout) and standard error (stderr).
  719. # If this parameter is set to a valid local path, Milvus writes and stores log files in this path.
  720. # Set this parameter as the path that you have permission to write.
  721. rootPath:
  722. maxSize: 300 # The maximum size of a log file, unit: MB.
  723. maxAge: 10 # The maximum retention time before a log file is automatically cleared, unit: day. The minimum value is 1.
  724. maxBackups: 20 # The maximum number of log files to back up. The minimum value is 1.
  725. format: text # Milvus log format. Option: text and JSON
  726. stdout: true # Stdout enable or not
  727. grpc:
  728. log:
  729. level: WARNING
  730. gracefulStopTimeout: 10 # second, time to wait graceful stop finish
  731. client:
  732. compressionEnabled: false
  733. dialTimeout: 200
  734. keepAliveTime: 10000
  735. keepAliveTimeout: 20000
  736. maxMaxAttempts: 10
  737. initialBackoff: 0.2
  738. maxBackoff: 10
  739. backoffMultiplier: 2
  740. minResetInterval: 1000
  741. maxCancelError: 32
  742. minSessionCheckInterval: 200
  743. # Configure the proxy tls enable.
  744. tls:
  745. serverPemPath: configs/cert/server.pem
  746. serverKeyPath: configs/cert/server.key
  747. caPemPath: configs/cert/ca.pem
  748. common:
  749. defaultPartitionName: _default # Name of the default partition when a collection is created
  750. defaultIndexName: _default_idx # Name of the index when it is created with name unspecified
  751. entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
  752. indexSliceSize: 16 # Index slice size in MB
  753. threadCoreCoefficient:
  754. highPriority: 10 # This parameter specifies how many times the number of threads is the number of cores in high priority pool
  755. middlePriority: 5 # This parameter specifies how many times the number of threads is the number of cores in middle priority pool
  756. lowPriority: 1 # This parameter specifies how many times the number of threads is the number of cores in low priority pool
  757. buildIndexThreadPoolRatio: 0.75
  758. DiskIndex:
  759. MaxDegree: 56
  760. SearchListSize: 100
  761. PQCodeBudgetGBRatio: 0.125
  762. BuildNumThreadsRatio: 1
  763. SearchCacheBudgetGBRatio: 0.1
  764. LoadNumThreadRatio: 8
  765. BeamWidthRatio: 4
  766. gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency.
  767. gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time.
  768. storageType: remote # please adjust in embedded Milvus: local, available values are [local, remote, opendal], value minio is deprecated, use remote instead
  769. # Default value: auto
  770. # Valid values: [auto, avx512, avx2, avx, sse4_2]
  771. # This configuration is only used by querynode and indexnode, it selects CPU instruction set for Searching and Index-building.
  772. simdType: auto
  773. security:
  774. authorizationEnabled: false
  775. # The superusers will ignore some system check processes,
  776. # like the old password verification when updating the credential
  777. superUsers:
  778. defaultRootPassword: Milvus # default password for root user
  779. tlsMode: 0
  780. session:
  781. ttl: 30 # ttl value when session granting a lease to register service
  782. retryTimes: 30 # retry times when session sending etcd requests
  783. locks:
  784. metrics:
  785. enable: false # whether gather statistics for metrics locks
  786. threshold:
  787. info: 500 # minimum milliseconds for printing durations in info level
  788. warn: 1000 # minimum milliseconds for printing durations in warn level
  789. storage:
  790. scheme: s3
  791. enablev2: false
  792. # Whether to enable the internal time messaging mechanism for the system.
  793. # If disabled (set to false), the system will not allow DML operations, including insertion, deletion, queries, and searches.
  794. # This helps Milvus-CDC synchronize incremental data
  795. ttMsgEnabled: true
  796. traceLogMode: 0 # trace request info
  797. bloomFilterSize: 100000 # bloom filter initial size
  798. bloomFilterType: BlockedBloomFilter # bloom filter type, support BasicBloomFilter and BlockedBloomFilter
  799. maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter
  800. bloomFilterApplyBatchSize: 1000 # batch size when applying pk to bloom filter
  801. usePartitionKeyAsClusteringKey: false # if true, do clustering compaction and segment prune on partition key field
  802. useVectorAsClusteringKey: false # if true, do clustering compaction and segment prune on vector field
  803. enableVectorClusteringKey: false # if true, enable vector clustering key and vector clustering compaction
  804. # QuotaConfig, configurations of Milvus quota and limits.
  805. # By default, we enable:
  806. # 1. Memory protection;
  807. # 2. Disk quota protection.
  808. # You can enable:
  809. # 1. TT protection;
  810. # 2. DML throughput limitation;
  811. # 3. DDL, DQL qps/rps limitation;
  812. # 4. DQL Queue length/latency protection;
  813. # 5. DQL result rate protection;
  814. # If necessary, you can also manually force to deny RW requests.
  815. quotaAndLimits:
  816. enabled: true # `true` to enable quota and limits, `false` to disable.
  817. # quotaCenterCollectInterval is the time interval that quotaCenter
  818. # collects metrics from Proxies, Query cluster and Data cluster.
  819. # seconds, (0 ~ 65536)
  820. quotaCenterCollectInterval: 3
  821. limits:
  822. allocRetryTimes: 15 # retry times when delete alloc forward data from rate limit failed
  823. allocWaitInterval: 1000 # retry wait duration when delete alloc forward data rate failed, in millisecond
  824. complexDeleteLimitEnable: false # whether complex delete check forward data by limiter
  825. maxCollectionNum: 65536
  826. maxCollectionNumPerDB: 65536 # Maximum number of collections per database.
  827. maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit
  828. maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes
  829. maxGroupSize: 10 # maximum size for one single group when doing search group by
  830. ddl:
  831. enabled: false # Whether DDL request throttling is enabled.
  832. # Maximum number of collection-related DDL requests per second.
  833. # Setting this item to 10 indicates that Milvus processes no more than 10 collection-related DDL requests per second, including collection creation requests, collection drop requests, collection load requests, and collection release requests.
  834. # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time.
  835. collectionRate: -1
  836. # Maximum number of partition-related DDL requests per second.
  837. # Setting this item to 10 indicates that Milvus processes no more than 10 partition-related requests per second, including partition creation requests, partition drop requests, partition load requests, and partition release requests.
  838. # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time.
  839. partitionRate: -1
  840. db:
  841. collectionRate: -1 # qps of db level, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
  842. partitionRate: -1 # qps of db level, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
  843. indexRate:
  844. enabled: false # Whether index-related request throttling is enabled.
  845. # Maximum number of index-related requests per second.
  846. # Setting this item to 10 indicates that Milvus processes no more than 10 index-related requests per second, including index creation requests and index drop requests.
  847. # To use this setting, set quotaAndLimits.indexRate.enabled to true at the same time.
  848. max: -1
  849. db:
  850. max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex
  851. flushRate:
  852. enabled: true # Whether flush request throttling is enabled.
  853. # Maximum number of flush requests per second.
  854. # Setting this item to 10 indicates that Milvus processes no more than 10 flush requests per second.
  855. # To use this setting, set quotaAndLimits.flushRate.enabled to true at the same time.
  856. max: -1
  857. collection:
  858. max: 0.1 # qps, rate for flush at collection level; set to -1 for no limit.
  859. db:
  860. max: -1 # qps of db level, default no limit, rate for flush
  861. compactionRate:
  862. enabled: false # Whether manual compaction request throttling is enabled.
  863. # Maximum number of manual-compaction requests per second.
  864. # Setting this item to 10 indicates that Milvus processes no more than 10 manual-compaction requests per second.
  865. # To use this setting, set quotaAndLimits.compactionRate.enabled to true at the same time.
  866. max: -1
  867. db:
  868. max: -1 # qps of db level, default no limit, rate for manualCompaction
  869. dml:
  870. enabled: false # Whether DML request throttling is enabled.
  871. insertRate:
  872. # Highest data insertion rate per second.
  873. # Setting this item to 5 indicates that Milvus only allows data insertion at the rate of 5 MB/s.
  874. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  875. max: -1
  876. db:
  877. max: -1 # MB/s, default no limit
  878. collection:
  879. # Highest data insertion rate per collection per second.
  880. # Setting this item to 5 indicates that Milvus only allows data insertion to any collection at the rate of 5 MB/s.
  881. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  882. max: -1
  883. partition:
  884. max: -1 # MB/s, default no limit
  885. upsertRate:
  886. max: -1 # MB/s, default no limit
  887. db:
  888. max: -1 # MB/s, default no limit
  889. collection:
  890. max: -1 # MB/s, default no limit
  891. partition:
  892. max: -1 # MB/s, default no limit
  893. deleteRate:
  894. # Highest data deletion rate per second.
  895. # Setting this item to 0.1 indicates that Milvus only allows data deletion at the rate of 0.1 MB/s.
  896. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  897. max: -1
  898. db:
  899. max: -1 # MB/s, default no limit
  900. collection:
  901. # Highest data deletion rate per second.
  902. # Setting this item to 0.1 indicates that Milvus only allows data deletion from any collection at the rate of 0.1 MB/s.
  903. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  904. max: -1
  905. partition:
  906. max: -1 # MB/s, default no limit
  907. bulkLoadRate:
  908. max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
  909. db:
  910. max: -1 # MB/s, default no limit, not support yet. TODO: limit db bulkLoad rate
  911. collection:
  912. max: -1 # MB/s, default no limit, not support yet. TODO: limit collection bulkLoad rate
  913. partition:
  914. max: -1 # MB/s, default no limit, not support yet. TODO: limit partition bulkLoad rate
  915. dql:
  916. enabled: false # Whether DQL request throttling is enabled.
  917. searchRate:
  918. # Maximum number of vectors to search per second.
  919. # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second no matter whether these 100 vectors are all in one search or scattered across multiple searches.
  920. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  921. max: -1
  922. db:
  923. max: -1 # vps (vectors per second), default no limit
  924. collection:
  925. # Maximum number of vectors to search per collection per second.
  926. # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second per collection no matter whether these 100 vectors are all in one search or scattered across multiple searches.
  927. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  928. max: -1
  929. partition:
  930. max: -1 # vps (vectors per second), default no limit
  931. queryRate:
  932. # Maximum number of queries per second.
  933. # Setting this item to 100 indicates that Milvus only allows 100 queries per second.
  934. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  935. max: -1
  936. db:
  937. max: -1 # qps, default no limit
  938. collection:
  939. # Maximum number of queries per collection per second.
  940. # Setting this item to 100 indicates that Milvus only allows 100 queries per collection per second.
  941. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  942. max: -1
  943. partition:
  944. max: -1 # qps, default no limit
  945. limitWriting:
  946. # forceDeny false means dml requests are allowed (except for some
  947. # specific conditions, such as node memory reaching the water level), true means always reject all dml requests.
  948. forceDeny: false
  949. ttProtection:
  950. enabled: false
  951. # maxTimeTickDelay indicates the backpressure for DML Operations.
  952. # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay,
  953. # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected.
  954. # seconds
  955. maxTimeTickDelay: 300
  956. memProtection:
  957. # When memory usage > memoryHighWaterLevel, all dml requests would be rejected;
  958. # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate;
  959. # When memory usage < memoryLowWaterLevel, no action.
  960. enabled: true
  961. dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes
  962. dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes
  963. queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes
  964. queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes
  965. growingSegmentsSizeProtection:
  966. # No action will be taken if the growing segments size is less than the low watermark.
  967. # When the growing segments size exceeds the low watermark, the dml rate will be reduced,
  968. # but the rate will not be lower than minRateRatio * dmlRate.
  969. enabled: false
  970. minRateRatio: 0.5
  971. lowWaterLevel: 0.2
  972. highWaterLevel: 0.4
  973. diskProtection:
  974. enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
  975. diskQuota: -1 # MB, (0, +inf), default no limit
  976. diskQuotaPerDB: -1 # MB, (0, +inf), default no limit
  977. diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit
  978. diskQuotaPerPartition: -1 # MB, (0, +inf), default no limit
  979. l0SegmentsRowCountProtection:
  980. enabled: false # switch to enable l0 segment row count quota
  981. lowWaterLevel: 30000000 # l0 segment row count quota, low water level
  982. highWaterLevel: 50000000 # l0 segment row count quota, high water level
  983. deleteBufferRowCountProtection:
  984. enabled: false # switch to enable delete buffer row count quota
  985. lowWaterLevel: 32768 # delete buffer row count quota, low water level
  986. highWaterLevel: 65536 # delete buffer row count quota, high water level
  987. deleteBufferSizeProtection:
  988. enabled: false # switch to enable delete buffer size quota
  989. lowWaterLevel: 134217728 # delete buffer size quota, low water level
  990. highWaterLevel: 268435456 # delete buffer size quota, high water level
  991. limitReading:
  992. # forceDeny false means dql requests are allowed (except for some
  993. # specific conditions, such as collection has been dropped), true means always reject all dql requests.
  994. forceDeny: false
  995. trace:
  996. # trace exporter type, default is noop,
  997. # optional values: ['noop','stdout', 'jaeger', 'otlp']
  998. exporter: noop
  999. # fraction of traceID based sampler,
  1000. # valid range: [0, 1]
  1001. # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
  1002. sampleFraction: 0
  1003. jaeger:
  1004. url: # when exporter is jaeger should set the jaeger's URL
  1005. otlp:
  1006. endpoint: # example: "127.0.0.1:4317" for grpc, "127.0.0.1:4318" for http
  1007. method: # otlp export method, acceptable values: ["grpc", "http"], using "grpc" by default
  1008. secure: true
  1009. initTimeoutSeconds: 10 # segcore initialization timeout in seconds, to prevent otlp grpc from hanging forever
  1010. # When using GPU indexing, Milvus will utilize a memory pool to avoid frequent memory allocation and deallocation.
  1011. # Here, you can set the size of the memory occupied by the memory pool, with the unit being MB.
  1012. # Note that there is a possibility of Milvus crashing when the actual memory demand exceeds the value set by maxMemSize.
  1013. # If initMemSize and maxMemSize are both set to zero,
  1014. # Milvus will automatically initialize half of the available GPU memory,
  1015. # and maxMemSize will be the whole available GPU memory.
  1016. gpu:
  1017. initMemSize: 2048 # Gpu Memory Pool init size
  1018. maxMemSize: 4096 # Gpu Memory Pool Max size
  1019. # Any configuration related to the streaming node server.
  1020. streamingNode:
  1021. ip: # TCP/IP address of streamingNode. If not specified, use the first unicastable address
  1022. port: 22222 # TCP port of streamingNode
  1023. grpc:
  1024. serverMaxSendSize: 268435456 # The maximum size of each RPC request that the streamingNode can send, unit: byte
  1025. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the streamingNode can receive, unit: byte
  1026. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on streamingNode can send, unit: byte
  1027. clientMaxRecvSize: 268435456 # The maximum size of each RPC request that the clients on streamingNode can receive, unit: byte
  1028. # Any configuration related to the streaming service.
  1029. streaming:
  1030. walBalancer:
  1031. # The interval of balance task trigger at background, 1 min by default.
  1032. # It can be set to a duration string, such as 30s or 1m30s, see time.ParseDuration
  1033. triggerInterval: 1m
  1034. # The initial interval of balance task trigger backoff, 50 ms by default.
  1035. # It can be set to a duration string, such as 30s or 1m30s, see time.ParseDuration
  1036. backoffInitialInterval: 50ms
  1037. backoffMultiplier: 2 # The multiplier of balance task trigger backoff, 2 by default
  1038. txn:
  1039. defaultKeepaliveTimeout: 10s # The default keepalive timeout for wal txn, 10s by default