docker 启动
参考:https://www.elastic.co/guide/en/elasticsearch/reference/5.1/docker.html#docker-cli-run-prod-mode
查询语句:https://n3xtchen.github.io/n3xtchen/elasticsearch/2017/07/05/elasticsearch-23-useful-query-example
mac系统配置参数:
screen ~/Library/Containers/com.docker.docker/Data/com.docker.driver.amd64-linux/tty
# or screen ~/Library/Containers/com.docker.docker/Data/vms/0/tty
# find ~/Library/Containers/com.docker.docker/Data/ -name 'tty'
sysctl -w vm.max_map_count=262144
Exit by Control-A Control-\
删除部分container
增加一个 yml 文件
docker-compose -f docker-compose-production.yml down -v # 同时删除 volume
批量操作
每批一般建议是 1000-5000 个文档;如果你的文档很大,可以适当减少每批的文档数。单次请求大小建议是 5-15MB,默认不能超过 100MB
logstash
input file error path.data
删除 data/.lock 文件
doe logstash ./bin/logstash -f custom_file/event_app.conf --path.data=path_data
doe logstash ./bin/logstash --debug -f custom_file/event_app.conf --path.data=path_data
不停止logstash,更新配置文件 kill -HUP pid (pid 用 top 查看)
同步 mysql 到elasticsearch
把 jdbc 驱动 jar 包用 docker cp 复制到 /usr/share/logstash/logstash-core/lib/jars
同步mongo monstache
doe monstache monstache -f ./custome_files/mongo_es.toml -verbose
# 前提条件: mongo 开启副本集, 且用户可以访问 local 数据库
mongo-url = "mongodb://192.168.10.47:27017,192.168.10.47:27018/?replicaSet=my-mongo-rs"
elasticsearch-urls = ["http://192.168.10.47:9200"]
elasticsearch-user = ""
elasticsearch-password = ""
# 存储monstache元信息的数据库名称
config-database-name = "mongoache"
# monstache集群名称
cluster-name = "mongo-es"
# 删除策略 0: 删除 1: 弃用 2: 忽略删除动作
#delete-strategy = 0
#delete-index-pattern = "mydb"
# 如果需要全量同步多个集合, 该参数控制是否同时同步, 如果是1会一个一个同步
direct-read-concur = 1
# 需同步的集合
namespace-regex = '^jieloan.api_order_data'
# 需全量同步的集合, 第一次运行时必须打开, 以后运行时必须关闭, 否则每次都做全量同步
direct-read-namespaces = ["jieloan.api_order_data"]
# 需增量同步的集合, change-stream只有在3.6后才能使用
# change-stream-namespaces = ["jieloan.api_order_data"]
# mongo3.6之前的版本需开启, 3.6版本后添加了change-stream的功能
enable-oplog = true
# 全量同步时, 设置no cursor timeout
direct-read-no-timeout = true
# 全量同步时,把集合分成n个range, 由n个go routines去处理, 增加该参数会增加连接数和monstache的内存, 会提高吞吐量, -1则是使用一个cursor去迭代所有数据
direct-read-split-max = 8
# 如果设为true, 那么只做全量同步, 不同步更新
disable-change-events = false
# 是否同步数据库或集合删除事件
dropped-databases = true
dropped-collections = true
# 发送到es的请求失败了, 是否重试
elasticsearch-retry = false
# 连接es的连接池, 控制bulk的并发数量
elasticsearch-max-conns = 8
# 当文档数达到多少时, 发送一次bulk请求, 该参数一般不推荐使用
elasticsearch-max-docs = -1
# 当connection buffer达到多少字节时, 发送一次bulk, 默认8M, 该参数能有效提高性能, 增加该参数意味着消耗更多内存
#elasticsearch-max-bytes =
# 是否使用easy-json处理序列化
enable-easy-json = false
# 全量同步完后立刻退出, 该参数一般用于定时任务
exit-after-direct-reads = false
# 当收到失败的事件时, 是否立刻退出程序
fail-fast = false
gzip = true
# 开启该参数才能与mysql的数据一起index
index-as-update = true
index-oplog-time = false
# 是否在es中创建index来保存统计信息
index-stats = true
mongo-oplog-database-name = "local"
mongo-oplog-collection-name = "oplog.rs"
prune-invalid-json = true
# deprecated
replay = false
# 如果开启该参数, monstache会把已经处理的事件的时间戳写入mongodb, 如果monstache意外中断, 那么下次启动时, 会从该时间点继续同步; 如果设置了cluster-name, 该参数自动开启
resume = true
#resume-from-timestamp = 0
# use a custom resume strategy (tokens) instead of the default strategy (timestamps)
# tokens work with MongoDB API 3.6+ while timestamps work only with MongoDB API 4.0+
#resume-strategy = 0
enable-http-server = true
http-server-addr = "0.0.0.0:8080"
[[mapping]]
namespace = "jieloan.api_order_data"
index = "loan_apiorder"
搜索引擎衡量指标 (information retrieval)
1.precision (查准率) - 尽可能返回较少的无关文档
2.recall (查全率) - 尽量返回较多的相关文档
3.ranking - 能否按照相关度进行排序
kibana 高亮highlight有[1000000]限制,且会减慢查询速度
关闭高亮显示 doc_table:highlight
-
在es同步大量数据时可以通过对索引增加分片来提升写的能力,之后到了稳定的增量阶段可以通过合并分片来提升查询性能。
-
可以考虑关闭自动创建索引
put _cluster/settings
{
"persistent": {
"action.auto_create_index": false
}
}
通过索引名匹配模式(pattern)设置白名单
put _cluster/settings
{
"persistent": {
"action.auto_create_index": "logstash-*, .kibana*"
}
}
查询日志入ES
经纬度查询
GET loan_apiorder/_search
{
"query": {
"bool": {
"must": [
{
"geo_distance":{
"distance": "500m",
"location":{
"lat":31,
"lon":121
}
}
}
]
}
}
}
watcher
{
"_index" : ".watches",
"_type" : "_doc",
"_id" : "test3",
"_version" : 2954,
"_seq_no" : 1452147,
"_primary_term" : 10,
"found" : true,
"_source" : {
"trigger" : {
"schedule" : {
"interval" : "1m"
}
},
"input" : {
"search" : {
"request" : {
"search_type" : "query_then_fetch",
"indices" : [
"email_sent*"
],
"rest_total_hits_as_int" : true,
"body" : {
"size" : 0,
"query" : {
"bool" : {
"must" : [
{
"range" : {
"datetime" : {
"gt" : "now-10m",
"lt" : "now"
}
}
},
{
"match" : {
"subject" : "micros"
}
}
]
}
}
}
}
}
},
"condition" : {
"compare" : {
"ctx.payload.hits.total" : {
"gte" : 100
}
}
},
"actions" : {
"my_webhook" : {
"throttle_period_in_millis" : 600000,
"webhook" : {
"scheme" : "http",
"host" : "i1.celtgame.com",
"port" : 80,
"method" : "post",
"path" : "/es_alert",
"params" : { },
"headers" : { },
"body" : """
{
"msgtype": "text",
"text": {
"content": "过去10分钟, 微服务报错: {{ctx.payload.hits.total}}条"
}
}
"""
}
},
"index_payload" : {
"transform" : {
"script" : {
"source" : "return [ 'scheduled_time' : ctx.trigger.scheduled_time , 'watch_id': ctx.watch_id , 'ts': System.currentTimeMillis()/1000 ]",
"lang" : "painless"
}
},
"index" : {
"index" : "watch_alert"
}
}
},
"status" : {
"state" : {
"active" : true,
"timestamp" : "2020-04-23T03:27:43.143Z"
},
"actions" : {
"index_payload" : {
"ack" : {
"timestamp" : "2020-04-24T00:42:43.206Z",
"state" : "awaits_successful_execution"
},
"last_execution" : {
"timestamp" : "2020-04-24T00:41:43.159Z",
"successful" : true
},
"last_successful_execution" : {
"timestamp" : "2020-04-24T00:41:43.159Z",
"successful" : true
}
},
"my_webhook" : {
"ack" : {
"timestamp" : "2020-04-24T00:42:43.206Z",
"state" : "awaits_successful_execution"
},
"last_execution" : {
"timestamp" : "2020-04-24T00:40:43.516Z",
"successful" : true
},
"last_successful_execution" : {
"timestamp" : "2020-04-24T00:40:43.516Z",
"successful" : true
},
"last_throttle" : {
"reason" : "throttling interval is set to [10m] but time elapsed since last execution is [59.6s]",
"timestamp" : "2020-04-24T00:41:43.159Z"
}
}
},
"headers" : {
"_xpack_security_authentication" : "49itAwALemh1LnhpYW9sZWkGCndyaXRlcl9kZXYNa2liYW5hX3N5c3RlbQ9tb25pdG9yaW5nX3VzZXIWbWFjaGluZV9sZWFybmluZ19hZG1pbg13YXRjaGVyX2FkbWluDmNsdXN0ZXJfbWFuYWdlCgABCnpodXhpYW9sZWkBAAEAJWVzLWNuLXYwaDFlMmx2MDAwMnJjZ2VoLTI4M2JjN2U2LTAwMDIHbmF0aXZlMQZuYXRpdmUAAAoA"
},
"version" : -1,
"last_checked" : "2020-04-24T00:57:43.283Z",
"execution_state" : "execution_not_needed",
"last_met_condition" : "2020-04-24T00:41:43.159Z"
}
}
}
-
常见返回错误
-
修正数据
POST event_user/_update_by_query?pipeline=cid_20181
{
"query": {
"bool": {
"must": [
{
"range": {
"datatime": {
"gte": "2020-08-18",
"time_zone": "+08:00",
"format": "yyyy-MM-dd"
}
}
},
{
"terms": {
"token.number": [
"198513907208"
]
}
},
{
"term": {
"token.pd_created": {
"value": true
}
}
},
{
"term": {
"action": {
"value": "user_login"
}
}
}
]
}
}
}
PUT _ingest/pipeline/cid_20181
{
"processors": [
{
"script": {
"source": """
ctx.token.cid=20181;
ctx.token.channel_title="fq-duanxinlahuo01_cpc_sj_20181";
ctx.token.channel_abbr="乐享借短信拉活";
"""
}
}
]
}
本文摘自: https://www.cnblogs.com/