ES

ES(Elasticsearch)

核心概念

集群相关

倒排索引

倒排索引源于实际应用中需要根据属性的值来查找记录。这种索引表中的每一项都包括一个属性值和包含该属性值的各个记录地址。由于不是根据记录来确定属性,而是根据属性来确定记录的位置,所以称之为倒排索引。

安装

tar -zxvf elasticsearch-...
# 集群名称,默认是elasticsearch
cluster.name: elasticsearch
# es节点名称
node.name: es-node0
# data数据保存地址
path.data: /usr/local/es/data
# 日志数据保存地址
path.logs: /usr/local/es/logs
# 绑定es网络IP
network.host: 0.0.0.0
# 端口号
http.port: 9200
# 集群节点
cluster.initial_master_nodes: ["es-node0"]
useradd esuser
chown -R esuser:esuser /usr/local/es

基本操作

索引操作

PUT /index
{
    "settings": {
        "index": {
            "number_of_shards: "2",
            "number_of_replicas": "0"
        }
    }
}

索引的mappings映射

PUT /index
{
    "mappings": {
        "properties: {
            "realname": {
                "type": "text",
                "index":true
            },
            "username": {
                "type":"keyword",
                "index":false
            }
        }
    }
}
GET /index_mapping/_analyze
{
    "field":"realname",
    "text":"java is good"
}
# 注:某个属性一旦被建立就不能修改了,但是可以新增额外属性
POST /index/_mapping
{
    "properties": {
        "id":{
            "type":""long
        },
        "age":{
            "type":"integer"
        }
    }
}

添加文档

POST /index/_doc/1 ->{索引名}/_doc/{索引id}(es中的id,非记录id)
{
    "id":1001,
    "name":"es",
    "desc":"elasticsearch"
}

删除文档

文档删除不是立即删除,文档还是保存在磁盘上,索引增长越来越多才会把那些曾经标识过删除的进行清理,从磁盘上移出去

DELETE index/_doc/1

修改文档

POST /index/_doc/1/_update
{
    "doc": {
        "name":"update"
    }
}
PUT /index/_doc/1
{
    "id":100,
    "name":"swap",
    "desc":"swap update"
}

文档基本查询

GET /index/_doc/1
GET /index/_doc/_search
GET /index/_doc/1?_source=id,name
GET /index/_doc/_search?_source=id,name
POST /index/_doc/{_id}/_update?if_seq_no={数值}&if_primary_term={数值}
* 版本元数据
* if_seq_no:文档版本号,作用同_version
* if_primary_term:文档所在位置

分词

把文本转为一个一个的单词,称之为分词(analysis)。es默认只对英文语句做分词,中文不支持,每个中文都会被拆分为独立的个体

dsl搜索

入门语法

GET /index/_doc/_search?q=desc:test
GET /index/_doc/_search?q=nickname:test&q=age:25
GET /index/_doc/_search?q=nickname:this is test

查询所有

GET /index/_doc/_search
或者
POST /index/_doc/_search
{
    "query":{
        "match_all":{}
    },
    "_source":["id","nickname","age"]
}
可以添加from和size
"from":5,
"size":10

term与match

POST /index/_doc/_search
{
    "query":{
        "term":{
            "desc":"test"
        }
    }
}
# 多个词语匹配搜索
POST /index/_doc/_search
{
    "query":{
        "terms":{
            "desc":["test1","test2","test3"]
        }
    }
}
POST /index/_doc/_search
{
    "query":{
        "match":{
            ""desc:"test"
        }
    }
}

match_phrase

# slop允许词语跳过的数量
POST /index/_doc/_search
{
    "query":{
        "match_phrase":{
            "desc":{
                "query":"test1 test testfor",
                "slop":2
            }
        }
    }
}

match(operator)/ids

# or搜索内容分词后,只要存在一个词语匹配就展示结果(operator默认为or)
# and搜索内容分词后,都要满足词语匹配
POST /index/_doc/_search
{
    "query":{
        "match":{
            "desc":{
                "query":"test",
                "operator":"or"
            }
        }
    }
}
# minimum_should_match最低匹配精度,至少有[分词后的词语个数]*百分比,
# 得出值取整;也可以设置具体的值,表示个数
{
    "query":{
        "match":{
            "desc":{
                "query":"test",
                "minimum_should_match":"60%"
            }
        }
    }
}
POST /index/_doc/_search
{
    "query":{
        "ids":{
            "type":"_doc",
            "values":["1001","1002","1003"]
        }
    }
}

multi_match/boost

POST /index/_doc/_search
{
    "query":{
        "multi_match":{
            "query":"perter test a test",
            "fields":["desc","nickname"]
        }
    }
}
# nickname^10代表搜索提高10倍相关性
{
    "query":{
        "multi_match":{
            "query":"perter test a test",
            "fields":["desc","nickname^10"]
        }
    }
}

布尔查询

POST /index/_doc/_search
{
    "query":{
        "bool":{
            "must":[
                {
                    "multi_match":{
                        "query":"test",
                        "fields":["desc","nickname"]
                    }
                },
                {
                    "term":{
                        "sex":1
                    }
                }
            ]
        }
    }
}

POST /index/_doc/_search
{
    "query":{
        "bool":{
            "should":[
                {
                    "multi_match":{
                        "query":"test",
                        "fields":["desc","nickname"]
                    }
                },
                {
                    "term":{
                        "sex":1
                    }
                }
            ]
        }
    }
}
POST /index/_doc/_search
{
    "query":{
        "bool":{
            "should":[
                {
                    "multi_match":{
                        "query":"test",
                        "fields":["desc","nickname"],
                        "boost":18
                    }
                },
                {
                    "term":{
                        "sex":{
                            "value":1,
                            "boost":2
                        }
                    }
                }
            ]
        }
    }
}

过滤器

POST /index/_doc/_search
{
    "query":{
        "match":{
            "desc":"test"
        }
    },
    "post_filter":{
        "range":{
            "money":{
                "gt":60.
                "lt":1000
            }
        }
    }
}

排序

POST /index/_doc/_search
{
    "query":{
        "match":{
            "desc":"test"
        }
    },
    "sort":[
        {
            "age":"desc"
        },
        {
            "money":"asc"
        }
    ]
}
# 对文本排序
# 由于文本会被分词,这时排序需要为字段附加一个额外属性
# 创建索引
POST /index/_mapping
{
    "properties":{
        "id":{
            "type":"long"
        },
        "nickname":{
            "type":"text",
            "analyzer":"ik_max_word",
            "fields":{
                "keyword":{
                    "type":"keyword"
                }
            }
        }
    }
}
# 文本排序
{
    "sort":[
        {
            "nickname.keyword":"desc"
        }
    ]
}

高亮

POST /index/_doc/_search
{
    "query":{
        "match":{
            "desc":"test"
        }
    },
    "highlight":{
        "pre_tags":["<tag>"],
        "post_tags":["</tag>"],
        "fields":{
            "desc":{}
        }
    }
}

prefix&fuzzy&wildcard

POST /index/_doc/_search
{
    "query":{
        "prefix":{
            "desc":"tes"
        }
    }
}
POST /index/_doc/_search
{
    "query":{
        "fuzzy":{
            "desc":"tev"
        }
    }
}
# 多字段
{
    "query":{
        "multi_match":{
            "fields":["desc","nickname"],
            "query":"test",
            "fuzziness":"AUTO"
        }
    }
}
{
    "query":{
        "multi_match":{
            "fields":["desc","nickname"],
            "query":"test",
            "fuzziness":"1"
        }
    }
}
POST /index/_doc/_search
{
    "query":{
        "wildcard":{
            "desc":"*t?"
        }
    }
}

其它

深度分页

# 通过设置index.max_result_window来突破10000数据
GET /index/_settings

PUT /index/_settings
{
    "index.max_result_window":"20000"
}
# scroll=1m,相当于是一个session会话时间,搜索保持的上下文时间为1分钟
POST /index/_search?scroll=1m
{
    "query":{
        "match_all":{}
    },
    "sort":["_doc"],
    "size":5
}
POST /_search/scroll
{
    "scroll":"1m",
    "scroll_id":"上一次scroll_id"
}

批量操作bulk

# 批量操作类型,新增、删除或修改
# \n是每行结尾必须填写的一个规范包括最后一行
{action:{metadata}}\n
# 请求body,增加和修改操作需要,删除则不需要
{request body}\n
{action:{metadata}}\n
{request body}\n
...
# create新增文档数据,在metadata中指定index和type
POST /_bulk
{"create":{"_index":"index","_type":"_doc","_id":"2001"}}
{"id":"2001","nickname":"name2001"}
{"create":{"_index":"index","_type":"_doc","_id":"2002"}}
{"id":"2002","nickname":"name2002"}
{"create":{"_index":"index","_type":"_doc","_id":"2003"}}
{"id":"2003","nickname":"name2003"}
# create 在url中指定index和type
POST /index/_doc/_bulk
{"create":{"_id":"2003"}}
{"id":"2003","nickname":"name2003"}
{"create":{"_id":"2004"}}
{"id":"2004","nickname":"name2004"}
{"create":{"_id":"2005"}}
{"id":"2005","nickname":"name2005"}
# index创建,已存在则覆盖,不存在则新增
POST /index/_doc/_bulk
{"index":{"_id":"2004"}}
{"id":"2004","nickname":"index2004"}
{"index":{"_id":"2006"}}
{"id":"2006","nickname":"index2006"}
# update更新部分文档数据
POST /index/_doc/_bulk
{"update":{"_id":"2004"}}
{"doc":{"id":"2021"}}
{"update":{"_id":"2006"}}
{"doc":{"nickname":"test"}}
# delete批量删除
POST /index/_doc/_bulk
{"delete":{"_id":"2004"}}
{"delete":{"_id":"2005"}}
# 综合批量操作
POST /index/_doc/_bulk
{"create":{"_id":"2005"}}
{"id":"2005","nickname":"name2005"}
{"update":{"_id":"2004"}}
{"doc":{"id":"2021"}}
{"delete":{"_id":"2004"}}

ES集群

搭建ES集群

# 修改配置elasticsearch.yml
# 配置集群名称,保证每个节点相同,如此就能处于一个集群之内
cluster.name: es-cluster-test
# 每个节点名称必须不一样
node.name: es-node1
# http端口(默认)
http.port: 9200
# 主节点,用于管理整个集群,负责创建索引或删除索引,管理其它非master节点
node.master: true
# 数据节点,用于对文档数据的增删改查
node.data: true
# 集群列表
discovery.seed_hosts: ["192.168.1.1","192.168.1.2","192.168.1.3"]
# 启动的时候使用一个master节点
cluster.initial_master_nodes: ["192.168.1.1"]

ES脑裂

ES整合spring boot

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    <version>2.2.2.RELEASE</version>
</dependency>
spring:
  data:
    elasticsearch:
      cluster-name: es-cluster
      cluster-nodes: 192.168.1.1:9300
@Configuration
public class ESConfig {
    /*** 解决netty引起的issue */
    @PostConstruct
    void init() {
        System.setProperty("es.set.netty.runtime.available.processors",
            "false");
    }
}

logstash

input {
    jdbc {
        # 设置数据库url及数据库名称
        jdbc_connection_string => "jdbc:mysql://192.168.1.1:3306/test"
        # 用户名和密码
        jdbc_user => "root"
        # jdbc_password => "root"
        # 数据库驱动位置
        jdbc_driver_library => "/usr/local/logstash-6.4.3/sync/mysql-connector-java-5.1.41.jar"
        # 驱动类名称
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        # 开启分页
        jdbc_paging_enabled => "true"
        # 分页每页数量
        jdbc_page_size => "10000"
        # 执行的sql路径
        statement_filepath => "/usr/local/logstash-6.4.3/sync/test.sql"
        # 设置定时任务,每分钟执行一次
        schedule => "* * * * *"
        # 索引类型
        type => "_doc"
        # 开启记录上次最终的结果
        use_column_value => true
        # 记录上一次追踪的结果
        last_run_metadata_path => "/usr/local/logstash-6.4.3/sync/track_time"
        # 追踪column名称
        tracking_column => "update_time"
        # 追踪column类型
        tracking_column_type => "timestamp"
        # 不清除追踪的结果
        clean_run => false
        # 数据库字段名称大写转小写
        lowercase_column_names => false
    }
}

output {
    elasticsearch {
        # es地址
        hosts => ["192.168.1.1:9200"]
        # 同步索引名称
        index => "test"
        # 设置_docId和数据id相同
        document_id => "%{id}"
    }
    # 日志输出
    stdout {
        codec => json_lines
    }
}
# 新增如下配置
# 定义模板名称
template_name => "ik"
# 模板所在位置
template => "/usr/local/logstash-6.4.3/sync/logstash-ik.json"
# 重写模板
template_overwrite => true
# 关闭logstash自动管理模板功能
manage_template => false

发表评论

发表
Table of Contents