使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。前面我们了解到在聚合之前可以做很多种变换,然后把变换之后的key再去分桶。这种变换的终极形式就是自定义表达式,当然自定义表达式的效率也是最低的。
GROUP BY ipo_year % AS ipo_year_rem
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT ipo_year_rem, COUNT(*) FROM symbol GROUP BY ipo_year % 5 AS ipo_year_rem
EOF
{"COUNT(*)": 715, "ipo_year_rem": 4.0}
{"COUNT(*)": 677, "ipo_year_rem": 0.0}
{"COUNT(*)": 537, "ipo_year_rem": 2.0}
{"COUNT(*)": 523, "ipo_year_rem": 3.0}
{"COUNT(*)": 446, "ipo_year_rem": 1.0}
Elasticsearch
{
"aggs": {
"ipo_year_rem": {
"terms": {
"field": "ipo_year",
"size": 0,
"script": {
"lang": "expression",
"inline": "_value % 5"
}
},
"aggs": {}
}
},
"size": 0
}
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 2,
"aggregations": {
"ipo_year_rem": {
"buckets": [
{
"key": 4.0,
"doc_count": 715
},
{
"key": 0.0,
"doc_count": 677
},
{
"key": 2.0,
"doc_count": 537
},
{
"key": 3.0,
"doc_count": 523
},
{
"key": 1.0,
"doc_count": 446
}
],
"sum_other_doc_count": 0,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "1.046876000ms",
"breakdown": {
"score": 0,
"create_weight": 16550,
"next_doc": 835828,
"match": 0,
"build_scorer": 194498,
"advance": 0
}
}
],
"rewrite_time": 8693,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "2.994827000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.3955230000ms"
},
{
"name": "DoubleTermsAggregator: [ipo_year_rem]",
"reason": "aggregation",
"time": "0.9664500000ms"
}
]
}
]
}
]
GROUP BY floor(market_cap / last_sale)
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200
SELECT shares_count, COUNT(*) FROM symbol GROUP BY floor(market_cap / last_sale / 1000000) AS shares_count ORDER BY shares_count LIMIT 3
EOF
{"shares_count": "0.0", "COUNT(*)": 6007}
{"shares_count": "1.0", "COUNT(*)": 328}
{"shares_count": "10.0", "COUNT(*)": 6}
Elasticsearch
{
"aggs": {
"shares_count": {
"terms": {
"size": 3,
"order": {
"_term": "asc"
},
"script": {
"lang": "expression",
"inline": "floor(doc['market_cap'].value / doc['last_sale'].value / 1000000)"
}
},
"aggs": {}
}
},
"size": 0
}
如果引用了多个字段,则无法使用_value,而只能用doc['market_cap'].value 这样来引用字段。
{
"hits": {
"hits": [],
"total": 6714,
"max_score": 0.0
},
"_shards": {
"successful": 1,
"failed": 0,
"total": 1
},
"took": 6,
"aggregations": {
"shares_count": {
"buckets": [
{
"key": "0.0",
"doc_count": 6007
},
{
"key": "1.0",
"doc_count": 328
},
{
"key": "10.0",
"doc_count": 6
}
],
"sum_other_doc_count": 373,
"doc_count_error_upper_bound": 0
}
},
"timed_out": false
}
Profile
[
{
"query": [
{
"query_type": "MatchAllDocsQuery",
"lucene": "*:*",
"time": "0.5422700000ms",
"breakdown": {
"score": 0,
"create_weight": 14630,
"next_doc": 475299,
"match": 0,
"build_scorer": 52341,
"advance": 0
}
}
],
"rewrite_time": 5085,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "7.627612000ms",
"children": [
{
"name": "TotalHitCountCollector",
"reason": "search_count",
"time": "0.4945110000ms"
},
{
"name": "StringTermsAggregator: [shares_count]",
"reason": "aggregation",
"time": "5.193886000ms"
}
]
}
]
}
]
可以看到这里使用的是 StringTermsAggregator,这说明了计算结果是用字符串来排序的。