【02】把 Elasticsearch 当数据库使:过滤和排序

2028 查看

使用 https://github.com/taowen/es-monitor 可以用 SQL 进行 elasticsearch 的查询。本章介绍简单的文档过滤条件

exchange='nyse'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where exchange='nyse' limit 1
EOF
{"sector": "n/a", "_type": "symbol", "market_cap": 4469064, "name": "3D Systems Corporation", "exchange": "nyse", "symbol": "DDD", "last_sale": 807, "_index": "symbol", "ipo_year": null, "_id": "AVLqbUjEQ3iIyyVFLQgV", "industry": "Technology"}

Elasticsearch

{
  "query": {
    "term": {
      "exchange": "nyse"
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 1.728313, 
        "_type": "symbol", 
        "_id": "AVLqbUjEQ3iIyyVFLQgV", 
        "_source": {
          "sector": "n/a", 
          "market_cap": 4469064, 
          "name": "3D Systems Corporation", 
          "exchange": "nyse", 
          "symbol": "DDD", 
          "last_sale": 807, 
          "ipo_year": null, 
          "industry": "Technology"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 3240, 
    "max_score": 1.728313
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 6, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "TermQuery",
        "lucene": "exchange:nyse",
        "time": "2.336230000ms",
        "breakdown": {
          "score": 470159,
          "create_weight": 417360,
          "next_doc": 786055,
          "match": 0,
          "build_scorer": 662656,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 3566,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "1.561495000ms"
      }
    ]
  }
]

exchange='nyse' AND sector='Technology'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where exchange='nyse' and sector='Technology' limit 1
EOF
{"sector": "Technology", "_type": "symbol", "market_cap": 4469064, "name": "3D Systems Corporation", "exchange": "nyse", "symbol": "DDD", "last_sale": 807, "_index": "symbol", "ipo_year": null, "_id": "AVLqdtWhQ3iIyyVFLSJQ", "industry": "Computer Software: Prepackaged Software"}

Elasticsearch

{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "exchange": "nyse"
          }
        }, 
        {
          "term": {
            "sector": "Technology"
          }
        }
      ]
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 0.0, 
        "_type": "symbol", 
        "_id": "AVLqdtWhQ3iIyyVFLSJQ", 
        "_source": {
          "sector": "Technology", 
          "market_cap": 4469064, 
          "name": "3D Systems Corporation", 
          "exchange": "nyse", 
          "symbol": "DDD", 
          "last_sale": 807, 
          "ipo_year": null, 
          "industry": "Computer Software: Prepackaged Software"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 186, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 5, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "BooleanQuery",
        "lucene": "#exchange:nyse #sector:Technology",
        "time": "3.999703000ms",
        "breakdown": {
          "score": 54437,
          "create_weight": 413531,
          "next_doc": 1213355,
          "match": 0,
          "build_scorer": 736504,
          "advance": 0
        },
        "children": [
          {
            "query_type": "TermQuery",
            "lucene": "exchange:nyse",
            "time": "1.175638000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 186745,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 616840,
              "advance": 372053
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "sector:Technology",
            "time": "0.4062380000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 125321,
              "next_doc": 115632,
              "match": 0,
              "build_scorer": 45136,
              "advance": 120149
            }
          }
        ]
      }
    ],
    "rewrite_time": 23008,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "0.1031780000ms"
      }
    ]
  }
]

last_sale > 985

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where last_sale > 985 limit 1
EOF
{"sector": "Finance", "_type": "symbol", "market_cap": 42010000, "name": "1347 Capital Corp.", "exchange": "nasdaq", "symbol": "TFSCU", "last_sale": 1005, "_index": "symbol", "ipo_year": 2014, "_id": "AVLqdtWdQ3iIyyVFLRZB", "industry": "Business Services"}

Elasticsearch

{
  "query": {
    "range": {
      "last_sale": {
        "gt": 985.0
      }
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 1.0, 
        "_type": "symbol", 
        "_id": "AVLqdtWdQ3iIyyVFLRZB", 
        "_source": {
          "sector": "Finance", 
          "market_cap": 42010000, 
          "name": "1347 Capital Corp.", 
          "exchange": "nasdaq", 
          "symbol": "TFSCU", 
          "last_sale": 1005, 
          "ipo_year": 2014, 
          "industry": "Business Services"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 4285, 
    "max_score": 1.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 2, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MultiTermQueryConstantScoreWrapper",
        "lucene": "last_sale:{985 TO *]",
        "time": "4.684710000ms",
        "breakdown": {
          "score": 423966,
          "create_weight": 21384,
          "next_doc": 469032,
          "match": 0,
          "build_scorer": 3770328,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 10493,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "1.261998000ms"
      }
    ]
  }
]

last_sale != 985

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where last_sale != 985 limit 1
EOF
{"sector": "Finance", "_type": "symbol", "market_cap": null, "name": "1347 Capital Corp.", "exchange": "nasdaq", "symbol": "TFSCR", "last_sale": 16, "_index": "symbol", "ipo_year": 2014, "_id": "AVLqdtWdQ3iIyyVFLRZA", "industry": "Business Services"}

Elasticsearch

{
  "query": {
    "bool": {
      "must_not": {
        "term": {
          "last_sale": 985
        }
      }
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 1.0, 
        "_type": "symbol", 
        "_id": "AVLqdtWdQ3iIyyVFLRZA", 
        "_source": {
          "sector": "Finance", 
          "market_cap": null, 
          "name": "1347 Capital Corp.", 
          "exchange": "nasdaq", 
          "symbol": "TFSCR", 
          "last_sale": 16, 
          "ipo_year": 2014, 
          "industry": "Business Services"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 6708, 
    "max_score": 1.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 1, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "BooleanQuery",
        "lucene": "+*:* -last_sale: Y",
        "time": "14.10871500ms",
        "breakdown": {
          "score": 3397824,
          "create_weight": 401619,
          "next_doc": 7993655,
          "match": 0,
          "build_scorer": 141022,
          "advance": 0
        },
        "children": [
          {
            "query_type": "MatchAllDocsQuery",
            "lucene": "*:*",
            "time": "1.694079000ms",
            "breakdown": {
              "score": 974401,
              "create_weight": 5175,
              "next_doc": 705676,
              "match": 0,
              "build_scorer": 8827,
              "advance": 0
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "last_sale: Y",
            "time": "0.4805160000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 342905,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 104919,
              "advance": 32692
            }
          }
        ]
      }
    ],
    "rewrite_time": 32619,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "4.604778000ms"
      }
    ]
  }
]

exchange='nyse' AND NOT sector='Technology'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where exchange='nyse' AND NOT sector='Technology' limit 1
EOF
{"sector": "Health Care", "_type": "symbol", "market_cap": 3865437245, "name": "3M Company", "exchange": "nyse", "symbol": "MMM", "last_sale": 15244, "_index": "symbol", "ipo_year": null, "_id": "AVLqdtWhQ3iIyyVFLSJR", "industry": "Medical/Dental Instruments"}

Elasticsearch

{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "exchange": "nyse"
          }
        }
      ], 
      "must_not": [
        {
          "term": {
            "sector": "Technology"
          }
        }
      ]
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 0.0, 
        "_type": "symbol", 
        "_id": "AVLqdtWhQ3iIyyVFLSJR", 
        "_source": {
          "sector": "Health Care", 
          "market_cap": 3865437245, 
          "name": "3M Company", 
          "exchange": "nyse", 
          "symbol": "MMM", 
          "last_sale": 15244, 
          "ipo_year": null, 
          "industry": "Medical/Dental Instruments"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 3054, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 5, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "BooleanQuery",
        "lucene": "#exchange:nyse -sector:Technology",
        "time": "3.078346000ms",
        "breakdown": {
          "score": 164364,
          "create_weight": 216094,
          "next_doc": 1872817,
          "match": 0,
          "build_scorer": 114716,
          "advance": 0
        },
        "children": [
          {
            "query_type": "TermQuery",
            "lucene": "exchange:nyse",
            "time": "0.4679310000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 119897,
              "next_doc": 290703,
              "match": 0,
              "build_scorer": 57331,
              "advance": 0
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "sector:Technology",
            "time": "0.2424240000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 62239,
              "next_doc": 0,
              "match": 0,
              "build_scorer": 32978,
              "advance": 147207
            }
          }
        ]
      }
    ],
    "rewrite_time": 19262,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "0.5360760000ms"
      }
    ]
  }
]

exchange='nyse' OR NOT sector='Technology'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol where exchange='nyse' OR NOT sector='Technology' limit 1
EOF
{"sector": "Health Care", "_type": "symbol", "market_cap": 3865437245, "name": "3M Company", "exchange": "nyse", "symbol": "MMM", "last_sale": 15244, "_index": "symbol", "ipo_year": null, "_id": "AVLqdtWhQ3iIyyVFLSJR", "industry": "Medical/Dental Instruments"}

Elasticsearch

{
  "query": {
    "bool": {
      "should": [
        {
          "term": {
            "exchange": "nyse"
          }
        }, 
        {
          "bool": {
            "must_not": [
              {
                "term": {
                  "sector": "Technology"
                }
              }
            ]
          }
        }
      ]
    }
  }, 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "_score": 0.99838185, 
        "_type": "symbol", 
        "_id": "AVLqdtWhQ3iIyyVFLSJR", 
        "_source": {
          "sector": "Health Care", 
          "market_cap": 3865437245, 
          "name": "3M Company", 
          "exchange": "nyse", 
          "symbol": "MMM", 
          "last_sale": 15244, 
          "ipo_year": null, 
          "industry": "Medical/Dental Instruments"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 6245, 
    "max_score": 0.99838185
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 9, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "BooleanQuery",
        "lucene": "exchange:nyse (-sector:Technology +*:*)",
        "time": "21.24846700ms",
        "breakdown": {
          "score": 4197895,
          "create_weight": 815882,
          "next_doc": 5963510,
          "match": 0,
          "build_scorer": 3902610,
          "advance": 0
        },
        "children": [
          {
            "query_type": "TermQuery",
            "lucene": "exchange:nyse",
            "time": "1.099460000ms",
            "breakdown": {
              "score": 219612,
              "create_weight": 529960,
              "next_doc": 266196,
              "match": 0,
              "build_scorer": 83692,
              "advance": 0
            }
          },
          {
            "query_type": "BooleanQuery",
            "lucene": "-sector:Technology +*:*",
            "time": "5.269110000ms",
            "breakdown": {
              "score": 1221522,
              "create_weight": 209290,
              "next_doc": 1693148,
              "match": 0,
              "build_scorer": 521166,
              "advance": 0
            },
            "children": [
              {
                "query_type": "TermQuery",
                "lucene": "sector:Technology",
                "time": "0.6576720000ms",
                "breakdown": {
                  "score": 0,
                  "create_weight": 141704,
                  "next_doc": 0,
                  "match": 0,
                  "build_scorer": 274371,
                  "advance": 241597
                }
              },
              {
                "query_type": "MatchAllDocsQuery",
                "lucene": "*:*",
                "time": "0.9663120000ms",
                "breakdown": {
                  "score": 441766,
                  "create_weight": 5317,
                  "next_doc": 508525,
                  "match": 0,
                  "build_scorer": 10704,
                  "advance": 0
                }
              }
            ]
          }
        ]
      }
    ],
    "rewrite_time": 61389,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "4.847758000ms"
      }
    ]
  }
]

date > now() - INTERVAL '60 DAYS'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from quote where "date" > now() - interval '30 DAYS' limit 1
EOF

这个 now() 和 interval 的语法是和postgresql一致的(http://www.postgresql.org/docs/9.1/static/functions-datetime.html

{"volume": 118900, "_type": "quote", "_index": "quote", "symbol": "AMRS", "adj_close": 147, "high": 149, "low": 138, "date": "2016-02-16", "close": 147, "_id": "AMRS-2016-02-16", "open": 140}

Elasticsearch

{
  "query": {
    "range": {
      "date": {
        "gt": 1453212988000
      }
    }
  }, 
  "size": 1
}

查询是和普通的大于一样的。

symbol LIKE 'AAP%'

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol WHERE symbol LIKE 'AAP%'
EOF
{"sector": "Technology", "_type": "symbol", "market_cap": 522690000000, "name": "Apple Inc.", "exchange": "nasdaq", "symbol": "AAPL", "last_sale": 9427, "_index": "symbol", "ipo_year": 1980, "_id": "AVL0FqFJog4u0JP7lzka", "industry": "Computer Manufacturing"}
{"sector": "Finance", "_type": "symbol", "market_cap": 105960000, "name": "Atlantic Alliance Partnership Corp.", "exchange": "nasdaq", "symbol": "AAPC", "last_sale": 1019, "_index": "symbol", "ipo_year": 2015, "_id": "AVL0FqFJog4u0JP7lzlf", "industry": "Business Services"}
{"sector": "Consumer Services", "_type": "symbol", "market_cap": 148395246, "name": "Advance Auto Parts Inc", "exchange": "nyse", "symbol": "AAP", "last_sale": 13857, "_index": "symbol", "ipo_year": null, "_id": "AVL0FqFQog4u0JP7l0R7", "industry": "Other Specialty Stores"}

Elasticsearch

{
  "query": {
    "wildcard": {
      "symbol": "AAP*"
    }
  }
}
{
  "hits": {
    "hits": [
      {
        "_score": 1.0, 
        "_type": "symbol", 
        "_id": "AVL0FqFJog4u0JP7lzka", 
        "_source": {
          "sector": "Technology", 
          "market_cap": 522690000000, 
          "name": "Apple Inc.", 
          "exchange": "nasdaq", 
          "symbol": "AAPL", 
          "last_sale": 9427, 
          "ipo_year": 1980, 
          "industry": "Computer Manufacturing"
        }, 
        "_index": "symbol"
      }, 
      {
        "_score": 1.0, 
        "_type": "symbol", 
        "_id": "AVL0FqFJog4u0JP7lzlf", 
        "_source": {
          "sector": "Finance", 
          "market_cap": 105960000, 
          "name": "Atlantic Alliance Partnership Corp.", 
          "exchange": "nasdaq", 
          "symbol": "AAPC", 
          "last_sale": 1019, 
          "ipo_year": 2015, 
          "industry": "Business Services"
        }, 
        "_index": "symbol"
      }, 
      {
        "_score": 1.0, 
        "_type": "symbol", 
        "_id": "AVL0FqFQog4u0JP7l0R7", 
        "_source": {
          "sector": "Consumer Services", 
          "market_cap": 148395246, 
          "name": "Advance Auto Parts Inc", 
          "exchange": "nyse", 
          "symbol": "AAP", 
          "last_sale": 13857, 
          "ipo_year": null, 
          "industry": "Other Specialty Stores"
        }, 
        "_index": "symbol"
      }
    ], 
    "total": 3, 
    "max_score": 1.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 29, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MultiTermQueryConstantScoreWrapper",
        "lucene": "symbol:AAP*",
        "time": "5.018644000ms",
        "breakdown": {
          "score": 3186,
          "create_weight": 34308,
          "next_doc": 31760,
          "match": 0,
          "build_scorer": 4949390,
          "advance": 0
        }
      },
      {
        "query_type": "BooleanQuery",
        "lucene": "symbol:AAP symbol:AAPC symbol:AAPL",
        "time": "4.731495000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 205709,
          "next_doc": 26459,
          "match": 0,
          "build_scorer": 4299185,
          "advance": 0
        },
        "children": [
          {
            "query_type": "TermQuery",
            "lucene": "symbol:AAP",
            "time": "0.09350600000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 15337,
              "next_doc": 4021,
              "match": 0,
              "build_scorer": 74148,
              "advance": 0
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "symbol:AAPC",
            "time": "0.07045600000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 6874,
              "next_doc": 1225,
              "match": 0,
              "build_scorer": 62357,
              "advance": 0
            }
          },
          {
            "query_type": "TermQuery",
            "lucene": "symbol:AAPL",
            "time": "0.03618000000ms",
            "breakdown": {
              "score": 0,
              "create_weight": 4934,
              "next_doc": 1253,
              "match": 0,
              "build_scorer": 29993,
              "advance": 0
            }
          }
        ]
      }
    ],
    "rewrite_time": 25703,
    "collector": [
      {
        "name": "SimpleTopScoreDocCollector",
        "reason": "search_top_hits",
        "time": "0.04072600000ms"
      }
    ]
  }
]
  }
]

从profile结果可以看出,所谓的LIKE查询,其实不是一个个去查文档。而是先从字典表里把这个字段的所有的符合LIKE条件的term查出来,然后去查满足这些term的文档。把LIKE变成一堆=条件的OR来处理的。

ORDER BY name

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
select * from symbol order by name limit 1
EOF
{"sector": "n/a", "_type": "symbol", "market_cap": 72460000, "name": "iShares 0-5 Year Investment Grade Corporate Bond ETF", "exchange": "nasdaq", "symbol": "SLQD", "last_sale": 4997, "_index": "symbol", "ipo_year": null, "_id": "AVLqdtWfQ3iIyyVFLRwf", "industry": "n/a"}

Elasticsearch

{
  "sort": [
    {
      "name": "asc"
    }
  ], 
  "size": 1
}
{
  "hits": {
    "hits": [
      {
        "sort": [
          "0"
        ], 
        "_type": "symbol", 
        "_source": {
          "sector": "n/a", 
          "market_cap": 72460000, 
          "name": "iShares 0-5 Year Investment Grade Corporate Bond ETF", 
          "exchange": "nasdaq", 
          "symbol": "SLQD", 
          "last_sale": 4997, 
          "ipo_year": null, 
          "industry": "n/a"
        }, 
        "_score": null, 
        "_index": "symbol", 
        "_id": "AVLqdtWfQ3iIyyVFLRwf"
      }
    ], 
    "total": 6714, 
    "max_score": null
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 4, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "1.379354000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 32506,
          "next_doc": 850386,
          "match": 0,
          "build_scorer": 496462,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 18167,
    "collector": [
      {
        "name": "SimpleFieldCollector",
        "reason": "search_top_hits",
        "time": "1.225077000ms"
      }
    ]
  }
]