Elasticsearch Commands Reference
# Contents
# 1. JSON Documents
#    a. Create
#    b. Read
#       - Basic searches
#       - Intermediate searches
#    c. Update
#    d. Delete
#       - Deleting documents
# 2. Mappings
# 3. Analyzers
# This is the Kibana Dev Tools console; we'll use it to interact with Elasticsearch
"name" : "Elastic",
"location" : {
"state" : "Co",
"zipcode" : 80006
"name" : "Elastic",
...
<field> : <value>
# And each value must be one of 6 types to be valid JSON (string, number, object, array, boolean, null)
# http://www.json.org/
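# To illustrate, a single document can mix all six value types. This is just an illustrative example,
# indexed into a scratch index (json_types_example) that isn't used anywhere else in this reference:
POST /json_types_example/_doc
{
  "a_string": "Elastic",
  "a_number": 80006,
  "an_object": { "state": "CO" },
  "an_array": [ 1, 2, 3 ],
  "a_boolean": true,
  "a_null": null
}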
# We'll use restaurant food safety violations from the City of San Francisco; let's index one
POST /inspections/_doc
{
  "business_id": "2228",
  "business_latitude": "37.793698",
  "business_location": {
    "type": "Point",
    "coordinates": [
      -122.403984,
      37.793698
    ]
  },
  "business_longitude": "-122.403984",
  "business_postal_code": "94111",
  "business_state": "CA",
  "inspection_date": "2016-02-04T00:00:00.000",
  "inspection_id": "2228_20160204",
  "inspection_type": "Routine",
  "inspection_score": 96,
  "violation_id": "2228_20160204_103142"
}
# See the structure of the JSON document: there is a geo_point, dates, and numbers
GET /inspections/_search
# We'll dive deeper into the search API soon; for now, let's focus on indexing documents
# A lot just happened, so let's discuss
# Elasticsearch uses a REST API, and it matters whether we use POST or PUT:
# PUT requires us to specify the document ID in the URL, while POST generates one automatically, so the following PUT (with no ID) will be rejected
PUT /inspections/_doc
{
  "business_id": "2228",
  "business_latitude": "37.793698",
  "business_location": {
    "type": "Point",
    "coordinates": [
      -122.403984,
      37.793698
    ]
  },
  "business_longitude": "-122.403984",
  "business_postal_code": "94111",
  "business_state": "CA",
  "inspection_date": "2016-02-04T00:00:00.000",
  "inspection_id": "2228_20160204",
  "inspection_type": "Routine",
  "inspection_score": 96,
  "violation_id": "2228_20160204_103142"
}
POST /inspections/_doc
{
  "business_id": "2228",
  "business_latitude": "37.793698",
  "business_location": {
    "type": "Point",
    "coordinates": [
      -122.403984,
      37.793698
    ]
  },
  "business_longitude": "-122.403984",
  "business_postal_code": "94111",
  "business_state": "CA",
  "inspection_date": "2016-02-04T00:00:00.000",
  "inspection_id": "2228_20160204",
  "inspection_type": "Routine",
  "inspection_score": 96,
  "violation_id": "2228_20160204_103142"
}
# We can also specify the document ID ourselves with PUT
PUT /inspections/_doc/12345
{
  "business_id": "2228",
  "business_latitude": "37.793698",
  "business_location": {
    "type": "Point",
    "coordinates": [
      -122.403984,
      37.793698
    ]
  },
  "business_longitude": "-122.403984",
  "business_postal_code": "94111",
  "business_state": "CA",
  "inspection_date": "2016-02-04T00:00:00.000",
  "inspection_id": "2228_20160204",
  "inspection_type": "Routine",
  "inspection_score": 96,
  "violation_id": "2228_20160204_103142"
}
# Indexing the document automatically created the index for us, named "inspections"
# It is recommended to store only one type per index; multiple types per index will not be supported in the future
# Instead of dynamically creating the index based on the first document we add, we can create the index beforehand, to set certain settings
DELETE /inspections
PUT /inspections
{
  "settings": {
    "index.number_of_shards": 1,
    "index.number_of_replicas": 0
  }
}
# We'll use 1 shard for this example and no replicas; we probably wouldn't want to do this in production
# When you need to index a lot of docs, you should use the bulk API; you may see significant performance benefits
POST /inspections/_bulk
{ "index": { "_id": 1 }}
{ "index": { "_id": 2 }}
{ "index": { "_id": 3 }}
{ "index": { "_id": 4 }}
{ "index": { "_id": 5 }}
{ "index": { "_id": 6 }}
#__________________________________________________
GET /inspections/_search
#__________________________________________________
# Let's find all inspection reports for places that sell soup
GET /inspections/_search
{
  "query": {
    "match": {
      "business_name": "soup"
    }
  }
}
# match_phrase matches the terms as an exact phrase, in order
GET /inspections/_search
{
  "query": {
    "match_phrase": {
      "business_name": "san francisco"
    }
  }
}
#__________________________________________________
# Let's find all docs with "soup" and "san francisco" in the business name
GET /inspections/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "business_name": "soup" } },
        { "match_phrase": { "business_name": "san francisco" } }
      ]
    }
  }
}
# We can also negate parts of a query: businesses without "soup" in the name (maybe you hate soup)
GET /inspections/_search
{
  "query": {
    "bool": {
      "must_not": [
        { "match": { "business_name": "soup" } }
      ]
    }
  }
}
#__________________________________________________
# We can also boost certain clauses: here, phrase matches for "soup" are weighted three times higher
GET /inspections/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match_phrase": {
            "business_name": {
              "query": "soup",
              "boost": 3
            }
          }
        },
        {
          "match_phrase": {
            "business_name": {
              "query": "san francisco"
            }
          }
        }
      ]
    }
  }
}
# We can also ask Elasticsearch to highlight the matched terms in the results
GET /inspections/_search
{
  "query": {
    "match": {
      "business_name": "soup"
    }
  },
  "highlight": {
    "fields": {
      "business_name": {}
    }
  }
}
#__________________________________________________
# Finally, we can perform filtering, when we don't need text analysis (or need to do exact matches, range queries, etc.)
GET /inspections/_search
{
  "query": {
    "range": {
      "inspection_score": {
        "gte": 80
      }
    }
  },
  "sort": [
    { "inspection_score": "desc" }
  ]
}
POST /_sql?format=txt
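# The _sql endpoint accepts a JSON body containing a SQL statement. A minimal sketch (the statement
# below is just an illustrative query against the inspections index used throughout):
POST /_sql?format=txt
{
  "query": "SELECT business_name, inspection_score FROM inspections ORDER BY inspection_score DESC LIMIT 10"
}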
# We won't have time to cover aggregations in depth now, but we want to get you familiar with them
# Let's search for the term "soup" and bucket results by health score (similar to the facets you would see on an eBay site)
# Show: https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2380057.m570.l1313.TR12.TRC2.A0.H0.Xwatch.TRS0&_nkw=watch&_sacat=0
GET /inspections/_search
{
  "query": {
    "match": {
      "business_name": "soup"
    }
  },
  "aggregations": {
    "inspection_score": {
      "range": {
        "field": "inspection_score",
        "ranges": [
          {
            "key": "0-80",
            "from": 0,
            "to": 80
          },
          {
            "key": "81-90",
            "from": 81,
            "to": 90
          },
          {
            "key": "91-100",
            "from": 91,
            "to": 100
          }
        ]
      }
    }
  }
}
# We can also sort results by distance from a given location
GET /inspections/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "_geo_distance": {
        "coordinates": {
          "lat": 37.783527,
          "lon": -122.409061
        },
        "order": "asc",
        "unit": "km"
      }
    }
  ]
}
# Mappings are helpful for defining the structure of our documents, and for more efficiently storing/searching the data within our index
# We have numbers, dates, strings, and geo_points; let's see what Elasticsearch thinks our mapping is
GET /inspections/_mapping
# Let's change the mapping, delete our index, and perform our bulk import again
# In production scenarios, you may prefer to use the reindex API; note that new mapping fields can be added to an existing index without needing to migrate the data
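# A minimal sketch of the reindex API, copying documents from the existing index into a new one (the
# destination index name here is just an example):
POST /_reindex
{
  "source": {
    "index": "inspections"
  },
  "dest": {
    "index": "inspections_v2"
  }
}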
DELETE inspections
PUT /inspections
PUT /inspections/_mapping
{
  "properties": {
    "business_address": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_city": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_id": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_latitude": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "coordinates": {
      "type": "geo_point"
    },
    "business_longitude": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_name": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_phone_number": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_postal_code": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "business_state": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "inspection_date": {
      "type": "date"
    },
    "inspection_id": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "inspection_score": {
      "type": "long"
    },
    "inspection_type": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "risk_category": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "violation_description": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    },
    "violation_id": {
      "type": "text",
      "fields": {
        "keyword": { "type": "keyword", "ignore_above": 256 }
      }
    }
  }
}
# Now we can sort the results by distance from a given location
GET /inspections/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "_geo_distance": {
        "business_location": {
          "lat": 37.800175,
          "lon": -122.409081
        },
        "order": "asc",
        "unit": "km",
        "distance_type": "plane"
      }
    }
  ]
}
# That was a very short introduction to geo queries and mappings; the goal was to get your feet wet and hopefully inspire you to go off and learn more
# Let's finish the CRUD components: we covered C and R, so let's show how to update and delete documents
# Let's add a flagged field to one of our documents, using a partial document update
GET /inspections/_search
POST /inspections/_update/5
{
  "doc": {
    "flagged": true,
    "views": 0
  }
}
# To delete a document, we can just pass the document id to the DELETE API
DELETE /inspections/_doc/5
# - Analyzers
# Just as you saw mapping configuration for data types in the previous example, you can also configure an analyzer per field, or for an entire index!
"tokenizer": "standard",
GET /inspections/_analyze
"tokenizer": "whitespace",
GET /inspections/_analyze
"tokenizer": "standard",
GET /inspections/_analyze
"tokenizer": "standard",
"filter": ["lowercase"],
"tokenizer": "standard",
# Index templates can also control how new, unmapped fields are handled; here any unmapped field becomes a runtime field
PUT _index_template/my_dynamic_index
{
  "index_patterns": [
    "my_dynamic_index-*"
  ],
  "template": {
    "mappings": {
      "dynamic": "runtime",
      "properties": {
        "timestamp": {
          "type": "date",
          "format": "yyyy-MM-dd"
        },
        "response_code": {
          "type": "integer"
        }
      }
    }
  }
}
# The data we're ingesting has three fields: timestamp, response_code, and new_tla. In the past,
# new_tla wouldn't have been added because it wasn't defined in the index template. Now it's just
# treated as a runtime field.
# (The document values below are just illustrative examples.)
POST my_dynamic_index-1/_bulk
{"index": {}}
{"timestamp": "2021-01-01", "response_code": 200, "new_tla": "data-1"}
{"index": {}}
{"timestamp": "2021-01-02", "response_code": 200, "new_tla": "data-2"}
{"index": {}}
{"timestamp": "2021-01-03", "response_code": 404, "new_tla": "data-3"}
{"index": {}}
{"timestamp": "2021-01-04", "response_code": 200, "new_tla": "data-4"}
{"index": {}}
{"timestamp": "2021-01-05", "response_code": 500, "new_tla": "data-5"}
{"index": {}}
{"timestamp": "2021-01-06", "response_code": 200, "new_tla": "data-6"}
# Here we're running a normal search query for new_tla. A query can also be run with both an indexed
# field like response_code and a runtime field like new_tla.
GET my_dynamic_index-1/_search
{
  "query": {
    "match": {
      "new_tla": "data-1"
    }
  }
}