`
duwei118
  • 浏览: 3720 次
  • 性别: Icon_minigender_1
  • 来自: 北京
最近访客 更多访客>>
社区版块
存档分类
最新评论

MongoDB:4. Index

阅读更多

MongoDB:4. Index

优化python文档indexmongodb

摘要:MongoDB 提供了多样性的索引支持。 > for (var i = 0; i < 30; i++) { u = { name : "user" + i, age : 20 + i, contact : { address : ["address1_" + i, "address2_" + i], postcode : 100000 + i } }; db.users.insert(u); } 索引信息被保存在 system.indexes 中……
MongoDB 提供了多样性的索引支持。
> for (var i = 0; i < 30; i++) { 
...     u = { name : "user" + i, 
...           age : 20 + i, 
...           contact : { 
...              address : ["address1_" + i, "address2_" + i], 
...              postcode : 100000 + i, 
...           } 
...     }; 
...     db.users.insert(u); 
... }
索引信息被保存在 system.indexes 中,且默认总是为 _id 创建索引。
> show collections 
system.indexes 
users 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } }
1. ensureIndex / dropIndex / reIndex
使用 ensureIndex 创建索引,dropIndex() 删除索引,dropIndexes() 删除全部索引(不包括 _id 等系统索引)。
> db.users.ensureIndex({name:1}) 
> db.users.ensureIndex({age:1}) 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ObjectId("4c4a...b798"), "ns" : "blog.users", "key" : { "name" : 1 }, "name" : "name_1" } 
{ "_id" : ObjectId("4c4a...b799"), "ns" : "blog.users", "key" : { "age" : 1 }, "name" : "age_1" } 

> db.users.dropIndex({age:1}) 
{ "nIndexesWas" : 3, "ok" : true } 

> db.users.dropIndexes() 
{ 
        "nIndexesWas" : 2, 
        "msg" : "non-_id indexes dropped for collection", 
        "ok" : true
} 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } }
reIndex 则是重建索引。
> db.users.ensureIndex({name:1}) 
> db.users.ensureIndex({age:1}) 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ObjectId("4c4a...b82a"), "ns" : "blog.users", "key" : { "name" : 1 }, "name" : "name_1" } 
{ "_id" : ObjectId("4c4a...b82b"), "ns" : "blog.users", "key" : { "age" : 1 }, "name" : "age_1" } 

> db.users.reIndex() 
{ 
        "nIndexesWas" : 3, 
        "msg" : "indexes dropped for collection", 
        "ok" : 1, 
        "nIndexes" : 3, 
        "indexes" : [ 
                { 
                        "name" : "_id_", 
                        "ns" : "blog.users", 
                        "key" : { 
                                "_id" : 1 
                        } 
                }, 
                { 
                        "_id" : ObjectId("4c4a...b82a"), 
                        "ns" : "blog.users", 
                        "key" : { 
                                "name" : 1 
                        }, 
                        "name" : "name_1"
                }, 
                { 
                        "_id" : ObjectId("4c4a...b82b"), 
                        "ns" : "blog.users", 
                        "key" : { 
                                "age" : 1 
                        }, 
                        "name" : "age_1"
                } 
        ], 
        "ok" : 1 
}
当系统已有大量数据时,创建索引就是个非常耗时的活,我们可以在后台执行。
> db.users.dropIndexes() 
{ 
        "nIndexesWas" : 3, 
        "msg" : "non-_id indexes dropped for collection", 
        "ok" : true
} 

> db.users.ensureIndex({name:1}, {background:true}) 

> db.users.reIndex({background:true}) 
{ 
        "nIndexesWas" : 2, 
        "msg" : "indexes dropped for collection", 
        "ok" : 1, 
        "nIndexes" : 2, 
        "indexes" : [ 
                { 
                        "name" : "_id_", 
                        "ns" : "blog.users", 
                        "key" : { 
                                "_id" : 1 
                        } 
                }, 
                { 
                        "_id" : ObjectId("4c4a...b79c"), 
                        "ns" : "blog.users", 
                        "key" : { 
                                "name" : 1 
                        }, 
                        "name" : "name_1", 
                        "background" : true
                } 
        ], 
        "ok" : 1 
}
2. explain
MongoDB 提供了一个 explain 命令让我们获知系统如何处理查询请求。
> db.users.ensureIndex({name:1}) 
> db.users.ensureIndex({age:1}) 

> db.users.find({age:{$gt:45}}, {name:1, age:1}) 
{ "_id" : ObjectId("4c4a8edeeb257107735eb826"), "name" : "user26", "age" : 46 } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb827"), "name" : "user27", "age" : 47 } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb828"), "name" : "user28", "age" : 48 } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb829"), "name" : "user29", "age" : 49 } 

> db.users.find({age:{$gt:45}}, {name:1, age:1}).explain() 
{ 
        "cursor" : "BtreeCursor age_1", 
        "nscanned" : 5, 
        "nscannedObjects" : 4, 
        "n" : 4, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "age" : 45 
                        }, 
                        { 
                                "age" : 1.7976931348623157e+308 
                        } 
                ] 
        ] 
}
返回结果信息包括:
•cursor: 返回游标类型(BasicCursor 或 BtreeCursor)。
•nscanned: 被扫描的索引项(未使用索引时为文档)数量。
•nscannedObjects: 被扫描的文档数量。
•n: 返回的文档数量。
•millis: 耗时(毫秒)。
•indexBounds: 所使用索引的键值范围(上下界)。
利用 explain 命令,我们可以很好地观察系统如何使用索引来加快检索,同时可以针对性优化索引。
3. Embedded Keys Index
我们可以创建深层索引,甚至直接用文档(sub-document)作为索引键。
> db.users.ensureIndex({"contact.postcode":1}) 

> db.users.find({"contact.postcode":{$lt:100009}}, {name:1, "contact.postcode":1}) 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80c"), "name" : "user0", "contact" : { "postcode" : 100000 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80d"), "name" : "user1", "contact" : { "postcode" : 100001 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80e"), "name" : "user2", "contact" : { "postcode" : 100002 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80f"), "name" : "user3", "contact" : { "postcode" : 100003 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb810"), "name" : "user4", "contact" : { "postcode" : 100004 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb811"), "name" : "user5", "contact" : { "postcode" : 100005 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb812"), "name" : "user6", "contact" : { "postcode" : 100006 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb813"), "name" : "user7", "contact" : { "postcode" : 100007 } } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb814"), "name" : "user8", "contact" : { "postcode" : 100008 } } 

> db.users.find({"contact.postcode":{$lt:100009}}, {name:1, "contact.postcode":1}).explain() 
{ 
        "cursor" : "BtreeCursor contact.postcode_1", 
        "nscanned" : 10, 
        "nscannedObjects" : 9, 
        "n" : 9, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "contact.postcode" : -1.7976931348623157e+308 
                        }, 
                        { 
                                "contact.postcode" : 100009 
                        } 
                ] 
        ] 
}
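我们也可以直接用 contact(整个子文档)创建索引,但需注意查询语法:{contact:{...}} 形式是对整个子文档做精确匹配,而不是匹配其下的某个属性,所以下面的例子虽然使用了 contact_1 索引,却没有返回任何文档(n 为 0);而 "contact.postcode" 这样的 DotNotation 查询能返回正确结果,但无法使用该索引。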
(附注: 在 1.5.4 mongo 中,一直无法使用 contact:{postcode:xxx} 这样的 SubObject 语法查询数据,只能用 "contact.postcode" DotNotation 语法)
> db.users.dropIndex({"contact.postcode":1}) 
{ "nIndexesWas" : 4, "ok" : true } 

> db.users.ensureIndex({contact:1}) 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ObjectId("4c4a...b82a"), "ns" : "blog.users", "key" : { "name" : 1 }, "name" : "name_1" } 
{ "_id" : ObjectId("4c4a...b82b"), "ns" : "blog.users", "key" : { "age" : 1 }, "name" : "age_1" } 
{ "_id" : ObjectId("4c4a...b82d"), "ns" : "blog.users", "key" : { "contact" : 1 }, "name" : "contact_1" } 

> db.users.find({contact:{postcode:{$lt:100009}}}).explain() 
{ 
        "cursor" : "BtreeCursor contact_1", 
        "nscanned" : 0, 
        "nscannedObjects" : 0, 
        "n" : 0, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "contact" : { 
                                        "postcode" : { 
                                                "$lt" : 100009 
                                        } 
                                } 
                        }, 
                        { 
                                "contact" : { 
                                        "postcode" : { 
                                                "$lt" : 100009 
                                        } 
                                } 
                        } 
                ] 
        ] 
} 

> db.users.find({"contact.postcode":{$lt:100009}}).explain() // 无法使用索引 
{ 
        "cursor" : "BasicCursor", 
        "nscanned" : 30, 
        "nscannedObjects" : 30, 
        "n" : 9, 
        "millis" : 0, 
        "indexBounds" : [ ] 
} 

> db.users.find({contact:{address:"address2_23"}}).explain() 
{ 
        "cursor" : "BtreeCursor contact_1", 
        "nscanned" : 0, 
        "nscannedObjects" : 0, 
        "n" : 0, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "contact" : { 
                                        "address" : "address2_23"
                                } 
                        }, 
                        { 
                                "contact" : { 
                                        "address" : "address2_23"
                                } 
                        } 
                ] 
        ] 
} 

> db.users.find({"contact.address":"address2_23"}).explain() // 无法使用索引 
{ 
        "cursor" : "BasicCursor", 
        "nscanned" : 30, 
        "nscannedObjects" : 30, 
        "n" : 1, 
        "millis" : 0, 
        "indexBounds" : [ ] 
}
同样的语法问题在 Python 中一样生效。
>>> db.users.find({"contact":{"postcode":{"$lt":100009}}}).explain() 
{u'allPlans': [{u'cursor': u'BtreeCursor contact_1', 
                u'indexBounds': [[{u'contact': {u'postcode': {u'$lt': 100009}}}, 
                                  {u'contact': {u'postcode': {u'$lt': 100009}}}]]}], 
u'cursor': u'BtreeCursor contact_1', 
u'indexBounds': [[{u'contact': {u'postcode': {u'$lt': 100009}}}, 
                   {u'contact': {u'postcode': {u'$lt': 100009}}}]], 
u'millis': 0, 
u'n': 0, 
u'nscanned': 0, 
u'nscannedObjects': 0, 
u'oldPlan': {u'cursor': u'BtreeCursor contact_1', 
              u'indexBounds': [[{u'contact': {u'postcode': {u'$lt': 100009}}}, 
                                {u'contact': {u'postcode': {u'$lt': 100009}}}]]}} 

>>> db.users.find({"contact.postcode":{"$lt":100009}}).explain() 
{u'allPlans': [{u'cursor': u'BasicCursor', u'indexBounds': []}], 
u'cursor': u'BasicCursor', 
u'indexBounds': [], 
u'millis': 0, 
u'n': 8, 
u'nscanned': 30, 
u'nscannedObjects': 30, 
u'oldPlan': {u'cursor': u'BasicCursor', u'indexBounds': []}}
4. Compound Keys Index
创建复合索引也很简单 (1: ascending; -1: descending)。
> db.users.ensureIndex({name:1, age:-1}) 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "name" : 1 }, "name" : "name_1" } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "age" : 1 }, "name" : "age_1" } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "contact" : 1 }, "name" : "contact_1" } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "name" : 1, "age" : -1 }, "name" : "name_1_age_-1" } 

> db.users.find({age:{$lt:25}, name:"user2"}, {name:1, age:1}) 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80e"), "name" : "user2", "age" : 22 } 

> db.users.find({age:{$lt:25}, name:"user2"}, {name:1, age:1}).explain() 
{ 
        "cursor" : "BtreeCursor name_1_age_-1", 
        "nscanned" : 1, 
        "nscannedObjects" : 1, 
        "n" : 1, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "name" : "user2", 
                                "age" : 25 
                        }, 
                        { 
                                "name" : "user2", 
                                "age" : -1.7976931348623157e+308 
                        } 
                ] 
        ] 
}
复合索引同样可用于局部属性的搜索,但必须依照索引字段顺序。比如创建索引字段顺序 "a,b,c",那么仅对 "a,b,c"、"a,b"、"a" 查询有效,而对 "b,c" 之类的组合无效。
> db.users.dropIndex({name:1}) 
{ "nIndexesWas" : 5, "ok" : true } 

> db.users.dropIndex({age:1}) 
{ "nIndexesWas" : 4, "ok" : true } 

> db.users.dropIndex({contact:1}) 
{ "nIndexesWas" : 3, "ok" : true } 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ObjectId("4c4a9...b82e"), "ns" : "blog.users", "key" : { "name" : 1, "age" : -1 }, "name" : "name_1_age_-1" } 

> db.users.find({name:"user12"}).explain() // 索引有效 
{ 
        "cursor" : "BtreeCursor name_1_age_-1", 
        "nscanned" : 1, 
        "nscannedObjects" : 1, 
        "n" : 1, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "name" : "user12", 
                                "age" : { 
                                        "$maxElement" : 1 
                                } 
                        }, 
                        { 
                                "name" : "user12", 
                                "age" : { 
                                        "$minElement" : 1 
                                } 
                        } 
                ] 
        ] 
} 

> db.users.find({age:18}).explain() // 索引无效 
{ 
        "cursor" : "BasicCursor", 
        "nscanned" : 30, 
        "nscannedObjects" : 30, 
        "n" : 0, 
        "millis" : 0, 
        "indexBounds" : [ ] 
}
5. Unique Index
只需在 ensureIndex 命令中指定 unique 即可创建唯一索引。
> db.users.ensureIndex({name:1}, {unique:true}) 

> db.system.indexes.find() 
{ "name" : "_id_", "ns" : "blog.users", "key" : { "_id" : 1 } } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "name" : 1, "age" : -1 }, "name" : "name_1_age_-1" } 
{ "_id" : ..., "ns" : "blog.users", "key" : { "name" : 1 }, "name" : "name_1", "unique" : true } 

> db.users.insert({name:"user1"}) 
E11000 duplicate key error index: blog.users.$name_1  dup key: { : "user1" }
如果创建唯一索引前已经有重复文档,那么可以用 dropDups 删除多余的数据。
> db.users.dropIndexes() 
{ 
        "nIndexesWas" : 3, 
        "msg" : "non-_id indexes dropped for collection", 
        "ok" : true
} 

> db.users.insert({name:"user1"}) 

> db.users.find({name:"user1"}, {name:1}) 
{ "_id" : ObjectId("4c4a9573eb257107735eb831"), "name" : "user1" } 
{ "_id" : ObjectId("4c4a8edeeb257107735eb80d"), "name" : "user1" } 

> db.users.ensureIndex({name:1}, {unique:true, dropDups:true}) 
E11000 duplicate key error index: blog.users.$name_1  dup key: { : "user1" } 

> db.users.find({name:"user1"}, {name:1}) 
{ "_id" : ObjectId("4c4a9573eb257107735eb831"), "name" : "user1" }
6. Multikeys
对于数组类型属性,会自动索引全部数组元素。
> db.users.dropIndexes() 
{ 
        "nIndexesWas" : 1, 
        "msg" : "non-_id indexes dropped for collection", 
        "ok" : true
} 

> db.users.ensureIndex({"contact.address":1}) 

> db.users.find({"contact.address":"address2_13"}).explain() 
{ 
        "cursor" : "BtreeCursor contact.address_1", 
        "nscanned" : 1, 
        "nscannedObjects" : 1, 
        "n" : 1, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "contact.address" : "address2_13"
                        }, 
                        { 
                                "contact.address" : "address2_13"
                        } 
                ] 
        ] 
}
7. hint
hint 命令可以强制使用某个索引。
> db.users.dropIndexes() 
{ 
        "nIndexesWas" : 2, 
        "msg" : "non-_id indexes dropped for collection", 
        "ok" : true
} 

> db.users.ensureIndex({name:1, age:1}) 

> db.users.find({age:{$lt:30}}).explain() 
{ 
        "cursor" : "BasicCursor", 
        "nscanned" : 30, 
        "nscannedObjects" : 30, 
        "n" : 9, 
        "millis" : 0, 
        "indexBounds" : [ ] 
} 

> db.users.find({age:{$lt:30}}).hint({name:1, age:1}).explain() 
{ 
        "cursor" : "BtreeCursor name_1_age_1", 
        "nscanned" : 30, 
        "nscannedObjects" : 9, 
        "n" : 9, 
        "millis" : 0, 
        "indexBounds" : [ 
                [ 
                        { 
                                "name" : { 
                                        "$minElement" : 1 
                                }, 
                                "age" : -1.7976931348623157e+308 
                        }, 
                        { 
                                "name" : { 
                                        "$maxElement" : 1 
                                }, 
                                "age" : 30 
                        } 
                ] 
        ] 
}
注意 Python 代码中写法。
>>> db.users.find({"age":{"$lt":30}}).hint([("name", ASCENDING), ("age", ASCENDING)]).explain() 
{u'cursor': u'BtreeCursor name_1_age_1', 
u'indexBounds': [[{u'age': -1.7976931348623157e+308, 
                    u'name': {u'$minElement': 1}}, 
                   {u'age': 30, u'name': {u'$maxElement': 1}}]], 
u'millis': 0, 
u'n': 9, 
u'nscanned': 30, 
u'nscannedObjects': 9}
8. totalIndexSize
MongoDB 会将索引数据载入内存,以提高查询速度。我们可以用 totalIndexSize 获取全部索引数据的大小(单位:字节)。
> db.users.totalIndexSize() 
16384

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics