Infer a probabilistic schema for a MongoDB collection.
A high-level view of the class interactions is as follows:
mongodb-schema
doesn't do anything directly with MongoDB,
so to try the examples we'll install the Node.js driver.
We'll also need some data in a collection to derive a schema from.
Make sure you have a mongod
running on localhost on port 27017 (or change the example accordingly). Then, do:
npm install mongodb mongodb-schema
mongo --eval "db.test.insert([{_id: 1, a: true}, {_id: 2, a: 'true'}, {_id: 3, a: 1}, {_id: 4}])" localhost:27017/test
- Create a new file
parse-schema.js
and paste in the following code:
var parseSchema = require('mongodb-schema');
var connect = require('mongodb');
connect('mongodb://localhost:27017/test', function(err, db){
if(err) return console.error(err);
parseSchema('test.test', db.collection('test').find(), function(err, schema){
if(err) return console.error(err);
console.log(JSON.stringify(schema, null, 2));
db.close();
});
});
- When we run the above with `node parse-schema.js`, we'll see something like the following (some fields are omitted here for clarity):
{
"count": 4, // parsed 4 documents
"ns": "test.test", // namespace
"fields": [ // an array of Field objects, @see `./lib/field.js`
{
"name": "_id",
"count": 4, // 4 documents counted with _id
"type": "Number", // the type of _id is `Number`
"probability": 1, // all documents had an _id field
"unique": 4, // 4 unique values found
"has_duplicates": false, // therefore no duplicates
"types": [ // an array of Type objects, @see `./lib/types/`
{
"name": "Number", // name of the type
"count": 4, // 4 numbers counted
"probability": 1,
"unique": 4,
"values": [ // array of encountered values
1,
2,
3,
4
]
}
]
},
{
"name": "a",
"count": 3, // only 3 documents with field `a` counted
"probability": 0.75, // hence probability 0.75
"type": [ // found these types
"Boolean",
"String",
"Number",
"Undefined" // for convenience, we treat Undefined as its own type
],
"unique": 3,
"has_duplicates": false, // there were no duplicate values
"types": [
{
"name": "Boolean",
"count": 1,
"probability": 0.25, // probabilities for types are calculated factoring in Undefined
"unique": 1,
"values": [
true
]
},
{
"name": "String",
"count": 1,
"probability": 0.25,
"unique": 1,
"values": [
"true"
]
},
{
"name": "Number",
"count": 1,
"probability": 0.25,
"unique": 1,
"values": [
1
]
},
{
"name": "Undefined",
"count": 1,
"probability": 0.25,
"unique": 0
}
]
}
]
}
mongodb-schema
supports all BSON types.
Check out the tests for more usage examples.
npm install --save mongodb-schema
npm test
Apache 2.0
Under the hood, mongodb-schema
uses ampersand-state and
ampersand-collection for modeling Schema, Fields, and Types.