当前位置: 动力学知识库 > 问答 > 编程问答 >

mapreduce - Is it possible to retrieve a 'time span' from a MongoDB query, using the timestamp within an ObjectId?

问题描述:

We have a basic enquiry management tool that we're using to track some website enquiries in our administration suite, and we're using the ObjectId of each document in our enquiries collection to sort the enquiries by the date they were added.

{

"_id" : ObjectId("53a007db144ff47be1000003"),

"comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",

"customer" : {

"name" : "Test Enquiry",

"email" : "[email protected]",

"telephone" : "07890123456",

"mobile" : "07890123456",

"quote" : false,

"valuation" : false

},

"site" : [],

"test" : true,

"updates" : [

{

"_id" : ObjectId("53a007db144ff47be1000001"),

"status" : "New",

"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),

"status_index" : 100,

"substatus" : "New Web Enquiry",

"substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),

"notes" : "New enquiry received from website.",

},

{

"_id" : ObjectId("53a80c977d299cfe91bacf81"),

"status" : "New",

"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),

"status_index" : 100,

"substatus" : "Attempted Contact",

"substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),

"notes" : "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",

},

{

"_id" : ObjectId("53a80e539b966b8da5c40c36"),

"status" : "Approved",

"status_id" : ObjectId("52e77a49d85e95f00ebf6c72"),

"status_index" : 200,

"substatus" : "Enquiry Confirmed",

"substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),

"notes" : "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",

}

]

}

Within each enquiry is an updates array of objects which also have an ObjectId as their main identity field. We're using an $unwind and $group aggregation to pull the first and latest updates, as well as the count of updates, making sure we only take enquiries where there have been more than one update (as one is automatically inserted when the enquiry is made):

db.enquiries.aggregate([

{

$match: {

"test": true

}

},

{

$unwind: "$updates"

},

{

$group: {

"_id": "$_id",

"latest_update_id": {

$last: "$updates._id"

},

"first_update_id": {

$first: "$updates._id"

},

"update_count": {

$sum: 1

}

}

},

{

$match: {

"update_count": {

$gt: 1

}

}

}

])

This results in the following output:

{

"result" : [

{

"_id" : ObjectId("53a295ad122ea80200000005"),

"latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),

"first_update_id" : ObjectId("53a295ad122ea80200000003"),

"update_count" : 2

},

{

"_id" : ObjectId("53a007db144ff47be1000003"),

"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),

"first_update_id" : ObjectId("53a007db144ff47be1000001"),

"update_count" : 3

}

],

"ok" : 1

}

This is then passed through to our code (node.js, in this case) where we perform a few operations on it and then present some information on our dashboard.

Ideally, I'd like to add another $group pipeline aggregation to the query which would subtract the timestamp of first_update_id from the timestamp of latest_update_id to give us a timespan, which we could then use $avg on.

Can anyone tell me if this is possible? (Thank you!)

网友答案:

As Neil already pointed out, you can't get to the timestamp from the ObjectId in the aggregation framework.

You said that speed is not important, so using MapReduce you can get what you want:

var map  = function() {
    if (this.updates.length > 1) {

        var first = this.updates[0];
        var last = this.updates[this.updates.length - 1];

        var diff = last._id.getTimestamp() - first._id.getTimestamp();

        var val = {
            latest_update_id : last._id,
            first_update_id : first._id,
            update_count : this.updates.length,
            diff: diff 
        }

        emit(this._id, val);
    } 
};

var reduce = function() { };

db.runCommand(
    {
        mapReduce: "enquiries",
        map: map,
        reduce: reduce,
        out: "mrresults",
        query: { test : true}
    }
);

This are the results:

{
    "_id" : ObjectId("53a007db144ff47be1000003"),
    "value" : {
        "latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
        "first_update_id" : ObjectId("53a007db144ff47be1000001"),
        "update_count" : 3,
        "diff" : 525944000
    }
}

Edit:

If you want to get the average diff for all documents you can do it like this:

var map  = function() {
    if (this.updates.length > 1) {

        var first = this.updates[0];
        var last = this.updates[this.updates.length - 1];

        var diff = last._id.getTimestamp() - first._id.getTimestamp();

        emit("1", {diff : diff});
    } 
};

var reduce = function(key, values) {
    var reducedVal = { count: 0, sum: 0 };

    for (var idx = 0; idx < values.length; idx++) {
        reducedVal.count += 1;
        reducedVal.sum += values[idx].diff;
    }

    return reducedVal;
};

var finalize = function (key, reducedVal) {
    reducedVal.avg = reducedVal.sum/reducedVal.count;

    return reducedVal;
};

db.runCommand(
    {
        mapReduce: "y",
        map: map,
        reduce: reduce,
        finalize : finalize,
        out: "mrtest",
        query: { test : true}
    }
);

And the example output:

> db.mrtest.find().pretty()
{
    "_id" : "1",
    "value" : {
        "count" : 2,
        "sum" : 1051888000,
        "avg" : 525944000
    }
}
分享给朋友:
您可能感兴趣的文章:
随机阅读: