fix: Consider only sessions from last few days for SAU/MAU (#30354)

This commit is contained in:
Diego Sampaio 2023-09-11 21:24:29 -03:00
parent 4cb0b6ba6f
commit 92d25b9c7a
No known key found for this signature in database
GPG Key ID: B71D302EB7F5183C
4 changed files with 131 additions and 128 deletions

View File

@ -0,0 +1,8 @@
---
'@rocket.chat/model-typings': patch
'@rocket.chat/meteor': patch
---
Change SAU aggregation to consider only sessions from the last few days instead of the entire session history.
This is particularly important for large workspaces: if the cron job has not run for some time, the number of sessions accumulates and the aggregation takes a long time to run.

View File

@ -318,33 +318,19 @@ export class SAUMonitorClass {
return;
}
logger.info('[aggregate] - Aggregating data.');
const today = new Date();
const date = new Date();
date.setDate(date.getDate() - 0); // yesterday
const yesterday = getDateObj(date);
// get sessions from 3 days ago to make sure even if a few cron jobs were skipped, we still have the data
const threeDaysAgo = new Date(today.getFullYear(), today.getMonth(), today.getDate() - 3, 0, 0, 0, 0);
for await (const record of aggregates.dailySessionsOfYesterday(Sessions.col, yesterday)) {
await Sessions.updateOne(
{ _id: `${record.userId}-${record.year}-${record.month}-${record.day}` },
{ $set: record },
{ upsert: true },
);
const period = { start: getDateObj(threeDaysAgo), end: getDateObj(today) };
logger.info({ msg: '[aggregate] - Aggregating data.', period });
for await (const record of aggregates.dailySessions(Sessions.col, period)) {
await Sessions.updateDailySessionById(`${record.userId}-${record.year}-${record.month}-${record.day}`, record);
}
await Sessions.updateMany(
{
type: 'session',
year: { $lte: yesterday.year },
month: { $lte: yesterday.month },
day: { $lte: yesterday.day },
},
{
$set: {
type: 'computed-session',
_computedAt: new Date(),
},
},
);
await Sessions.updateAllSessionsByDateToComputed(period);
}
}

View File

@ -167,9 +167,9 @@ const getProjectionByFullDate = (): { day: string; month: string; year: string }
});
export const aggregates = {
dailySessionsOfYesterday(
dailySessions(
collection: Collection<ISession>,
{ year, month, day }: DestructuredDate,
{ start, end }: DestructuredRange,
): AggregationCursor<
Pick<ISession, 'mostImportantRole' | 'userId' | 'day' | 'year' | 'month' | 'type'> & {
time: number;
@ -178,6 +178,93 @@ export const aggregates = {
_computedAt: string;
}
> {
const pipeline = [
{
$match: {
userId: { $exists: true },
lastActivityAt: { $exists: true },
device: { $exists: true },
type: 'session',
...matchBasedOnDate(start, end),
},
},
{
$project: {
userId: 1,
device: 1,
day: 1,
month: 1,
year: 1,
mostImportantRole: 1,
time: { $trunc: { $divide: [{ $subtract: ['$lastActivityAt', '$loginAt'] }, 1000] } },
},
},
{
$match: {
time: { $gt: 0 },
},
},
{
$group: {
_id: {
userId: '$userId',
device: '$device',
day: '$day',
month: '$month',
year: '$year',
},
mostImportantRole: { $first: '$mostImportantRole' },
time: { $sum: '$time' },
sessions: { $sum: 1 },
},
},
{
$sort: {
time: -1,
},
},
{
$group: {
_id: {
userId: '$_id.userId',
day: '$_id.day',
month: '$_id.month',
year: '$_id.year',
},
mostImportantRole: { $first: '$mostImportantRole' },
time: { $sum: '$time' },
sessions: { $sum: '$sessions' },
devices: {
$push: {
sessions: '$sessions',
time: '$time',
device: '$_id.device',
},
},
},
},
{
$sort: {
_id: 1,
},
},
{
$project: {
_id: 0,
type: { $literal: 'user_daily' },
_computedAt: { $literal: new Date() },
day: '$_id.day',
month: '$_id.month',
year: '$_id.year',
userId: '$_id.userId',
mostImportantRole: 1,
time: 1,
sessions: 1,
devices: 1,
},
},
];
return collection.aggregate<
Pick<ISession, 'mostImportantRole' | 'userId' | 'day' | 'year' | 'month' | 'type'> & {
time: number;
@ -185,108 +272,7 @@ export const aggregates = {
devices: ISession['device'][];
_computedAt: string;
}
>(
[
{
$match: {
userId: { $exists: true },
lastActivityAt: { $exists: true },
device: { $exists: true },
type: 'session',
$or: [
{
year: { $lt: year },
},
{
year,
month: { $lt: month },
},
{
year,
month,
day: { $lte: day },
},
],
},
},
{
$project: {
userId: 1,
device: 1,
day: 1,
month: 1,
year: 1,
mostImportantRole: 1,
time: { $trunc: { $divide: [{ $subtract: ['$lastActivityAt', '$loginAt'] }, 1000] } },
},
},
{
$match: {
time: { $gt: 0 },
},
},
{
$group: {
_id: {
userId: '$userId',
device: '$device',
day: '$day',
month: '$month',
year: '$year',
},
mostImportantRole: { $first: '$mostImportantRole' },
time: { $sum: '$time' },
sessions: { $sum: 1 },
},
},
{
$sort: {
time: -1,
},
},
{
$group: {
_id: {
userId: '$_id.userId',
day: '$_id.day',
month: '$_id.month',
year: '$_id.year',
},
mostImportantRole: { $first: '$mostImportantRole' },
time: { $sum: '$time' },
sessions: { $sum: '$sessions' },
devices: {
$push: {
sessions: '$sessions',
time: '$time',
device: '$_id.device',
},
},
},
},
{
$sort: {
_id: 1,
},
},
{
$project: {
_id: 0,
type: { $literal: 'user_daily' },
_computedAt: { $literal: new Date() },
day: '$_id.day',
month: '$_id.month',
year: '$_id.year',
userId: '$_id.userId',
mostImportantRole: 1,
time: 1,
sessions: 1,
devices: 1,
},
},
],
{ allowDiskUse: true },
);
>(pipeline, { allowDiskUse: true });
},
async getUniqueUsersOfYesterday(
@ -1616,4 +1602,23 @@ export class SessionsRaw extends BaseRaw<ISession> implements ISessionsModel {
return this.col.bulkWrite(ops, { ordered: false });
}
/**
 * Writes the given fields onto the document identified by `_id`.
 *
 * The update is issued with `upsert: true`, so the document is created
 * when no document with that `_id` exists yet.
 *
 * @param _id - Identifier of the document to update or create.
 * @param record - Fields applied to the document through `$set`.
 * @returns The result of the underlying `updateOne` call.
 */
async updateDailySessionById(_id: ISession['_id'], record: Partial<ISession>): Promise<UpdateResult> {
	const byId = { _id };
	const createWhenMissing = { upsert: true };

	return this.updateOne(byId, { $set: record }, createWhenMissing);
}
/**
 * Re-types every `'session'` document selected by the date filter as
 * `'computed-session'` and stamps it with the current time in `_computedAt`.
 *
 * @param range - Object carrying the `start` and `end` values handed to
 *   `matchBasedOnDate` to build the date part of the filter.
 *   NOTE(review): whether the bounds are inclusive depends on
 *   `matchBasedOnDate`, which is not visible here — confirm.
 * @returns The result of the underlying `updateMany` call.
 */
async updateAllSessionsByDateToComputed({ start, end }: DestructuredRange): Promise<UpdateResult | Document> {
	// Build the date filter first, then take the timestamp, matching the original evaluation order.
	const withinPeriod = matchBasedOnDate(start, end);
	const computedAt = new Date();

	return this.updateMany(
		{ type: 'session', ...withinPeriod },
		{ $set: { type: 'computed-session', _computedAt: computedAt } },
	);
}
}

View File

@ -145,4 +145,8 @@ export interface ISessionsModel extends IBaseModel<ISession> {
}): Promise<UpdateResult | Document>;
createBatch(sessions: OptionalId<ISession>[]): Promise<BulkWriteResult | undefined>;
updateDailySessionById(_id: ISession['_id'], record: Partial<ISession>): Promise<UpdateResult>;
updateAllSessionsByDateToComputed({ start, end }: DestructuredRange): Promise<UpdateResult | Document>;
}