Skip to main content

MongoDB Rebuild Index

  1. Enter the storage component container

    docker exec -it $(docker ps | grep mingdaoyun-sc | awk '{print $1}') bash
  1. Create a reIndex.js script file to rebuild user-defined indexes under the mdwsrows database

    Content of reIndex.js Script File
    // Database whose user-defined indexes are processed by this script
    var targetDbName = "mdwsrows";

    // Collections listed here are left untouched: none of their indexes are rebuilt
    var collectionWhitelist = ["discussion", "rowrelations", "workSheetRowTopic", "workSheetTopic", "wslogs"];

    // Index names listed here are never rebuilt, whichever collection they belong to
    var indexWhitelist = ["_id_", "idx_ctime", "idx_utime", "uk_rowid", "idx_tp_status", "idx_thirdprimary"];

    /**
     * Returns the current instant as an ISO-8601 string expressed in UTC+8,
     * e.g. "2024-01-01T08:00:00.000+08:00".
     *
     * The clock is shifted forward by eight hours so that toISOString() (which
     * always renders UTC) prints the UTC+8 wall-clock fields; the trailing "Z"
     * is then swapped for the matching "+08:00" offset.
     */
    function formatDateTime() {
        var offsetMs = 8 * 60 * 60 * 1000;
        var shifted = new Date(new Date().getTime() + offsetMs);
        var iso = shifted.toISOString();
        return iso.replace('Z', '+08:00');
    }

    /**
     * Prints a top-level banner: a blank line, a 100-character "=" rule,
     * the given text, and a closing "=" rule.
     */
    function printHeader(text) {
        var rule = "=".repeat(100);
        print("\n" + rule);
        print(text);
        print(rule);
    }

    /**
     * Prints a section banner: a blank line, an 80-character "-" rule,
     * the given text, and a closing "-" rule.
     */
    function printSection(text) {
        var rule = "-".repeat(80);
        print("\n" + rule);
        print(text);
        print(rule);
    }

    /**
     * Prints one log line of the form "[<time>] <action>", preceded by a blank line.
     */
    function printTimedAction(time, action) {
        var line = `\n[${time}] ${action}`;
        print(line);
    }

    /**
     * Pretty-prints a value as JSON (2-space nesting) and prefixes every line
     * after the first with `indent` spaces, so the output lines up when it is
     * embedded after a label on the first line.
     *
     * @param {*} obj - Value to serialise.
     * @param {number} [indent=5] - Number of spaces prepended to continuation lines.
     * @returns {string} The formatted JSON, or "Not available" when the value
     *   has no JSON representation.
     */
    function formatJSON(obj, indent = 5) {
        var json = JSON.stringify(obj, null, 2);

        // JSON.stringify yields undefined (not a string) for undefined, functions
        // and symbols; calling .split on that would throw a TypeError.
        if (json === undefined) {
            return "Not available";
        }

        var lines = json.split('\n');
        for (var i = 1; i < lines.length; i++) {
            lines[i] = ' '.repeat(indent) + lines[i];
        }
        return lines.join('\n');
    }

    /**
     * Builds the text of the createIndex() shell command that is logged before
     * an index is recreated. Both the key document and the options document are
     * rendered through formatJSON().
     */
    function formatCreateIndexCommand(collName, key, options) {
        var keyText = formatJSON(key);
        var optionsText = formatJSON(options);
        return `db.${collName}.createIndex(${keyText},\n ${optionsText})`;
    }

    /**
     * Logs the shell command that is about to run: an "Execute:" label line
     * followed by the command text prefixed with one space.
     */
    function printCommand(command) {
        print(" └─ Execute:");
        var indented = " " + command;
        print(indented);
    }

    /**
     * Logs a success line with the elapsed time, shown with three decimal places.
     *
     * @param {number} seconds - Elapsed time in seconds.
     */
    function printCompletion(seconds) {
        var elapsed = seconds.toFixed(3);
        print(" └─ ✓ Completed in " + elapsed + " seconds\n");
    }

    // Connect to the target database
    var targetDb = db.getSiblingDB(targetDbName);
    var startTime = formatDateTime();

    printHeader("MongoDB Index Rebuild Process");
    print(`\n• Start Time: ${startTime}`);
    print(`• Target Database: ${targetDb.getName()}`);

    // Throws when a shell helper handed back a command reply whose "ok" field is not 1.
    // The legacy mongo shell returns the server reply from dropIndex()/createIndex()
    // instead of throwing, so without this check a failed command would be logged as
    // "Completed". Non-document return values (no "ok" field) are accepted as-is.
    function assertCommandOk(result, action) {
        var isReply = result !== null && typeof result === "object" && result.ok !== undefined;
        if (isReply && Number(result.ok) !== 1) {
            throw new Error(`${action} failed: ${result.errmsg || JSON.stringify(result)}`);
        }
    }

    // Get all collections to rebuild by filtering out whitelisted and system collections
    var collections = targetDb.getCollectionNames().filter(function(collName) {
        return !collectionWhitelist.includes(collName) &&
            !collName.startsWith('system.');
    });

    print(`• Total Collections: ${collections.length}`);

    collections.forEach(function(collName, index) {
        var coll = targetDb.getCollection(collName);
        var stats = coll.stats();

        // Output collection info and progress
        printSection(`Processing Collection [${index + 1}/${collections.length}]: ${collName}`);
        print(`\n• Document Count: ${stats.count}`);
        print(`• Storage Size: ${stats.storageSize} bytes`);

        // Get indexes to rebuild by filtering out whitelisted index names
        var indexes = coll.getIndexes();
        var rebuildIndexes = indexes.filter(function(idx) {
            return !indexWhitelist.includes(idx.name);
        });

        if (rebuildIndexes.length === 0) {
            print("\n✓ No indexes need to be rebuilt.");
            return;
        }

        // Output index rebuild plan
        print(`\n• Indexes to Rebuild (${rebuildIndexes.length}):`);
        rebuildIndexes.forEach(function(idx) {
            print(` ├─ Name: ${idx.name.padEnd(20)}`);
            print(` │ Key: ${JSON.stringify(idx.key)}`);
        });

        // Rebuild each index: drop it, then recreate it from its recorded definition
        rebuildIndexes.forEach(function(idx) {
            var key = idx.key;
            var options = {};

            // Copy the index definition into the options document. "v", "ns" and
            // "background" are excluded as before; "key" is excluded as well because
            // it is already passed as the first argument of createIndex().
            for (var prop in idx) {
                if (!["v", "ns", "background", "key"].includes(prop)) {
                    options[prop] = idx[prop];
                }
            }
            options.background = true; // Build index in background

            // Set once the drop has succeeded, so the error path can tell whether
            // the collection has been left without this index.
            var dropped = false;

            try {
                // Drop the old index
                var dropTime = formatDateTime();
                printTimedAction(dropTime, `Dropping Index: ${idx.name}`);
                printCommand(`db.${collName}.dropIndex("${idx.name}")`);

                var dropStart = new Date();
                assertCommandOk(coll.dropIndex(idx.name), "dropIndex");
                var dropEnd = new Date();
                dropped = true;
                printCompletion((dropEnd - dropStart)/1000);

                // Create a new index
                var createTime = formatDateTime();
                printTimedAction(createTime, `Creating Index: ${idx.name}`);
                printCommand(formatCreateIndexCommand(collName, key, options));

                var createStart = new Date();
                assertCommandOk(coll.createIndex(key, options), "createIndex");
                var createEnd = new Date();
                printCompletion((createEnd - createStart)/1000);

            } catch (e) {
                print(` └─ ✗ Error: ${e.message}`);
                if (dropped) {
                    // The old index is gone and the new one was not built: say so
                    // explicitly and repeat the command needed to restore it.
                    print(` └─ ⚠ Index "${idx.name}" was dropped but NOT recreated. Recreate it manually with:`);
                    print(" " + formatCreateIndexCommand(collName, key, options));
                }
                print(" Skipping this index...\n");
            }
        });
    });

    var endTime = formatDateTime();
    printHeader("Process Completed");
    print(`\n• End Time: ${endTime}`);
  1. Execute the script

    nohup mongo mongodb://127.0.0.1:27017/admin --quiet reIndex.js >> reIndex_output.log 2>&1 &
    • The execution log will be output to the reIndex_output.log file
  1. Monitor the log and wait for the script to complete its execution (the log will end with "Process Completed" and the End Time)

  2. Create a reIndexWithCmd.js script file to rebuild some indexes that the HAP system relies on

    Content of reIndexWithCmd.js Script File
    // ====================================================================
    // CONFIGURATION
    // ====================================================================
    // Maps each database to process to the list of its collections that must be SKIPPED.
    // Shape: databaseName: ["collectionToSkip1", "collectionToSkip2", ...]
    // An empty list means every (non-system) collection of that database is reindexed.
    var targetDatabases = {
        mdpost: [],
        MDHistory: []
    };
    // ====================================================================


    // Output helpers (no modification needed)

    /**
     * Prints a top-level banner: a blank line, a 100-character "=" rule,
     * the given text, and a closing "=" rule.
     */
    function printHeader(text) {
        var bar = "=".repeat(100);
        ["\n" + bar, text, bar].forEach(function(line) {
            print(line);
        });
    }

    /**
     * Prints a section banner: a blank line, an 80-character "-" rule,
     * the given text, and a closing "-" rule.
     */
    function printSection(text) {
        var bar = "-".repeat(80);
        ["\n" + bar, text, bar].forEach(function(line) {
            print(line);
        });
    }

    /**
     * Returns the current instant as an ISO-8601 string expressed in UTC+8,
     * e.g. "2024-01-01T08:00:00.000+08:00".
     *
     * toISOString() always renders UTC, so the timestamp is first moved forward
     * by eight hours and the trailing "Z" is then replaced with "+08:00".
     */
    function formatDateTime() {
        var eightHoursMs = 8 * 60 * 60 * 1000;
        var current = new Date();
        var shifted = new Date(current.getTime() + eightHoursMs);
        var isoText = shifted.toISOString();
        return isoText.replace('Z', '+08:00');
    }

    /**
     * Pretty-prints a value as JSON (2-space nesting) and prefixes every line
     * after the first with `indent` spaces so multi-line output stays aligned
     * under a label.
     *
     * @param {*} obj - Value to serialise.
     * @param {number} [indent=5] - Number of spaces prepended to continuation lines.
     * @returns {string} The formatted JSON, or "Not available" for undefined/null.
     */
    function formatJSON(obj, indent = 5) {
        var isMissing = obj === undefined || obj === null;
        if (isMissing) {
            return "Not available";
        }

        var lines = JSON.stringify(obj, null, 2).split('\n');
        for (var i = 1; i < lines.length; i++) {
            lines[i] = ' '.repeat(indent) + lines[i];
        }
        return lines.join('\n');
    }

    /**
     * Converts a byte count to a megabyte string with two decimals (e.g. "1.50 MB").
     *
     * @param {number} bytes - Size in bytes.
     * @returns {string} The size in MB, or "N/A" when bytes is undefined or null.
     */
    function formatFileSize(bytes) {
        if (bytes == null) {
            return "N/A";
        }
        var megabytes = bytes / 1024 / 1024;
        return megabytes.toFixed(2) + " MB";
    }

    /**
     * Prints one log line of the form "[<time>] <action>", preceded by a blank line.
     */
    function printTimedAction(time, action) {
        var entry = `\n[${time}] ${action}`;
        print(entry);
    }

    /**
     * Logs a success line with the elapsed time, shown with three decimal places.
     *
     * @param {number} seconds - Elapsed time in seconds.
     */
    function printCompletion(seconds) {
        var elapsedText = seconds.toFixed(3);
        print(" └─ ✓ Completed in " + elapsedText + " seconds\n");
    }

    // ====================================================================
    // SCRIPT EXECUTION
    // ====================================================================

    var overallStartTime = formatDateTime();
    printHeader("MongoDB Multi-Database Index Rebuild Process Started");
    print(`\n• Overall Start Time: ${overallStartTime}`);
    print(`• Databases to Process: ${Object.keys(targetDatabases).join(', ')}`);

    // Walk the configured databases in the order they are declared
    var dbNames = Object.keys(targetDatabases);
    for (var dbPos = 0; dbPos < dbNames.length; dbPos++) {
        var dbName = dbNames[dbPos];

        printHeader(`Processing Database: [ ${dbName} ]`);

        // Handle on the database being processed
        var currentDb = db.getSiblingDB(dbName);

        // Collections configured to be skipped for this database
        var excludedCollections = targetDatabases[dbName];

        print(`\n• Target Database: ${currentDb.getName()}`);
        print(`• Excluded Collections: ${formatJSON(excludedCollections)}`);

        // Keep every collection that is neither excluded nor a system collection
        var allCollections = currentDb.getCollectionNames();
        var validCollections = allCollections.filter(function(name) {
            return !(excludedCollections.includes(name) || name.startsWith('system.'));
        });

        print(`• Total Collections to Process in this DB: ${validCollections.length}`);

        if (validCollections.length === 0) {
            print("\nNo collections to process in this database. Moving to the next one.");
            continue;
        }

        validCollections.forEach(function(collName, position) {
            try {
                var coll = currentDb[collName];

                // Collection statistics are read first; a failure here skips the collection
                var stats = coll.stats();

                // Progress banner and collection summary
                printSection(`Processing Collection [${position + 1}/${validCollections.length}]: ${collName}`);
                print(`\n• Collection Statistics:`);
                print(` ├─ Storage Size: ${formatFileSize(stats.storageSize)}`);
                print(` └─ Document Count: ${(stats.count || 0).toLocaleString()}`);

                // List the indexes as they exist before the rebuild
                var indexes = coll.getIndexes();
                var lastPos = indexes.length - 1;
                print(`\n• Current Indexes (${indexes.length}):`);
                indexes.forEach(function(idx, i) {
                    var isLast = i === lastPos;
                    var branch = isLast ? '└' : '├';
                    var rail = isLast ? ' ' : '│';
                    print(` ${branch}─ Name: ${idx.name.padEnd(20)}`);
                    print(` ${rail} Key: ${formatJSON(idx.key)}`);
                });

                // Run reIndex() and time it
                var execTime = formatDateTime();
                printTimedAction(execTime, "Executing reIndex()");
                print(" └─ Execute:");
                print(` db.getSiblingDB('${dbName}').getCollection('${collName}').reIndex()`);

                var startExec = new Date();
                var result = coll.reIndex();
                var endExec = new Date();

                // A reply whose ok field is not 1 is reported as a failure
                if (result.ok !== 1) {
                    print(` └─ ✗ ReIndex failed: ${formatJSON(result)}`);
                    return;
                }

                printCompletion((endExec - startExec)/1000);

                print("• Operation Results:");
                print(` ├─ Previous Index Count: ${result.nIndexesWas}`);
                print(` ├─ Current Index Count: ${result.nIndexes}`);

                // operationTime and $clusterTime are printed only when the reply carries them
                var operationTimeText = result.operationTime !== undefined
                    ? formatJSON(result.operationTime)
                    : "Not available (non-replica set deployment)";
                print(` ├─ Operation Time: ${operationTimeText}`);

                var clusterTimeText = result.$clusterTime !== undefined
                    ? formatJSON(result.$clusterTime)
                    : "Not available (non-replica set deployment)";
                print(` └─ Cluster Time: ${clusterTimeText}`);
            } catch (e) {
                print(` └─ ✗ Error: ${e.message}`);
                print(" Skipping this collection...\n");
            }
        });
    }

    var overallEndTime = formatDateTime();
    printHeader("Process Completed");
    print(`\n• Overall End Time: ${overallEndTime}`);

  1. Execute the script

    nohup mongo mongodb://127.0.0.1:27017/admin --quiet reIndexWithCmd.js >> reIndexWithCmd_output.log 2>&1 &
    • The execution log will be output to the reIndexWithCmd_output.log file
  1. Monitor the log and wait for the script to complete its execution (the log will end with "Process Completed" and the Overall End Time)