Skip to content
Snippets Groups Projects
convert.js 13 KiB
Newer Older
  • Learn to ignore specific revisions
  • var startTime = Date.now();
    
    var ueberDB = require("../src/node_modules/ueberDB");
    var mysql = require("../src/node_modules/ueberDB/node_modules/mysql");
    var async = require("../src/node_modules/async");
    
    var Changeset = require("ep_etherpad-lite/static/js/Changeset");
    var randomString = require('ep_etherpad-lite/static/js/pad_utils').randomString;
    
    var AttributePool = require("ep_etherpad-lite/static/js/AttributePool");
    
    
    var settingsFile = process.argv[2];
    var sqlOutputFile = process.argv[3];
    
    //stop if the settings file is not set
    if(!settingsFile || !sqlOutputFile)
    {
      console.error("Use: node convert.js $SETTINGSFILE $SQLOUTPUT");
      process.exit(1);
    }
    
    log("read settings file...");
    //read the settings file and parse the json
    var settings = JSON.parse(fs.readFileSync(settingsFile, "utf8"));
    log("done");
    
    log("open output file...");
    var sqlOutput = fs.openSync(sqlOutputFile, "w");
    var sql = "SET CHARACTER SET UTF8;\n" +
              "CREATE TABLE IF NOT EXISTS `store` ( \n" +
              "`key` VARCHAR( 100 ) NOT NULL , \n" + 
              "`value` LONGTEXT NOT NULL , \n" + 
              "PRIMARY KEY (  `key` ) \n" +
              ") ENGINE = INNODB;\n" + 
              "START TRANSACTION;\n\n";
    fs.writeSync(sqlOutput, sql);
    log("done");
    
    
    var etherpadDB = mysql.createConnection({
      host     : settings.etherpadDB.host,
      user     : settings.etherpadDB.user,
      password : settings.etherpadDB.password,
      database : settings.etherpadDB.database,
      port     : settings.etherpadDB.port
    });
    
    var timestamp = Date.now();
    
    
    var padIDs;
    
    async.series([
      //get all padids out of the database...
      function(callback)
      {
        log("get all padIds out of the database...");
        
    
        etherpadDB.query("SELECT ID FROM PAD_META", [], function(err, _padIDs)
    
        {
          padIDs = _padIDs;
          callback(err);
        });
      },
      function(callback)
      {
        log("done");
        
        //create a queue with a concurrency 100
        var queue = async.queue(function (padId, callback) 
        {
          convertPad(padId, function(err)
          {
            incrementPadStats();
            callback(err);
          });
        }, 100);
        
        //set the step callback as the queue callback
        queue.drain = callback;
        
        //add the padids to the worker queue
        for(var i=0,length=padIDs.length;i<length;i++)
        {
          queue.push(padIDs[i].ID);
        }
      }
    ], function(err)
    {
      if(err) throw err;
      
      //write the groups
      var sql = "";
      for(var proID in proID2groupID)
      {
        var groupID = proID2groupID[proID];
        var subdomain = proID2subdomain[proID];
        
        sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("group:" + groupID) + ", " + etherpadDB.escape(JSON.stringify(groups[groupID]))+ ");\n";
        sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("mapper2group:subdomain:" + subdomain) + ", " + etherpadDB.escape(groupID)+ ");\n";
      }
      
      //close transaction
      sql+="COMMIT;";
      
      //end the sql file
      fs.writeSync(sqlOutput, sql, undefined, "utf-8");
      fs.closeSync(sqlOutput);
      
      log("finished.");
      process.exit(0);
    });
    
    function log(str)
    {
    
      console.log((Date.now() - startTime)/1000 + "\t" + str);
    
        var averageTime = Math.round(padsDone/((Date.now() - startTime)/1000));
    
        log(padsDone + "/" + padIDs.length + "\t" + averageTime + " pad/s")
      }
    }
    
    var proID2groupID = {};
    var proID2subdomain = {};
    var groups = {};
    
    function convertPad(padId, callback)
    {
      var changesets = [];
      var changesetsMeta = [];
      var chatMessages = [];
      var authors = [];
      var apool;
      var subdomain;
      var padmeta;
    
      async.series([
        //get all needed db values
        function(callback)
        {
          async.parallel([
            //get the pad revisions
            function(callback)
            {
              var sql = "SELECT * FROM `PAD_CHAT_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_CHAT_META` WHERE ID=?)";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err) 
                {
    
                    //parse the pages
                    for(var i=0,length=results.length;i<length;i++)
                    {
                      parsePage(chatMessages, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                    }
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the chat entries
            function(callback)
            {
              var sql = "SELECT * FROM `PAD_REVS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVS_META` WHERE ID=?)";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err) 
                {
    
                    //parse the pages
                    for(var i=0,length=results.length;i<length;i++)
                    {
                      parsePage(changesets, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, false);
                    }
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the pad revisions meta data
            function(callback)
            {
              var sql = "SELECT * FROM `PAD_REVMETA_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVMETA_META` WHERE ID=?)";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err) 
                {
    
                    //parse the pages
                    for(var i=0,length=results.length;i<length;i++)
                    {
                      parsePage(changesetsMeta, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                    }
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the attribute pool of this pad
            function(callback)
            {
              var sql = "SELECT `JSON` FROM `PAD_APOOL` WHERE `ID` = ?";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err)
                {
    
                  try
                  {
                    apool=JSON.parse(results[0].JSON).x;
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the authors informations
            function(callback)
            {
              var sql = "SELECT * FROM `PAD_AUTHORS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_AUTHORS_META` WHERE ID=?)";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err) 
                {
    
                    //parse the pages
                    for(var i=0, length=results.length;i<length;i++)
                    {
                      parsePage(authors, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
                    }
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the pad information
            function(callback)
            {
              var sql = "SELECT JSON FROM `PAD_META` WHERE ID=?";
              
              etherpadDB.query(sql, [padId], function(err, results)
              {
                if(!err) 
                {
    
                  try
                  {
                    padmeta = JSON.parse(results[0].JSON).x;
                  }catch(e) {err = e}
    
                }
                
                callback(err);
              });
            },
            //get the subdomain
            function(callback)
            {
              //skip if this is no proPad
              if(padId.indexOf("$") == -1)
              {
                callback();
                return;
              }
              
              //get the proID out of this padID
              var proID = padId.split("$")[0];
            
              var sql = "SELECT subDomain FROM pro_domains WHERE ID = ?";
              
              etherpadDB.query(sql, [proID], function(err, results)
              {
                if(!err)
                {
                  subdomain = results[0].subDomain;
                }
                
                callback(err);
              });
            }
          ], callback);
        },
        function(callback)
        {
          //saves all values that should be written to the database
          var values = {};
          
          //this is a pro pad, let's convert it to a group pad
          if(padId.indexOf("$") != -1)
          {
            var padIdParts = padId.split("$");
            var proID = padIdParts[0];
            var padName = padIdParts[1];
            
            var groupID
            
            //this proID is not converted so far, do it
            if(proID2groupID[proID] == null)
            {
              groupID = "g." + randomString(16);
              
              //create the mappers for this new group
              proID2groupID[proID] = groupID;
              proID2subdomain[proID] = subdomain;
              groups[groupID] = {pads: {}};
            }
            
            //use the generated groupID;
            groupID = proID2groupID[proID];
            
            //rename the pad
            padId = groupID + "$" + padName;
            
            //set the value for this pad in the group
            groups[groupID].pads[padId] = 1;
          }
          
          try
          {
            var newAuthorIDs = {};
            var oldName2newName = {};
            
            //replace the authors with generated authors
    
            // we need to do that cause where the original etherpad saves pad local authors, the new (lite) etherpad uses them global
    
            for(var i in apool.numToAttrib)
            {
              var key = apool.numToAttrib[i][0];
              var value = apool.numToAttrib[i][1];
              
              //skip non authors and anonymous authors
              if(key != "author" || value == "")
                continue;
                
              //generate new author values
              var authorID = "a." + randomString(16);
              var authorColorID = authors[i].colorId || Math.floor(Math.random()*32);
              var authorName = authors[i].name || null;
              
              //overwrite the authorID of the attribute pool
              apool.numToAttrib[i][1] = authorID;
              
              //write the author to the database
              values["globalAuthor:" + authorID] = {"colorId" : authorColorID, "name": authorName, "timestamp": timestamp};
              
              //save in mappers
              newAuthorIDs[i] = authorID;
              oldName2newName[value] = authorID;
            }
            
            //save all revisions
            for(var i=0;i<changesets.length;i++)
            {
              values["pad:" + padId + ":revs:" + i] = {changeset: changesets[i], 
                                                       meta : {
                                                         author: newAuthorIDs[changesetsMeta[i].a],
                                                         timestamp: changesetsMeta[i].t,
                                                         atext: changesetsMeta[i].atext || undefined
                                                       }};
            }
            
            //save all chat messages
            for(var i=0;i<chatMessages.length;i++)
            {
              values["pad:" + padId + ":chat:" + i] = {"text": chatMessages[i].lineText, 
                                                       "userId": oldName2newName[chatMessages[i].userId], 
                                                       "time": chatMessages[i].time}
            }
            
            //generate the latest atext
    
            var fullAPool = (new AttributePool()).fromJsonable(apool);
    
            var keyRev = Math.floor(padmeta.head / padmeta.keyRevInterval) * padmeta.keyRevInterval;
            var atext = changesetsMeta[keyRev].atext;
            var curRev = keyRev;
            while (curRev < padmeta.head) 
            {
              curRev++;
              var changeset = changesets[curRev];
              atext = Changeset.applyToAText(changeset, atext, fullAPool);
            }
            
            values["pad:" + padId] = {atext: atext, 
                                      pool: apool, 
                                      head: padmeta.head, 
                                      chatHead: padmeta.numChatMessages }
          
          }
          catch(e)
          {
    
    Manuel Knitza's avatar
    Manuel Knitza committed
            console.error("Error while converting pad " + padId + ", pad skipped");
    
            console.error(e.stack ? e.stack : JSON.stringify(e));
            callback();
            return;
          }
          
          var sql = "";
          for(var key in values)
          {
            sql+="REPLACE INTO store VALUES (" + etherpadDB.escape(key) + ", " + etherpadDB.escape(JSON.stringify(values[key]))+ ");\n";
          }
          
          fs.writeSync(sqlOutput, sql, undefined, "utf-8");
          callback();
        }
      ], callback);
    }
    
    /**
     * This parses a Page like Etherpad uses them in the databases
    
    HairyFotr's avatar
    HairyFotr committed
     * The offsets describes the length of a unit in the page, the data are
    
     * all values behind each other
     */
    function parsePage(array, pageStart, offsets, data, json)
    {
      var start = 0;
      var lengths = offsets.split(",");
      
      for(var i=0;i<lengths.length;i++)
      {
        var unitLength = lengths[i];
        
        //skip empty units
        if(unitLength == "")
          continue;
        
        //parse the number
        unitLength = Number(unitLength);
          
        //cut the unit out of data
        var unit = data.substr(start, unitLength);
        
        //put it into the array
        array[pageStart + i] = json ? JSON.parse(unit) : unit;
        
        //update start
        start+=unitLength;
      }
    }