From 0966889ab97dd00242a1db2da6d688a640375ce6 Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 4 Jan 2011 10:58:22 -0800 Subject: [PATCH] continue work on mongo export --- lib/mongo_to_mysql.rb | 147 +++++++++++++++++++++++++++++++- lib/tasks/migrations.rake | 22 +---- spec/lib/mongo_to_mysql_spec.rb | 20 +++++ 3 files changed, 169 insertions(+), 20 deletions(-) create mode 100644 spec/lib/mongo_to_mysql_spec.rb diff --git a/lib/mongo_to_mysql.rb b/lib/mongo_to_mysql.rb index 2cc69d56c..974a08c4f 100644 --- a/lib/mongo_to_mysql.rb +++ b/lib/mongo_to_mysql.rb @@ -1,8 +1,34 @@ # Copyright (c) 2010, Diaspora Inc. This file is # licensed under the Affero General Public License version 3 or later. See # the COPYRIGHT file. - +require 'json' +require 'csv' class MongoToMysql + def dirname + "tmp/export-for-mysql" + end + def dirpath + "#{Rails.root}/#{dirname}" + end + def make_dir + `mkdir -p #{dirname}/json` + `mkdir -p #{dirname}/csv` + end + def db_name + 'diaspora-development' + end + def models + @models ||= [ {:name => :aspects}, + {:name => :comments}, + {:name => :contacts}, + {:name => :invitations}, + {:name => :notifications}, + {:name => :people}, + {:name => :posts}, + {:name => :requests}, + {:name => :users}, + ] + end def id_sed @id_sed = sed_replace('{\ \"$oid\"\ :\ \(\"[^"]*\"\)\ }') end @@ -12,4 +38,123 @@ class MongoToMysql def sed_replace(regex) "sed 's/#{regex}/\\1/g'" end + def json_for_model model_name + "mongoexport -d #{db_name} -c #{model_name} | #{id_sed} | #{date_sed}" + end + def write_json_export + models.each do |model| + filename ="#{dirpath}/json/#{model[:name]}.json" + model[:json_file] = filename + `#{json_for_model(model[:name])} > #{filename}` + debug "#{model[:name]} exported to #{dirname}/json/#{model[:name]}.json" + end + debug "Json export complete." + end + def debug string + if ['development', 'production'].include?(Rails.env) + puts string + end + Rails.logger.debug(string) if Rails.logger + end + def convert_json_files + models.each do |model| + self.send("#{model[:name]}_json_to_csv".to_sym, model) + end + end + def generic_json_to_csv model_hash + debug "Converting #{model_hash[:name]} json to csv" + json_file = File.open(model_hash[:json_file]) + + csv = CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv",'w') + csv << model_hash[:attrs] + + json_file.each do |aspect_json| + hash = JSON.parse(aspect_json) + csv << yield(hash) + end + json_file.close + csv.close + end + def comments_json_to_csv model_hash + model_hash[:attrs] = ["mongo_id", "post_mongo_id", "person_mongo_id", "diaspora_handle", "text"] + generic_json_to_csv(model_hash) do |hash| + [hash["_id"], hash["post_id"], hash["person_id"], hash["diaspora_handle"], hash["text"]] + end + end + def contacts_json_to_csv model_hash + model_hash[:main_attrs] = ["mongo_id", "user_mongo_id", "person_mongo_id", "pending", "created_at", "updated_at"] + #Post Visibilities + model_hash[:join_table_name] = :aspect_memberships + model_hash[:join_table_attrs] = ["contact_mongo_id", "aspect_mongo_id"] + + generic_json_to_two_csvs(model_hash) do |hash| + main_row = [hash["_id"], hash["user_id"], hash["person_id"], hash["pending"], hash["created_at"], hash["updated_at"]] + aspect_membership_rows = hash["aspect_ids"].map{|id| [hash["_id"],id]} + [main_row, aspect_membership_rows] + end + #Also writes the aspect memberships csv + end + def invitations_json_to_csv model_hash + model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id", "message"] + generic_json_to_csv(model_hash) do |hash| + [hash["_id"], hash["to_id"], hash["from_id"], hash["into_id"], hash["message"]] + end + end + def notifications_json_to_csv model_hash + model_hash[:attrs] = ["mongo_id", "target_id", "target_type", "unread"] + generic_json_to_csv(model_hash) do |hash| + [hash["_id"], hash["target_id"], hash["kind"], hash["unread"]] + end + end + def people_json_to_csv model_hash + #Also writes the profiles csv + end + def posts_json_to_csv model_hash + #has to handle the polymorphic stuff + end + def requests_json_to_csv model_hash + model_hash[:attrs] = ["mongo_id", "recipient_mongo_id", "sender_mongo_id", "aspect_mongo_id"] + generic_json_to_csv(model_hash) do |hash| + [hash["_id"], hash["to_id"], hash["from_id"], hash["into_id"]] + end + end + def users_json_to_csv model_hash + model_hash[:attrs] = ["mongo_id", "username", "serialized_private_key", "encrypted_password", "invites", "invitation_token", "invitation_sent_at", "getting_started", "disable_mail", "language", "last_sign_in_ip", "last_sign_in_at", "reset_password_token", "password_salt"] + generic_json_to_csv(model_hash) do |hash| + [hash["_id"], hash["username"], hash["serialized_private_key"], hash["encrypted_password"], hash["invites"], hash["invitation_token"], hash["invitation_sent_at"], hash["getting_started"], hash["disable_mail"], hash["language"], hash["last_sign_in_ip"], hash["last_sign_in_at"], hash["reset_password_token"], hash["password_salt"]] + end + end + def aspects_json_to_csv model_hash + debug "Converting aspects json to aspects and post_visibilities csvs" + model_hash[:main_attrs] = ["mongo_id", "name", "created_at", "updated_at"] + #Post Visibilities + model_hash[:join_table_name] = :post_visibilities + model_hash[:join_table_attrs] = ["aspect_mongo_id", "post_mongo_id"] + + generic_json_to_two_csvs(model_hash) do |hash| + main_row = [hash["_id"], hash["name"], hash["created_at"], hash["updated_at"]] + post_visibility_rows = hash["post_ids"].map{|id| [hash["_id"],id]} + [main_row, post_visibility_rows] + end + end + def generic_json_to_two_csvs model_hash + debug "Converting #{model_hash[:name]} json to two csvs" + json_file = File.open(model_hash[:json_file]) + + main_csv = CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv",'w') + main_csv << model_hash[:main_attrs] + + join_csv = CSV.open("#{dirpath}/csv/#{model_hash[:join_table_name]}.csv",'w') + join_csv << model_hash[:join_table_attrs] + + json_file.each do |aspect_json| + hash = JSON.parse(aspect_json) + result = yield(hash) + main_csv << result.first + result.last.each{|row| join_csv << row} + end + json_file.close + main_csv.close + join_csv.close + end end diff --git a/lib/tasks/migrations.rake b/lib/tasks/migrations.rake index df11d87f3..b795c3c9c 100644 --- a/lib/tasks/migrations.rake +++ b/lib/tasks/migrations.rake @@ -10,24 +10,8 @@ namespace :migrations do task :export_for_mysql do require 'lib/mongo_to_mysql' migrator = MongoToMysql.new - db_name = "diaspora-development" - models = [ - :aspects, - :comments, - :contacts, - :invitations, - :notifications, - :people, - :posts, - :requests, - :users, - ] - `mkdir -p #{Rails.root}/tmp/export-for-mysql` - models.each do |model| - filename = "#{Rails.root}/tmp/export-for-mysql/#{model}.json" - `mongoexport -d #{db_name} -c #{model} | #{migrator.id_sed} | #{migrator.date_sed} > #{filename}` - puts "#{model} exported to #{filename}" - #`mongoexport -d #{db_name} -c #{model} -jsonArray | sed 's/\"[^"]*\"/"IAMID"/g' > #{filename}` - end + migrator.make_dir + migrator.write_json_export + migrator.convert_json_files end end diff --git a/spec/lib/mongo_to_mysql_spec.rb b/spec/lib/mongo_to_mysql_spec.rb new file mode 100644 index 000000000..8ecc73c84 --- /dev/null +++ b/spec/lib/mongo_to_mysql_spec.rb @@ -0,0 +1,20 @@ +require 'spec_helper' +require 'lib/mongo_to_mysql' +describe MongoToMysql do + before do + @migrator = MongoToMysql.new + end + describe '#sed_replace' do + before do + @test_string = '{ "_id" : { "$oid" : "4d0916c4cc8cb40e93000009" }, "name" : "Work", "created_at" : { "$date" : 1292441284000 }, "updated_at" : { "$date" : 1292546796000 }, "post_ids" : [ { "$oid" : "4d0aa87acc8cb4144b000009" }, { "$oid" : "4d0ab02ccc8cb41628000010" }, { "$oid" : "4d0ab2eccc8cb41628000011" } ], "user_id" : { "$oid" : "4d0916c2cc8cb40e93000006" } }' + end + it '#id_sed gets rid of the mongo id type specifier' do + post_sed = `echo '#{@test_string}' | #{@migrator.id_sed}` + post_sed.strip.match('"_id" : "4d0916c4cc8cb40e93000009", "name" : "Work",').should be_true + end + it '#date_sed gets rid of the mongo date type specifier' do + post_sed = `echo '#{@test_string}' | #{@migrator.date_sed}` + post_sed.strip.match('ork", "created_at" : 1292441284000, "updated_at" : 1292546796000, "post_ids" :').should be_true + end + end +end