continue work on mongo export
This commit is contained in:
parent
4e44b197c2
commit
0966889ab9
3 changed files with 169 additions and 20 deletions
|
|
@ -1,8 +1,34 @@
|
||||||
# Copyright (c) 2010, Diaspora Inc. This file is
|
# Copyright (c) 2010, Diaspora Inc. This file is
|
||||||
# licensed under the Affero General Public License version 3 or later. See
|
# licensed under the Affero General Public License version 3 or later. See
|
||||||
# the COPYRIGHT file.
|
# the COPYRIGHT file.
|
||||||
|
require 'csv'
require 'fileutils'
require 'json'
|
||||||
class MongoToMysql
|
class MongoToMysql
|
||||||
|
# Path of the export directory, relative to the application root.
# The json/ and csv/ sub-directories are created below it.
def dirname
  File.join("tmp", "export-for-mysql")
end
|
||||||
|
# Export directory located under Rails.root (Rails.root + "/" + #dirname).
def dirpath
  [Rails.root, dirname].join("/")
end
|
||||||
|
# Creates the json/ and csv/ output directories under #dirpath.
#
# The directories are created with FileUtils instead of a shelled-out
# `mkdir -p` so that paths containing spaces work, and they are anchored on
# #dirpath (the same location #write_json_export writes to) rather than on
# the process' current working directory. Calling it again when the
# directories already exist is harmless.
def make_dir
  %w[json csv].each do |subdir|
    FileUtils.mkdir_p(File.join(dirpath, subdir))
  end
end
|
||||||
|
# Name of the MongoDB database passed to mongoexport's -d option.
def db_name
  "diaspora-development"
end
|
||||||
|
# Memoized list of the collections to export, one hash per collection.
# Each hash starts out as {:name => <collection>}; other methods store
# extra keys (e.g. :json_file, :attrs) on these same hashes.
def models
  @models ||= [
    :aspects,
    :comments,
    :contacts,
    :invitations,
    :notifications,
    :people,
    :posts,
    :requests,
    :users
  ].map { |collection| { :name => collection } }
end
|
||||||
# sed command that collapses Mongo's extended-JSON object ids, turning
# { "$oid" : "<id>" } into just "<id>".
#
# The command string is built once and cached in @id_sed, matching how
# #models memoizes its value.
def id_sed
  @id_sed ||= sed_replace('{\ \"$oid\"\ :\ \(\"[^"]*\"\)\ }')
end
|
||||||
|
|
@ -12,4 +38,123 @@ class MongoToMysql
|
||||||
# Builds a sed invocation that globally replaces every match of +regex+
# with the match's first capture group (\1).
def sed_replace(regex)
  format("sed 's/%s/\\1/g'", regex)
end
|
||||||
|
# Shell pipeline that exports the +model_name+ collection with mongoexport
# and pipes it through the #id_sed and #date_sed filters.
def json_for_model(model_name)
  export = "mongoexport -d #{db_name} -c #{model_name}"
  [export, id_sed, date_sed].join(" | ")
end
|
||||||
|
# Runs the export pipeline for every entry in #models, redirecting its
# output to <dirpath>/json/<name>.json. The output path is remembered on the
# model hash under :json_file, and progress is reported through #debug.
def write_json_export
  models.each do |model|
    collection = model[:name]
    relative_json = "json/#{collection}.json"
    target = "#{dirpath}/#{relative_json}"

    model[:json_file] = target
    %x[#{json_for_model(collection)} > #{target}]
    debug "#{collection} exported to #{dirname}/#{relative_json}"
  end
  debug "Json export complete."
end
|
||||||
|
# Reports +string+: printed to stdout when Rails.env is development or
# production, and sent to Rails.logger at debug level when a logger is set.
def debug(string)
  puts string if %w[development production].include?(Rails.env)

  logger = Rails.logger
  logger.debug(string) if logger
end
|
||||||
|
# Dispatches every entry of #models to its converter method, which is named
# "<collection>_json_to_csv" and receives the model hash.
def convert_json_files
  models.each do |model|
    converter = :"#{model[:name]}_json_to_csv"
    send(converter, model)
  end
end
|
||||||
|
# Converts one exported JSON file into one CSV file.
#
# model_hash - Hash with :name (used for log output and the CSV file name),
#              :json_file (path of the export, one JSON document per line)
#              and :attrs (the CSV header row).
#
# Yields each parsed JSON document (a Hash) and writes the block's return
# value as the CSV row. Output goes to <dirpath>/csv/<name>.csv.
#
# Both files are opened in block form so they are closed even when parsing
# or the caller's block raises. Blank lines in the export are skipped
# instead of aborting the run with a JSON parse error.
#
# Returns nil.
def generic_json_to_csv(model_hash)
  debug "Converting #{model_hash[:name]} json to csv"

  File.open(model_hash[:json_file]) do |json_file|
    CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv", 'w') do |csv|
      csv << model_hash[:attrs]

      json_file.each do |line|
        next if line.strip.empty?
        csv << yield(JSON.parse(line))
      end
    end
  end
  nil
end
|
||||||
|
# Writes the comments CSV: stores the header on model_hash[:attrs] and maps
# each exported comment document to a row via #generic_json_to_csv.
def comments_json_to_csv(model_hash)
  model_hash[:attrs] = %w[mongo_id post_mongo_id person_mongo_id diaspora_handle text]
  source_keys = %w[_id post_id person_id diaspora_handle text]

  generic_json_to_csv(model_hash) { |hash| hash.values_at(*source_keys) }
end
|
||||||
|
# Writes the contacts CSV and, through #generic_json_to_two_csvs, the
# aspect_memberships join CSV (one row per contact/aspect pair).
#
# A contact document without an "aspect_ids" key contributes no join rows
# instead of raising NoMethodError on nil.
def contacts_json_to_csv(model_hash)
  model_hash[:main_attrs] = ["mongo_id", "user_mongo_id", "person_mongo_id", "pending", "created_at", "updated_at"]
  # Aspect memberships
  model_hash[:join_table_name] = :aspect_memberships
  model_hash[:join_table_attrs] = ["contact_mongo_id", "aspect_mongo_id"]

  generic_json_to_two_csvs(model_hash) do |hash|
    main_row = [hash["_id"], hash["user_id"], hash["person_id"], hash["pending"], hash["created_at"], hash["updated_at"]]
    aspect_membership_rows = Array(hash["aspect_ids"]).map { |id| [hash["_id"], id] }
    [main_row, aspect_membership_rows]
  end
end
|
||||||
|
# Writes the invitations CSV. Each pair below is [csv column, key in the
# exported JSON document].
def invitations_json_to_csv(model_hash)
  columns = [
    ["mongo_id", "_id"],
    ["recipient_mongo_id", "to_id"],
    ["sender_mongo_id", "from_id"],
    ["aspect_mongo_id", "into_id"],
    ["message", "message"]
  ]
  model_hash[:attrs] = columns.map(&:first)
  source_keys = columns.map(&:last)

  generic_json_to_csv(model_hash) { |hash| hash.values_at(*source_keys) }
end
|
||||||
|
# Writes the notifications CSV. Each pair below is [csv column, key in the
# exported JSON document].
# NOTE(review): the "target_type" column is filled from the document's
# "kind" key - confirm that this is the intended mapping.
def notifications_json_to_csv(model_hash)
  columns = [
    ["mongo_id", "_id"],
    ["target_id", "target_id"],
    ["target_type", "kind"],
    ["unread", "unread"]
  ]
  model_hash[:attrs] = columns.map(&:first)
  source_keys = columns.map(&:last)

  generic_json_to_csv(model_hash) { |hash| hash.values_at(*source_keys) }
end
|
||||||
|
# Placeholder converter for the people collection: it currently does
# nothing and returns nil.
def people_json_to_csv(model_hash)
  # Also writes the profiles csv
end
|
||||||
|
# Placeholder converter for the posts collection: it currently does
# nothing and returns nil.
def posts_json_to_csv(model_hash)
  # has to handle the polymorphic stuff
end
|
||||||
|
# Writes the requests CSV: stores the header on model_hash[:attrs] and maps
# each exported request document to a row via #generic_json_to_csv.
def requests_json_to_csv(model_hash)
  model_hash[:attrs] = %w[mongo_id recipient_mongo_id sender_mongo_id aspect_mongo_id]
  source_keys = %w[_id to_id from_id into_id]

  generic_json_to_csv(model_hash) { |hash| hash.values_at(*source_keys) }
end
|
||||||
|
# Writes the users CSV. Apart from the id ("_id" in the JSON document,
# "mongo_id" in the CSV) every column carries the same name as the key it
# is read from.
def users_json_to_csv(model_hash)
  shared_fields = %w[
    username serialized_private_key encrypted_password invites
    invitation_token invitation_sent_at getting_started disable_mail
    language last_sign_in_ip last_sign_in_at reset_password_token
    password_salt
  ]
  model_hash[:attrs] = ["mongo_id"] + shared_fields

  generic_json_to_csv(model_hash) { |hash| hash.values_at("_id", *shared_fields) }
end
|
||||||
|
# Writes the aspects CSV and, through #generic_json_to_two_csvs, the
# post_visibilities join CSV (one row per aspect/post pair).
#
# An aspect document without a "post_ids" key contributes no join rows
# instead of raising NoMethodError on nil.
#
# NOTE(review): the document's "user_id" is not written to the aspects CSV;
# confirm whether the importer needs it.
def aspects_json_to_csv(model_hash)
  debug "Converting aspects json to aspects and post_visibilities csvs"
  model_hash[:main_attrs] = ["mongo_id", "name", "created_at", "updated_at"]
  # Post visibilities
  model_hash[:join_table_name] = :post_visibilities
  model_hash[:join_table_attrs] = ["aspect_mongo_id", "post_mongo_id"]

  generic_json_to_two_csvs(model_hash) do |hash|
    main_row = [hash["_id"], hash["name"], hash["created_at"], hash["updated_at"]]
    post_visibility_rows = Array(hash["post_ids"]).map { |id| [hash["_id"], id] }
    [main_row, post_visibility_rows]
  end
end
|
||||||
|
# Converts one exported JSON file into a main CSV plus a join-table CSV.
#
# model_hash - Hash with :name (log output and main CSV file name),
#              :json_file (path of the export, one JSON document per line),
#              :main_attrs (main CSV header), :join_table_name (join CSV
#              file name) and :join_table_attrs (join CSV header).
#
# Yields each parsed JSON document (a Hash). The block must return a pair:
# its first element is written as the main CSV row, and every element of
# its last element is written as a join CSV row. Files are written under
# <dirpath>/csv/.
#
# All three files are opened in block form so they are closed even when
# parsing or the caller's block raises. Blank lines in the export are
# skipped instead of aborting the run with a JSON parse error.
#
# Returns nil.
def generic_json_to_two_csvs(model_hash)
  debug "Converting #{model_hash[:name]} json to two csvs"

  File.open(model_hash[:json_file]) do |json_file|
    CSV.open("#{dirpath}/csv/#{model_hash[:name]}.csv", 'w') do |main_csv|
      main_csv << model_hash[:main_attrs]

      CSV.open("#{dirpath}/csv/#{model_hash[:join_table_name]}.csv", 'w') do |join_csv|
        join_csv << model_hash[:join_table_attrs]

        json_file.each do |line|
          next if line.strip.empty?
          result = yield(JSON.parse(line))
          main_csv << result.first
          result.last.each { |row| join_csv << row }
        end
      end
    end
  end
  nil
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -10,24 +10,8 @@ namespace :migrations do
|
||||||
# Exports the Mongo collections for the MySQL migration: creates the output
# directories, dumps every collection to JSON, then converts the JSON dumps
# to CSV. All of the work is delegated to MongoToMysql.
task :export_for_mysql do
  # Anchored on Rails.root so the require does not depend on "." being in
  # the load path (it is not on Ruby 1.9.2 and later).
  require "#{Rails.root}/lib/mongo_to_mysql"
  migrator = MongoToMysql.new
  migrator.make_dir
  migrator.write_json_export
  migrator.convert_json_files
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
20
spec/lib/mongo_to_mysql_spec.rb
Normal file
20
spec/lib/mongo_to_mysql_spec.rb
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
require 'spec_helper'
|
||||||
|
require 'lib/mongo_to_mysql'
|
||||||
|
# Checks the sed filters against a sample aspect document as produced by
# mongoexport: each example pipes the sample through a real `sed` process.
describe MongoToMysql do
  let(:migrator) { MongoToMysql.new }

  describe '#sed_replace' do
    let(:test_string) do
      '{ "_id" : { "$oid" : "4d0916c4cc8cb40e93000009" }, "name" : "Work", "created_at" : { "$date" : 1292441284000 }, "updated_at" : { "$date" : 1292546796000 }, "post_ids" : [ { "$oid" : "4d0aa87acc8cb4144b000009" }, { "$oid" : "4d0ab02ccc8cb41628000010" }, { "$oid" : "4d0ab2eccc8cb41628000011" } ], "user_id" : { "$oid" : "4d0916c2cc8cb40e93000006" } }'
    end

    # Echoes the sample document into the given sed command and returns the
    # filtered text without surrounding whitespace.
    def pipe_through(sed_command)
      `echo '#{test_string}' | #{sed_command}`.strip
    end

    it '#id_sed gets rid of the mongo id type specifier' do
      filtered = pipe_through(migrator.id_sed)
      filtered.match('"_id" : "4d0916c4cc8cb40e93000009", "name" : "Work",').should be_true
    end

    it '#date_sed gets rid of the mongo date type specifier' do
      filtered = pipe_through(migrator.date_sed)
      filtered.match('ork", "created_at" : 1292441284000, "updated_at" : 1292546796000, "post_ids" :').should be_true
    end
  end
end
|
||||||
Loading…
Reference in a new issue