Memory usage optimization for archive export

- Removed posts and non-contact authors from others' data
- Collections are exported in batches to lower memory footprint
- In the base exporters, look up the User object on each access instead of keeping an instance, because the instance caches all of its associations

closes #7627
This commit is contained in:
cmrd Senya 2017-09-25 19:03:25 +03:00 committed by Benjamin Neff
parent ea57fc5d25
commit c6ed850a85
No known key found for this signature in database
GPG key ID: 971464C3F1A90194
18 changed files with 74 additions and 328 deletions

View file

@ -12,6 +12,7 @@
* Use Bootstrap 3 progress-bar for polls [#7600](https://github.com/diaspora/diaspora/pull/7600) * Use Bootstrap 3 progress-bar for polls [#7600](https://github.com/diaspora/diaspora/pull/7600)
* Enable frozen string literals [#7595](https://github.com/diaspora/diaspora/pull/7595) * Enable frozen string literals [#7595](https://github.com/diaspora/diaspora/pull/7595)
* Remove `rails_admin_histories` table [#7597](https://github.com/diaspora/diaspora/pull/7597) * Remove `rails_admin_histories` table [#7597](https://github.com/diaspora/diaspora/pull/7597)
* Optimize memory usage on profile export [#7627](https://github.com/diaspora/diaspora/pull/7627)
## Bug fixes ## Bug fixes
* Fix displaying polls with long answers [#7579](https://github.com/diaspora/diaspora/pull/7579) * Fix displaying polls with long answers [#7579](https://github.com/diaspora/diaspora/pull/7579)

View file

@ -4,33 +4,32 @@ module Export
class OthersDataSerializer < ActiveModel::Serializer class OthersDataSerializer < ActiveModel::Serializer
# Relayables of other people in the archive: comments, likes, participations, poll participations where author is # Relayables of other people in the archive: comments, likes, participations, poll participations where author is
# the archive owner # the archive owner
has_many :relayables, each_serializer: FederationEntitySerializer has_many :relayables, serializer: FlatMapArraySerializer, each_serializer: FederationEntitySerializer
# Parent posts of user's own relayables. We have to save metadata to use def initialize(user_id)
# it in case when posts temporary unavailable on the target pod. @user_id = user_id
has_many :posts, each_serializer: FederationEntitySerializer super(object)
end
# Authors of posts where we participated and authors are not in contacts
has_many :non_contact_authors, each_serializer: PersonMetadataSerializer
private private
def object
User.find(@user_id)
end
def relayables def relayables
%i[comments likes poll_participations].map {|relayable| %i[comments likes poll_participations].map {|relayable|
others_relayables.send(relayable) others_relayables.send(relayable).find_each(batch_size: 20)
}.sum }
end end
def others_relayables def others_relayables
@others_relayables ||= Diaspora::Exporter::OthersRelayables.new(object.person_id) @others_relayables ||= Diaspora::Exporter::OthersRelayables.new(object.person_id)
end end
def posts # Avoid calling pointless #embedded_in_root_associations method
@posts ||= Diaspora::Exporter::PostsWithActivity.new(object).query def serializable_data
end {}
def non_contact_authors
Diaspora::Exporter::NonContactAuthors.new(posts, object).query
end end
end end
end end

View file

@ -29,7 +29,7 @@ module Export
end end
def excluded_subscription_key def excluded_subscription_key
entity.public ? :subscribed_users_ids : :subscribed_pods_uris object.public? ? :subscribed_users_ids : :subscribed_pods_uris
end end
end end
end end

View file

@ -1,19 +0,0 @@
# frozen_string_literal: true
module Export
  # Serializes the metadata of a person: guid, account id and public key.
  class PersonMetadataSerializer < ActiveModel::Serializer
    attributes :guid, :account_id, :public_key

    private

    # The exported account id is the person's diaspora handle.
    def account_id
      object.diaspora_handle
    end

    # The exported public key is the person's serialized public key.
    def public_key
      object.serialized_public_key
    end
  end
end

View file

@ -18,12 +18,31 @@ module Export
has_many :followed_tags has_many :followed_tags
has_many :post_subscriptions has_many :post_subscriptions
has_many :relayables, each_serializer: Export::OwnRelayablesSerializer has_many :relayables, serializer: FlatMapArraySerializer, each_serializer: Export::OwnRelayablesSerializer
def initialize(user_id, options={})
@user_id = user_id
super(object, options)
end
private private
def object
User.find(@user_id)
end
def posts
object.posts.find_each(batch_size: 20)
end
def contacts
object.contacts.find_each(batch_size: 100)
end
def relayables def relayables
[*comments, *likes, *poll_participations] [comments, likes, poll_participations].map {|relayable|
relayable.find_each(batch_size: 20)
}
end end
%i[comments likes poll_participations].each {|collection| %i[comments likes poll_participations].each {|collection|
@ -47,5 +66,10 @@ module Export
def post_subscriptions def post_subscriptions
Post.subscribed_by(object).pluck(:guid) Post.subscribed_by(object).pluck(:guid)
end end
# Avoid calling pointless #embedded_in_root_associations method
def serializable_data
{}
end
end end
end end

View file

@ -13,6 +13,6 @@ class FederationEntitySerializer < ActiveModel::Serializer
end end
def entity def entity
@entity ||= Diaspora::Federation::Entities.build(object) Diaspora::Federation::Entities.build(object)
end end
end end

View file

@ -0,0 +1,11 @@
# frozen_string_literal: true
# Array serializer for a collection of collections: each item of each nested
# collection is serialized on its own and all results are returned as a single
# flat array.
class FlatMapArraySerializer < ActiveModel::ArraySerializer
  # @param options [Hash] handed unchanged to every item serializer
  # @return [Array] serialized items of all nested collections, in iteration order
  def serializable_object(options={})
    @object.flat_map {|batch|
      batch.map {|entry| serializer_for(entry).serializable_object_with_notification(options) }
    }
  end
end

View file

@ -20,8 +20,8 @@ module Diaspora
def full_archive def full_archive
{version: SERIALIZED_VERSION} {version: SERIALIZED_VERSION}
.merge(Export::UserSerializer.new(@user).as_json) .merge(Export::UserSerializer.new(@user.id).as_json)
.merge(Export::OthersDataSerializer.new(@user).as_json) .merge(Export::OthersDataSerializer.new(@user.id).as_json)
end end
end end
end end

View file

@ -1,38 +0,0 @@
# frozen_string_literal: true
module Diaspora
  class Exporter
    # Queries the people who authored the given posts and who are not
    # contacts of the given user.
    class NonContactAuthors
      # @param posts [Post::ActiveRecord_Relation] posts whose authors form the candidate list
      # @param user [User] user whose contacts are excluded from the result
      def initialize(posts, user)
        @posts = posts
        @user = user
      end

      # Builds the request for the non-contact authors of the posts.
      # @return [Person::ActiveRecord_Relation]
      def query
        Person.where(id: non_contact_authors_ids)
      end

      private

      attr_reader :posts, :user

      # Unique author ids of the posts, minus the person ids of the user's contacts.
      def non_contact_authors_ids
        author_ids = posts.pluck(:author_id).uniq
        contact_person_ids = user.contacts.pluck(:person_id)
        author_ids - contact_person_ids
      end
    end
  end
end

View file

@ -1,61 +0,0 @@
# frozen_string_literal: true
module Diaspora
  class Exporter
    # Queries other people's posts on which a user has been active: posts the
    # user's person commented, liked or reshared, posts the user subscribed to,
    # and status messages with a poll the person participated in.
    class PostsWithActivity
      # @param user [User] user who the activity belongs to (the one who liked, commented posts, etc)
      def initialize(user)
        @user = user
      end

      # Builds the request for the posts with activity. The individual
      # activity queries are combined with SQL UNION and used as the
      # "posts" source of the resulting relation.
      # @return [Post::ActiveRecord_Relation]
      def query
        union_sql = all_activities.map(&:to_sql).join(" UNION ")
        Post.from("(#{union_sql}) AS posts")
      end

      private

      attr_reader :user

      def person
        user.person
      end

      # The order of this list fixes the order of the sub-queries in the UNION.
      def all_activities
        [comments_activity, likes_activity, subscriptions, polls_activity, reshares_activity]
      end

      def comments_activity
        other_people_posts.commented_by(person)
      end

      def likes_activity
        other_people_posts.liked_by(person)
      end

      def subscriptions
        other_people_posts.subscribed_by(user)
      end

      # Status messages of other authors with a poll participation of the person.
      def polls_activity
        person_id = person.id
        StatusMessage.where.not(author_id: person_id)
                     .joins(:poll_participations)
                     .where(poll_participations: {author_id: person_id})
      end

      def reshares_activity
        other_people_posts.reshared_by(person)
      end

      # Base scope: every post that was not authored by the person.
      def other_people_posts
        Post.where.not(author_id: person.id)
      end
    end
  end
end

View file

@ -201,7 +201,7 @@ describe Diaspora::Exporter do
expect(json).to include_json(user: {posts: [serialized]}) expect(json).to include_json(user: {posts: [serialized]})
end end
it "contains a reshare and its root" do it "contains a reshare" do
reshare = FactoryGirl.create(:reshare, author: user.person) reshare = FactoryGirl.create(:reshare, author: user.person)
serialized_reshare = { serialized_reshare = {
"subscribed_pods_uris": [reshare.root.author.pod.url_to(""), AppConfig.pod_uri.to_s], "subscribed_pods_uris": [reshare.root.author.pod.url_to(""), AppConfig.pod_uri.to_s],
@ -216,21 +216,8 @@ describe Diaspora::Exporter do
} }
} }
status_message = reshare.root
serialized_parent = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": true
}
}
expect(json).to include_json( expect(json).to include_json(
user: {posts: [serialized_reshare]}, user: {posts: [serialized_reshare]}
others_data: {posts: [serialized_parent]}
) )
end end
@ -244,7 +231,7 @@ describe Diaspora::Exporter do
expect(json).to include_json(user: {post_subscriptions: [subscription.target.guid]}) expect(json).to include_json(user: {post_subscriptions: [subscription.target.guid]})
end end
it "contains a comment and the commented post" do it "contains a comment" do
comment = FactoryGirl.create(:comment, author: user.person) comment = FactoryGirl.create(:comment, author: user.person)
serialized_comment = { serialized_comment = {
"entity_type": "comment", "entity_type": "comment",
@ -258,25 +245,12 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid text created_at] "property_order": %w[author guid parent_guid text created_at]
} }
status_message = comment.parent
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": false
}
}
expect(json).to include_json( expect(json).to include_json(
user: {relayables: [serialized_comment]}, user: {relayables: [serialized_comment]}
others_data: {posts: [serialized_post]}
) )
end end
it "contains a like and the liked post" do it "contains a like" do
like = FactoryGirl.create(:like, author: user.person) like = FactoryGirl.create(:like, author: user.person)
serialized_like = { serialized_like = {
"entity_type": "like", "entity_type": "like",
@ -290,25 +264,12 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid parent_type positive] "property_order": %w[author guid parent_guid parent_type positive]
} }
status_message = like.target
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"public": false
}
}
expect(json).to include_json( expect(json).to include_json(
user: {relayables: [serialized_like]}, user: {relayables: [serialized_like]}
others_data: {posts: [serialized_post]}
) )
end end
it "contains a poll participation and post with this poll" do it "contains a poll participation" do
poll_participation = FactoryGirl.create(:poll_participation, author: user.person) poll_participation = FactoryGirl.create(:poll_participation, author: user.person)
serialized_participation = { serialized_participation = {
"entity_type": "poll_participation", "entity_type": "poll_participation",
@ -321,38 +282,8 @@ describe Diaspora::Exporter do
"property_order": %w[author guid parent_guid poll_answer_guid] "property_order": %w[author guid parent_guid poll_answer_guid]
} }
poll = poll_participation.poll
status_message = poll_participation.status_message
serialized_post = {
"entity_type": "status_message",
"entity_data": {
"author": status_message.diaspora_handle,
"guid": status_message.guid,
"created_at": status_message.created_at.iso8601,
"text": status_message.text,
"poll": {
"entity_type": "poll",
"entity_data": {
"guid": poll.guid,
"question": poll.question,
"poll_answers": poll.poll_answers.map {|answer|
{
"entity_type": "poll_answer",
"entity_data": {
"guid": answer.guid,
"answer": answer.answer
}
}
}
}
},
"public": false
}
}
expect(json).to include_json( expect(json).to include_json(
user: {relayables: [serialized_participation]}, user: {relayables: [serialized_participation]}
others_data: {posts: [serialized_post]}
) )
end end
@ -409,23 +340,6 @@ describe Diaspora::Exporter do
expect(json).to include_json(others_data: {relayables: [serialized]}) expect(json).to include_json(others_data: {relayables: [serialized]})
end end
it "contains metadata of a non-contact author of a post where we commented" do
comment = FactoryGirl.create(:comment, author: user.person)
author = comment.parent.author
expect(json).to include_json(
others_data: {
non_contact_authors: [
{
"guid": author.guid,
"account_id": author.diaspora_handle,
"public_key": author.serialized_public_key
}
]
}
)
end
def transform_value(value) def transform_value(value)
return value.iso8601 if value.is_a? Date return value.iso8601 if value.is_a? Date
value value

View file

@ -1,27 +0,0 @@
# frozen_string_literal: true
describe Diaspora::Exporter::NonContactAuthors do
  describe "#query" do
    let(:user) { FactoryGirl.create(:user_with_aspect) }
    let(:post) { FactoryGirl.create(:status_message) }
    # The exporter is given a relation that contains only the post created above.
    let(:instance) { described_class.new(Post.where(id: post.id), user) }

    context "without contact relationship" do
      it "includes post author to the result set" do
        expect(instance.query).to eq([post.author])
      end
    end

    context "with contact relationship" do
      # Sharing with the author makes them a contact of the user.
      before { user.share_with(post.author, user.aspects.first) }

      it "doesn't include post author to the result set" do
        expect(instance.query).to be_empty
      end
    end
  end
end

View file

@ -1,26 +0,0 @@
# frozen_string_literal: true
describe Diaspora::Exporter::PostsWithActivity do
  let(:user) { FactoryGirl.create(:user) }
  let(:instance) { described_class.new(user) }

  describe "#query" do
    # One expected post per kind of activity generated in the before hook.
    let(:activity) {
      person = user.person
      [
        person.likes.first.target,
        person.comments.first.parent,
        person.poll_participations.first.parent.status_message,
        person.participations.first.target,
        person.posts.reshares.first.root
      ]
    }

    before { DataGenerator.create(user, %i[activity participation]) }

    it "returns all posts with person's activity" do
      expect(instance.query).to match_array(activity)
    end
  end
end

View file

@ -6,7 +6,7 @@ describe Diaspora::Exporter do
expect_any_instance_of(Export::UserSerializer).to receive(:as_json).and_return(user: "user_data") expect_any_instance_of(Export::UserSerializer).to receive(:as_json).and_return(user: "user_data")
expect_any_instance_of(Export::OthersDataSerializer).to receive(:as_json).and_return(others_date: "others_data") expect_any_instance_of(Export::OthersDataSerializer).to receive(:as_json).and_return(others_date: "others_data")
json = Diaspora::Exporter.new(nil).execute json = Diaspora::Exporter.new(FactoryGirl.create(:user)).execute
expect(json).to include_json( expect(json).to include_json(
version: "2.0", version: "2.0",
user: "user_data", user: "user_data",

View file

@ -2,15 +2,7 @@
describe Export::OthersDataSerializer do describe Export::OthersDataSerializer do
let(:user) { FactoryGirl.create(:user) } let(:user) { FactoryGirl.create(:user) }
let(:serializer) { Export::OthersDataSerializer.new(user) } let(:serializer) { Export::OthersDataSerializer.new(user.id) }
let(:others_posts) {
[
*user.person.likes.map(&:target),
*user.person.comments.map(&:parent),
*user.person.posts.reshares.map(&:root),
*user.person.poll_participations.map(&:status_message)
]
}
it "uses FederationEntitySerializer for array serializing relayables" do it "uses FederationEntitySerializer for array serializing relayables" do
sm = DataGenerator.new(user).status_message_with_activity sm = DataGenerator.new(user).status_message_with_activity
@ -25,21 +17,5 @@ describe Export::OthersDataSerializer do
before do before do
DataGenerator.new(user).activity DataGenerator.new(user).activity
end end
it "uses FederationEntitySerializer for array serializing posts" do
expect(Export::OthersDataSerializer).to serialize_association(:posts)
.with_each_serializer(FederationEntitySerializer)
.with_objects(others_posts)
serializer.associations
end
it "uses PersonMetadataSerializer for array serializing non_contact_authors" do
non_contact_authors = others_posts.map(&:author)
expect(Export::OthersDataSerializer).to serialize_association(:non_contact_authors)
.with_each_serializer(Export::PersonMetadataSerializer)
.with_objects(non_contact_authors)
serializer.associations
end
end end
end end

View file

@ -1,14 +0,0 @@
# frozen_string_literal: true
describe Export::PersonMetadataSerializer do
  let(:person) { FactoryGirl.create(:person) }
  let(:serializer) { described_class.new(person) }

  it "has person metadata attributes" do
    # The account id is expected to be the diaspora handle and the public key
    # its serialized form.
    expected_attributes = {
      guid:       person.guid,
      account_id: person.diaspora_handle,
      public_key: person.serialized_public_key
    }
    expect(serializer.attributes).to eq(expected_attributes)
  end
end

View file

@ -2,7 +2,7 @@
describe Export::UserSerializer do describe Export::UserSerializer do
let(:user) { FactoryGirl.create(:user) } let(:user) { FactoryGirl.create(:user) }
let(:serializer) { Export::UserSerializer.new(user, root: false) } let(:serializer) { Export::UserSerializer.new(user.id, root: false) }
it "has basic user's attributes" do it "has basic user's attributes" do
expect(serializer.attributes).to eq( expect(serializer.attributes).to eq(

View file

@ -15,6 +15,7 @@
RSpec::Matchers.define :serialize_association do |association_name| RSpec::Matchers.define :serialize_association do |association_name|
match do |root_serializer_class| match do |root_serializer_class|
association = fetch_association(root_serializer_class, association_name) association = fetch_association(root_serializer_class, association_name)
@serializer_from_options = association.serializer_from_options
execute_receive_matcher_with(association) execute_receive_matcher_with(association)
end end
@ -50,7 +51,11 @@ RSpec::Matchers.define :serialize_association do |association_name|
def with_object_expectation(object) def with_object_expectation(object)
if association_object.is_a?(Array) if association_object.is_a?(Array)
if serializer_class == FlatMapArraySerializer
expect(object.flat_map(&:to_a)).to match_array(association_object)
else
expect(object).to match_array(association_object) expect(object).to match_array(association_object)
end
elsif !association_object.nil? elsif !association_object.nil?
expect(object).to eq(association_object) expect(object).to eq(association_object)
end end
@ -66,6 +71,7 @@ RSpec::Matchers.define :serialize_association do |association_name|
def pick_serializer_class def pick_serializer_class
return association_serializer_class unless association_serializer_class.nil? return association_serializer_class unless association_serializer_class.nil?
return @serializer_from_options unless @serializer_from_options.nil?
return ActiveModel::ArraySerializer unless each_serializer_class.nil? return ActiveModel::ArraySerializer unless each_serializer_class.nil?
end end
end end