```ruby
# common.rb
require "active_record"
require "active_support/all"
require "get_process_mem"
require "sqlite3"

ActiveRecord::Base.establish_connection(
  adapter: "sqlite3",
  database: "people.sqlite3"
)

class Person < ActiveRecord::Base; end

def print_usage(description)
  mb = GetProcessMem.new.mb
  puts "#{ description } - MEMORY USAGE(MB): #{ mb.round }"
end

def print_usage_before_and_after
  print_usage("Before")
  yield
  print_usage("After")
end

def random_name
  (0...20).map { (97 + rand(26)).chr }.join
end
```
```ruby
# build_arrays.rb
require_relative "./common"

ARRAY_SIZE = 1_000_000
times = ARGV.first.to_i

print_usage(0)
(1..times).each do |n|
  foo = []
  ARRAY_SIZE.times { foo << {some: "stuff"} }
  print_usage(n)
end
```
```
$ ruby build_arrays.rb 10
0 - MEMORY USAGE(MB): 17
1 - MEMORY USAGE(MB): 330
2 - MEMORY USAGE(MB): 481
3 - MEMORY USAGE(MB): 492
4 - MEMORY USAGE(MB): 559
5 - MEMORY USAGE(MB): 584
6 - MEMORY USAGE(MB): 588
7 - MEMORY USAGE(MB): 591
8 - MEMORY USAGE(MB): 603
9 - MEMORY USAGE(MB): 613
10 - MEMORY USAGE(MB): 621
```
```
$ ruby build_arrays.rb 40
0 - MEMORY USAGE(MB): 9
1 - MEMORY USAGE(MB): 323
...
32 - MEMORY USAGE(MB): 700
33 - MEMORY USAGE(MB): 699
34 - MEMORY USAGE(MB): 698
35 - MEMORY USAGE(MB): 698
36 - MEMORY USAGE(MB): 696
37 - MEMORY USAGE(MB): 696
38 - MEMORY USAGE(MB): 696
39 - MEMORY USAGE(MB): 701
40 - MEMORY USAGE(MB): 697
```
In the build_arrays.rb example we see that new memory allocations are made before the garbage collector cleans up our old, dropped objects: usage climbs steeply at first, then levels off around 700 MB once the heap is large enough to be reused. Growth alone, in other words, is not proof of a leak.
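To convince ourselves that the dropped arrays really are collectable, we can force collections by hand. This is a minimal sketch, not from the original article, reusing print_usage from common.rb; GC.start asks the collector to run immediately:

```ruby
# gc_check.rb - sketch: force a GC run after dropping each array, so the
# heap can be reused instead of growing while garbage waits for collection.
require_relative "./common"

print_usage(0)
(1..5).each do |n|
  foo = []
  1_000_000.times { foo << {some: "stuff"} }
  foo = nil  # drop the only reference; the array is now garbage
  GC.start   # run a full collection right away
  print_usage(n)
end
```

With the explicit GC.start, usage should plateau after the first iteration rather than climbing across several, though Ruby does not necessarily hand freed heap back to the operating system, so the reported size may stay at its high-water mark.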
When you face excessive memory use in a real application, you must first isolate the cause and only then begin to study it, because it is easy to draw erroneous conclusions about the source of the problem. As a test case, consider people.rb:

```ruby
# people.rb
require_relative "./common"

def run(number)
  Person.delete_all
  names = number.times.map { random_name }
  names.each do |name|
    Person.create(name: name)
  end
  records = Person.all.to_a
  File.open("people.txt", "w") { |out| out << records.to_json }
end
```
The run method creates a number of Person records, reads them all back into the records array, and serializes that array to a file. before_and_after.rb wraps the call in our memory-reporting helper:

```ruby
# before_and_after.rb
require_relative "./people"

print_usage_before_and_after do
  run(ARGV.shift.to_i)
end
```
```
$ ruby before_and_after.rb 10000
Before - MEMORY USAGE(MB): 37
After - MEMORY USAGE(MB): 96
```
Memory could be going to several places here: calling #to_a to create a large array of Active Record objects (not the best idea, but done here for demonstration), and serializing that array of Active Record objects to JSON. To see which allocations dominate, we run the code under the memory_profiler gem:

```ruby
# profile.rb
require "memory_profiler"
require_relative "./people"

report = MemoryProfiler.report do
  run(1000)
end

report.pretty_print(to_file: "profile.txt")
```
Note that run here is fed ten times fewer records than in the previous example: the profiler itself consumes a lot of memory, and profiling code that already occupies a large amount can exhaust it. The report begins with allocation totals by gem and by file:

```
allocated memory by gem
-----------------------------------
  17520444  activerecord-4.2.6
   7305511  activesupport-4.2.6
   2551797  activemodel-4.2.6
   2171660  arel-6.0.3
   2002249  sqlite3-1.3.11
...
allocated memory by file
-----------------------------------
   2840000  /Users/bruz/.rvm/gems/ruby-2.2.4/gems/activesupport-4.2.6/lib/active_support/hash_with_indifferent_access.rb
   2006169  /Users/bruz/.rvm/gems/ruby-2.2.4/gems/activerecord-4.2.6/lib/active_record/type/time_value.rb
   2001914  /Users/bruz/code/mem_test/people.rb
   1655493  /Users/bruz/.rvm/gems/ruby-2.2.4/gems/activerecord-4.2.6/lib/active_record/connection_adapters/sqlite3_adapter.rb
   1628392  /Users/bruz/.rvm/gems/ruby-2.2.4/gems/activesupport-4.2.6/lib/active_support/json/encoding.rb
```
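When a full report is unwieldy, memory_profiler can be narrowed; top and allow_files are documented report options, though behavior may vary by gem version. A sketch:

```ruby
# profile_focused.rb - sketch: restrict the report to allocations made
# from our own people.rb and show only the top 10 entries per section.
require "memory_profiler"
require_relative "./people"

report = MemoryProfiler.report(allow_files: "people.rb", top: 10) do
  run(1000)
end

report.pretty_print(to_file: "profile_focused.txt")
```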
Two suspects stand out: building the records array of Active Record objects, and serializing it with #to_json. Next, we can test memory usage without the profiler by removing the suspicious places one at a time. We cannot disable fetching the records altogether, so let's start with the serialization, commenting it out:

```ruby
# File.open("people.txt", "w") { |out| out << records.to_json }
```
```
$ ruby before_and_after.rb 10000
Before: 36 MB
After: 47 MB
```
That removes most of the growth. The rest comes from materializing all the Active Record objects at once, which we can avoid by dropping the #to_a call:

```ruby
# records = Person.all.to_a
records = Person.all

# File.open("people.txt", "w") { |out| out << records.to_json }
```
```
$ ruby before_and_after.rb 10000
Before: 36 MB
After: 40 MB
```
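Commenting code out was only a diagnostic step. When the records genuinely need to be processed, Active Record's find_each gives a similar saving by loading them in batches; a minimal sketch, not from the original article, using the helpers from common.rb:

```ruby
# find_each_sketch.rb - sketch: find_each loads Person rows in batches
# (1000 by default), so only one batch of objects is live at a time.
require_relative "./common"

print_usage_before_and_after do
  Person.find_each do |person|
    # handle one record; earlier batches become garbage as we go
  end
end
```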
Helpers like JSON.parse or Active Support's Hash.from_xml are extremely convenient, but when there is a lot of data, they can load a giant structure into memory. Here is Hash#from_xml in action:

```ruby
# parse_with_from_xml.rb
require_relative "./common"

print_usage_before_and_after do
  # From http://www.cs.washington.edu/research/xmldatasets/data/mondial/mondial-3.0.xml
  file = File.open(File.expand_path("../mondial-3.0.xml", __FILE__))
  hash = Hash.from_xml(file)["mondial"]["continent"]
  puts hash.map { |c| c["name"] }.join(", ")
end
```

```
$ ruby parse_with_from_xml.rb
Before - MEMORY USAGE(MB): 37
Europe, Asia, America, Australia/Oceania, Africa
After - MEMORY USAGE(MB): 164
```
We only need five continent names out of the whole document, yet Hash.from_xml builds the entire tree in memory. A streaming (SAX-style) parser such as Ox hands us elements one at a time instead:

```ruby
# parse_with_ox.rb
require_relative "./common"
require "ox"

class Handler < ::Ox::Sax
  def initialize(&block)
    @yield_to = block
  end

  def start_element(name)
    case name
    when :continent
      @in_continent = true
    end
  end

  def end_element(name)
    case name
    when :continent
      @yield_to.call(@name) if @name
      @in_continent = false
      @name = nil
    end
  end

  def attr(name, value)
    case name
    when :name
      @name = value if @in_continent
    end
  end
end

print_usage_before_and_after do
  # From http://www.cs.washington.edu/research/xmldatasets/data/mondial/mondial-3.0.xml
  file = File.open(File.expand_path("../mondial-3.0.xml", __FILE__))
  continents = []
  handler = Handler.new do |continent|
    continents << continent
  end
  Ox.sax_parse(handler, file)
  puts continents.join(", ")
end
```

```
$ ruby parse_with_ox.rb
Before - MEMORY USAGE(MB): 37
Europe, Asia, America, Australia/Oceania, Africa
After - MEMORY USAGE(MB): 37
```
Serializing a lot of data at once causes the same kind of spike in the other direction. Returning to the setup from the people.rb example, let's dump every Person record to JSON in one call:

```ruby
# to_json.rb
require_relative "./common"

print_usage_before_and_after do
  File.open("people.txt", "w") { |out| out << Person.all.to_json }
end
```
```
$ ruby to_json.rb
Before: 36 MB
After: 505 MB
```
The problem with #to_json is that it creates an object instance for every record and then encodes the whole collection at once. Memory consumption drops significantly if the JSON is generated record by record, so that only one record object needs to exist at a time. It seems that none of the popular Ruby JSON libraries can do this, and the usual advice is to build the JSON string by hand (a hand-rolled version is sketched after the output below). The json-write-stream gem provides a good API for this, and our example becomes:

```ruby
# json_stream.rb
require_relative "./common"
require "json-write-stream"

print_usage_before_and_after do
  file = File.open("people.txt", "w")
  JsonWriteStream.from_stream(file) do |writer|
    writer.write_object do |obj_writer|
      obj_writer.write_array("people") do |arr_writer|
        Person.find_each do |person|
          arr_writer.write_element person.as_json
        end
      end
    end
  end
end
```
```
$ ruby json_stream.rb
Before: 36 MB
After: 56 MB
```
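For comparison, here is the hand-rolled variant mentioned above: a sketch, not from the original article, that writes the same structure using only the standard json library, encoding one record at a time:

```ruby
# manual_json_stream.rb - sketch: stream {"people": [...]} by hand,
# encoding each record separately so only one is in memory at once.
require_relative "./common"
require "json"

print_usage_before_and_after do
  File.open("people.txt", "w") do |out|
    out << '{"people":['
    first = true
    Person.find_each do |person|
      out << "," unless first
      first = false
      out << JSON.generate(person.as_json)  # encode just this record
    end
    out << "]}"
  end
end
```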
Another pattern that piles up intermediate results is a chain of enumerable transformations, where every step allocates a complete new array:

```ruby
# not_lazy.rb
require_relative "./common"

number = ARGV.shift.to_i

print_usage_before_and_after do
  names = number.times
    .map { random_name }
    .map { |name| name.capitalize }
    .map { |name| "#{ name } Jr." }
    .select { |name| name[0] == "X" }
    .to_a
end
```
```
$ ruby not_lazy.rb 1_000_000
Before: 36 MB
After: 546 MB
```
Each map and select here builds a fresh array of a million elements. To avoid the intermediate arrays, add a lazy call to the resulting enumerator at the head of the chain:

```ruby
# lazy.rb
require_relative "./common"

number = ARGV.shift.to_i

print_usage_before_and_after do
  names = number.times.lazy
    .map { random_name }
    .map { |name| name.capitalize }
    .map { |name| "#{ name } Jr." }
    .select { |name| name[0] == "X" }
    .to_a
end
```
```
$ ruby lazy.rb 1_000_000
Before: 36 MB
After: 52 MB
```
To make a lazy chain actually produce a result, finish it with #to_a as above or with a force call at the end of the chain. Note also that we call times first and only then lazy, because times by itself consumes very little memory: it just returns an enumerator that generates one integer per call. So if you can use an enumerator instead of an array at the beginning of the chain, this is also a plus, reducing memory consumption. A practical case is paging through an API while yielding records one by one:

```ruby
def records
  Enumerator.new do |yielder|
    has_more = true
    page = 1

    while has_more
      response = fetch(page)
      response.records.each { |record| yielder << record }
      page += 1
      has_more = response.has_more
    end
  end
end
```
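As a hypothetical usage of this records enumerator (fetch, response.records, and response.has_more are assumed stand-ins for a real API client), chaining it lazily means consumption stops as soon as enough items are found, so later pages are never requested:

```ruby
# Only as many pages are fetched as needed to find ten matches;
# :active is a hypothetical field on the yielded record hashes.
first_ten = records
  .lazy
  .select { |record| record[:active] }
  .first(10)
```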
Source: https://habr.com/ru/post/305426/