At our company we actively use Erlang, but we often look at alternative languages and approaches to improve the quality of our own code.
Elixir is a general-purpose functional programming language that runs on the BEAM virtual machine. It differs from Erlang in its Ruby-like syntax and its advanced metaprogramming features.
Elixir also has a great mechanism for polymorphism called Protocols , but Erlang does not have a syntax for dynamic dispatching that is necessary for their implementation.
So how do they work under the hood? What overhead does code that uses protocols incur? Let's try to figure it out.
There are two ways to understand what is happening inside:
- study how the Elixir compiler generates code for the BEAM VM,
- decompile the beam files and see what happened.
The second method is much simpler, so that is the one we will use.
First, create a new project.
mix new proto
cd proto
Now let's edit the file lib/proto.ex
with a fairly simple example.
defprotocol Double do def double(input) end defimpl Double, for: Integer do def double(int) do int * 2 end end defimpl Double, for: List do def double(list) do list ++ list end end
Here we declared the Double
protocol with a double/1
interface and two implementations of this protocol for Integer
and List
.
Let's check that it works:
iex(1)> Double.double(2) 4 iex(2)> Double.double([1,2,3]) [1, 2, 3, 1, 2, 3] iex(3)> Double.double(:atom) ** (Protocol.UndefinedError) protocol Double not implemented for :atom (proto) lib/proto.ex:1: Double.impl_for!/1 (proto) lib/proto.ex:2: Double.double/1
Now look at the structure of the compiled files.
$ tree _build/dev/
_build/dev/
├── consolidated
│   ├── Elixir.Collectable.beam
│   ├── Elixir.Double.beam
│   ├── Elixir.Enumerable.beam
│   ├── Elixir.IEx.Info.beam
│   ├── Elixir.Inspect.beam
│   ├── Elixir.List.Chars.beam
│   └── Elixir.String.Chars.beam
└── lib
    └── proto
        └── ebin
            ├── Elixir.Double.beam
            ├── Elixir.Double.Integer.beam
            ├── Elixir.Double.List.beam
            └── proto.app
The first thing that catches the eye is the presence of modules with the same names in the consolidated and lib/proto/ebin directories. Let's look at their contents.
To begin, the beam files need to be decompiled. To do this, create an escript file beam_to_erl
#!/usr/bin/env escript main([BeamFile]) -> {ok,{_,[{abstract_code,{_,AC}}]}} = beam_lib:chunks(BeamFile,[abstract_code]), io:fwrite("~s~n", [erl_prettypr:format(erl_syntax:form_list(AC))]).
and run through all the beam files.
$ for f in $(find _build/ -name "*.beam"); do ./beam_to_erl $f > "${f%.beam}.erl"; done
$ tree _build/dev/ | grep -v ".beam"
_build/dev/
├── consolidated
│   ├── Elixir.Collectable.erl
│   ├── Elixir.Double.erl
│   ├── Elixir.Enumerable.erl
│   ├── Elixir.IEx.Info.erl
│   ├── Elixir.Inspect.erl
│   ├── Elixir.List.Chars.erl
│   └── Elixir.String.Chars.erl
└── lib
    └── proto
        └── ebin
            ├── Elixir.Double.erl
            ├── Elixir.Double.Integer.erl
            ├── Elixir.Double.List.erl
            └── proto.app
Consider the contents of the file lib/proto/ebin/Elixir.Double.erl
.
-compile(no_auto_import). -file("lib/proto.ex", 1). -module('Elixir.Double'). -compile(debug_info). -compile({inline, [{any_impl_for, 0}, {struct_impl_for, 1}, {'impl_for?', 1}]}). -protocol([{fallback_to_any, false}]). -export_type([t/0]). -type t() :: term(). -spec '__protocol__'('consolidated?') -> boolean(); (functions) -> [{double, 1}, ...]; (module) -> 'Elixir.Double'. -spec impl_for(term()) -> atom() | nil. -spec 'impl_for!'(term()) -> atom() | no_return(). -callback double(t()) -> term(). -export(['__info__'/1, '__protocol__'/1, double/1, impl_for/1, 'impl_for!'/1]). -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) -> atom() | [{atom(), any()} | {atom(), byte(), integer()}]. '__info__'(functions) -> [{'__protocol__', 1}, {double, 1}, {impl_for, 1}, {'impl_for!', 1}]; '__info__'(macros) -> []; '__info__'(info) -> erlang:get_module_info('Elixir.Double', info). '__protocol__'(module) -> 'Elixir.Double'; '__protocol__'(functions) -> [{double, 1}]; '__protocol__'('consolidated?') -> false. any_impl_for() -> nil. double(_@1) -> ('impl_for!'(_@1)):double(_@1). 
impl_for(#{'__struct__' := _@1}) when erlang:is_atom(_@1) -> struct_impl_for(_@1); impl_for(_@1) when erlang:is_tuple(_@1) -> case 'impl_for?'('Elixir.Double.Tuple') of true -> 'Elixir.Double.Tuple':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_atom(_@1) -> case 'impl_for?'('Elixir.Double.Atom') of true -> 'Elixir.Double.Atom':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_list(_@1) -> case 'impl_for?'('Elixir.Double.List') of true -> 'Elixir.Double.List':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_map(_@1) -> case 'impl_for?'('Elixir.Double.Map') of true -> 'Elixir.Double.Map':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_bitstring(_@1) -> case 'impl_for?'('Elixir.Double.BitString') of true -> 'Elixir.Double.BitString':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_integer(_@1) -> case 'impl_for?'('Elixir.Double.Integer') of true -> 'Elixir.Double.Integer':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_float(_@1) -> case 'impl_for?'('Elixir.Double.Float') of true -> 'Elixir.Double.Float':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_function(_@1) -> case 'impl_for?'('Elixir.Double.Function') of true -> 'Elixir.Double.Function':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_pid(_@1) -> case 'impl_for?'('Elixir.Double.PID') of true -> 'Elixir.Double.PID':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_port(_@1) -> case 'impl_for?'('Elixir.Double.Port') of true -> 'Elixir.Double.Port':'__impl__'(target); false -> any_impl_for() end; impl_for(_@1) when erlang:is_reference(_@1) -> case 'impl_for?'('Elixir.Double.Reference') of true -> 'Elixir.Double.Reference':'__impl__'(target); false -> any_impl_for() end; impl_for(_) -> any_impl_for(). 
'impl_for!'(_@1) -> case impl_for(_@1) of _@2 when (_@2 =:= nil) or (_@2 =:= false) -> erlang:error('Elixir.Protocol.UndefinedError':exception([{protocol, 'Elixir.Double'}, {value, _@1}])); _@3 -> _@3 end. 'impl_for?'(_@1) -> case 'Elixir.Code':'ensure_compiled?'(_@1) of true -> 'Elixir.Kernel':'function_exported?'(_@1, '__impl__', 1); false -> false; _@2 -> erlang:error({badbool, 'and', _@2}) end. struct_impl_for(_@1) -> _@2 = 'Elixir.Module':concat('Elixir.Double', _@1), case 'impl_for?'(_@2) of true -> _@2:'__impl__'(target); false -> any_impl_for() end.
And here is all the magic. Let's take a look at the double/1
function.
double(_@1) -> ('impl_for!'(_@1)):double(_@1).
It searches for a module that is suitable for the passed argument, via impl_for/1
and calls its implementation.
And how to find a module for an argument? Very simple:
- if it is a primitive or BIF type, we simply look for a module named 'Elixir.{ProtocolName}.{TypeName}', where ProtocolName is the name of the protocol and TypeName is the name of the type. We load it, if it is not already loaded, via 'Elixir.Code':'ensure_compiled?'/1
. We check whether the module is a protocol implementation through the presence of the function '__impl__'/1
, and we get the module of the implementation '__impl__'(target)
,
- if it is a structure, then we look at the __struct__
service field and, in the same way, look for the module 'Elixir.{ProtocolName}.{StructName}',
- if no implementation is found, we check for a default implementation for any type, or return an error.
The implementation of the protocol remains almost unchanged; only a few system functions are added. For example, here is 'Elixir.Double.Integer'
.
-compile(no_auto_import). -file("lib/proto.ex", 5). -module('Elixir.Double.Integer'). -behaviour('Elixir.Double'). -impl([{protocol, 'Elixir.Double'}, {for, 'Elixir.Integer'}]). -spec '__impl__'(protocol) -> 'Elixir.Double'; (target) -> 'Elixir.Double.Integer'; (for) -> 'Elixir.Integer'. -export(['__impl__'/1, '__info__'/1, double/1]). -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) -> atom() | [{atom(), any()} | {atom(), byte(), integer()}]. '__info__'(functions) -> [{'__impl__', 1}, {double, 1}]; '__info__'(macros) -> []; '__info__'(info) -> erlang:get_module_info('Elixir.Double.Integer', info). '__impl__'(for) -> 'Elixir.Integer'; '__impl__'(target) -> 'Elixir.Double.Integer'; '__impl__'(protocol) -> 'Elixir.Double'. double(int@1) -> int@1 * 2.
In other words, all dynamic dispatching is reduced to finding a module by name, given the algorithm for composing the name of a protocol implementation. This approach has one minor drawback: you cannot define several protocol implementations for the same type.
The overhead, however, is not that small, especially for high-load systems: the availability of the implementation module has to be checked at runtime on every call.
To eliminate this drawback, the ability to "sew" the routing for protocol implementations known at compile time directly into the dispatching function impl_for/1 was added.
This compiler feature is called protocol consolidation and, since Elixir v1.2, is performed automatically during release builds via mix.
Let's look at consolidated/Elixir.Double.erl
.
-compile(no_auto_import). -file("lib/proto.ex", 1). -module('Elixir.Double'). -compile(debug_info). -compile({inline, [{any_impl_for, 0}, {struct_impl_for, 1}, {'impl_for?', 1}]}). -protocol([{fallback_to_any, false}]). -export_type([t/0]). -type t() :: term(). -spec '__protocol__'('consolidated?') -> boolean(); (functions) -> [{double, 1}, ...]; (module) -> 'Elixir.Double'. -spec impl_for(term()) -> atom() | nil. -spec 'impl_for!'(term()) -> atom() | no_return(). -callback double(t()) -> term(). -export(['__info__'/1, '__protocol__'/1, double/1, impl_for/1, 'impl_for!'/1]). -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) -> atom() | [{atom(), any()} | {atom(), byte(), integer()}]. '__info__'(functions) -> [{'__protocol__', 1}, {double, 1}, {impl_for, 1}, {'impl_for!', 1}]; '__info__'(macros) -> []; '__info__'(info) -> erlang:get_module_info('Elixir.Double', info). '__protocol__'(module) -> 'Elixir.Double'; '__protocol__'(functions) -> [{double, 1}]; '__protocol__'('consolidated?') -> true. any_impl_for() -> nil. double(_@1) -> ('impl_for!'(_@1)):double(_@1). impl_for(#{'__struct__' := x}) when erlang:is_atom(x) -> struct_impl_for(x); impl_for(x) when erlang:is_list(x) -> 'Elixir.Double.List'; impl_for(x) when erlang:is_integer(x) -> 'Elixir.Double.Integer'; impl_for(_) -> nil. 'impl_for!'(_@1) -> case impl_for(_@1) of _@2 when (_@2 =:= nil) or (_@2 =:= false) -> erlang:error('Elixir.Protocol.UndefinedError':exception([{protocol, 'Elixir.Double'}, {value, _@1}])); _@3 -> _@3 end. 'impl_for?'(_@1) -> case 'Elixir.Code':'ensure_compiled?'(_@1) of true -> 'Elixir.Kernel':'function_exported?'(_@1, '__impl__', 1); false -> false; _@2 -> erlang:error({badbool, 'and', _@2}) end. struct_impl_for(_) -> nil.
The module code is significantly smaller than the original and, importantly, impl_for
works in one step without checking for the presence of the module.
Sometimes it is useful to look at the inside of the tool. This enables us to better understand its advantages and disadvantages.
The implementation of protocols is quite simple and, when consolidated protocols are used, adds only a slight overhead while providing a good abstraction over data structures. A similar mechanism could easily be added to Erlang as well, although it would require writing the dynamic dispatch function by hand.
Whether or not to use Elixir is your choice. As for us, we are sticking with Erlang for now.
Source: https://habr.com/ru/post/328528/
All Articles