📜 ⬆️ ⬇️

How Protocols Work Under the Hood in Elixir

In our company, we actively use Erlang, but we often consider alternative languages and approaches to improve the quality of our own code.


Elixir is a general-purpose functional programming language that runs on the BEAM virtual machine. It differs from Erlang in its Ruby-like syntax and its advanced metaprogramming features.


Elixir also has a great polymorphism mechanism called protocols, but Erlang has no built-in syntax for the dynamic dispatch they require.


So how are they implemented internally? How much overhead does code that uses protocols incur? Let's try to figure it out.



There are two ways to understand what is happening inside:


- study how the Elixir compiler generates code for the BEAM VM,
- decompile the beam files and see what was generated.


The second method is much simpler, so we will use it.


First, create a new project.


# Create a new Mix project in ./proto, then change into its directory.
mix new proto
cd proto

Now let's edit the file lib/proto.ex with a fairly simple example.


 # Protocol declaring a single function, double/1.
 defprotocol Double do
   def double(input)
 end

 # Implementation for integers: multiplies the value by two.
 defimpl Double, for: Integer do
   def double(int) do
     int * 2
   end
 end

 # Implementation for lists: concatenates the list with itself.
 defimpl Double, for: List do
   def double(list) do
     list ++ list
   end
 end

Here we declared the Double protocol with a single function, double/1, and two implementations of this protocol: for Integer and for List.


Let's check that it works:


 iex(1)> Double.double(2)
 4
 iex(2)> Double.double([1,2,3])
 [1, 2, 3, 1, 2, 3]
 iex(3)> Double.double(:atom)
 ** (Protocol.UndefinedError) protocol Double not implemented for :atom
     (proto) lib/proto.ex:1: Double.impl_for!/1
     (proto) lib/proto.ex:2: Double.double/1

Now look at the structure of the compiled files.


 $ tree _build/dev/
 _build/dev/
 ├── consolidated
 │   ├── Elixir.Collectable.beam
 │   ├── Elixir.Double.beam
 │   ├── Elixir.Enumerable.beam
 │   ├── Elixir.IEx.Info.beam
 │   ├── Elixir.Inspect.beam
 │   ├── Elixir.List.Chars.beam
 │   └── Elixir.String.Chars.beam
 └── lib
     └── proto
         └── ebin
             ├── Elixir.Double.beam
             ├── Elixir.Double.Integer.beam
             ├── Elixir.Double.List.beam
             └── proto.app

The first thing that catches the eye is that a module with the same name, Elixir.Double.beam, is present in both the consolidated and lib/proto/ebin directories. Let's examine their contents.


First, the beam files need to be decompiled. To do this, create an escript file named beam_to_erl:


 #!/usr/bin/env escript
 %% beam_to_erl: print Erlang source reconstructed from the abstract code
 %% chunk of a .beam file.
 %%
 %% Usage: beam_to_erl File.beam
 %%
 %% The match below succeeds only when beam_lib:chunks/2 returns an
 %% abstract_code chunk of the form {Version, Forms}; any other result
 %% makes the script fail with a badmatch error.
 main([BeamFile]) ->
     {ok, {_, [{abstract_code, {_, AC}}]}} =
         beam_lib:chunks(BeamFile, [abstract_code]),
     %% ~ts instead of ~s: the pretty-printed text may contain characters
     %% above Latin-1, which make io:fwrite/2 fail with ~s.
     io:fwrite("~ts~n", [erl_prettypr:format(erl_syntax:form_list(AC))]).

Make it executable (chmod +x beam_to_erl) and run it on all the beam files:


 $ for f in $(find _build/ -name "*.beam"); do ./beam_to_erl $f > "${f%.beam}.erl"; done 

 $ tree _build/dev/ | grep -v ".beam"
 _build/dev/
 ├── consolidated
 │   ├── Elixir.Collectable.erl
 │   ├── Elixir.Double.erl
 │   ├── Elixir.Enumerable.erl
 │   ├── Elixir.IEx.Info.erl
 │   ├── Elixir.Inspect.erl
 │   ├── Elixir.List.Chars.erl
 │   └── Elixir.String.Chars.erl
 └── lib
     └── proto
         └── ebin
             ├── Elixir.Double.erl
             ├── Elixir.Double.Integer.erl
             ├── Elixir.Double.List.erl
             └── proto.app

Consider the contents of the file lib/proto/ebin/Elixir.Double.erl .


 %% Decompiled, non-consolidated protocol module 'Elixir.Double'
 %% (from lib/proto/ebin). Line breaks and comments were added for
 %% readability; the code itself is the decompiler output.
 -compile(no_auto_import).
 -file("lib/proto.ex", 1).
 -module('Elixir.Double').
 -compile(debug_info).
 -compile({inline, [{any_impl_for, 0}, {struct_impl_for, 1}, {'impl_for?', 1}]}).
 -protocol([{fallback_to_any, false}]).
 -export_type([t/0]).
 -type t() :: term().
 -spec '__protocol__'('consolidated?') -> boolean();
                     (functions) -> [{double, 1}, ...];
                     (module) -> 'Elixir.Double'.
 -spec impl_for(term()) -> atom() | nil.
 -spec 'impl_for!'(term()) -> atom() | no_return().
 -callback double(t()) -> term().
 -export(['__info__'/1, '__protocol__'/1, double/1, impl_for/1, 'impl_for!'/1]).
 -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) ->
           atom() | [{atom(), any()} | {atom(), byte(), integer()}].

 %% Module reflection.
 '__info__'(functions) ->
     [{'__protocol__', 1}, {double, 1}, {impl_for, 1}, {'impl_for!', 1}];
 '__info__'(macros) ->
     [];
 '__info__'(info) ->
     erlang:get_module_info('Elixir.Double', info).

 %% Protocol reflection; this build reports 'consolidated?' as false.
 '__protocol__'(module) ->
     'Elixir.Double';
 '__protocol__'(functions) ->
     [{double, 1}];
 '__protocol__'('consolidated?') ->
     false.

 %% Result used whenever no implementation module is found.
 any_impl_for() ->
     nil.

 %% Protocol entry point: resolve the implementation module for the
 %% argument with 'impl_for!'/1, then call that module's double/1.
 double(_@1) ->
     ('impl_for!'(_@1)):double(_@1).

 %% Runtime dispatch. A map whose '__struct__' key holds an atom is routed
 %% to struct_impl_for/1. For every other guard, the clause asks
 %% 'impl_for?'/1 whether 'Elixir.Double.<Type>' is available; if so it
 %% returns that module's '__impl__'(target), otherwise any_impl_for().
 impl_for(#{'__struct__' := _@1}) when erlang:is_atom(_@1) ->
     struct_impl_for(_@1);
 impl_for(_@1) when erlang:is_tuple(_@1) ->
     case 'impl_for?'('Elixir.Double.Tuple') of
         true -> 'Elixir.Double.Tuple':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_atom(_@1) ->
     case 'impl_for?'('Elixir.Double.Atom') of
         true -> 'Elixir.Double.Atom':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_list(_@1) ->
     case 'impl_for?'('Elixir.Double.List') of
         true -> 'Elixir.Double.List':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_map(_@1) ->
     case 'impl_for?'('Elixir.Double.Map') of
         true -> 'Elixir.Double.Map':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_bitstring(_@1) ->
     case 'impl_for?'('Elixir.Double.BitString') of
         true -> 'Elixir.Double.BitString':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_integer(_@1) ->
     case 'impl_for?'('Elixir.Double.Integer') of
         true -> 'Elixir.Double.Integer':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_float(_@1) ->
     case 'impl_for?'('Elixir.Double.Float') of
         true -> 'Elixir.Double.Float':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_function(_@1) ->
     case 'impl_for?'('Elixir.Double.Function') of
         true -> 'Elixir.Double.Function':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_pid(_@1) ->
     case 'impl_for?'('Elixir.Double.PID') of
         true -> 'Elixir.Double.PID':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_port(_@1) ->
     case 'impl_for?'('Elixir.Double.Port') of
         true -> 'Elixir.Double.Port':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_@1) when erlang:is_reference(_@1) ->
     case 'impl_for?'('Elixir.Double.Reference') of
         true -> 'Elixir.Double.Reference':'__impl__'(target);
         false -> any_impl_for()
     end;
 impl_for(_) ->
     any_impl_for().

 %% Like impl_for/1, but raises Protocol.UndefinedError (built from the
 %% protocol name and the offending value) when the result is nil or false.
 'impl_for!'(_@1) ->
     case impl_for(_@1) of
         _@2 when (_@2 =:= nil) or (_@2 =:= false) ->
             erlang:error('Elixir.Protocol.UndefinedError':exception([{protocol, 'Elixir.Double'}, {value, _@1}]));
         _@3 ->
             _@3
     end.

 %% True only when 'Elixir.Code':'ensure_compiled?'/1 returns true for the
 %% module and the module exports '__impl__'/1. A non-boolean result from
 %% ensure_compiled? raises {badbool, 'and', Value}.
 'impl_for?'(_@1) ->
     case 'Elixir.Code':'ensure_compiled?'(_@1) of
         true -> 'Elixir.Kernel':'function_exported?'(_@1, '__impl__', 1);
         false -> false;
         _@2 -> erlang:error({badbool, 'and', _@2})
     end.

 %% Struct dispatch: build the candidate module name by concatenating
 %% 'Elixir.Double' with the struct name, then apply the same
 %% availability check as the built-in type clauses.
 struct_impl_for(_@1) ->
     _@2 = 'Elixir.Module':concat('Elixir.Double', _@1),
     case 'impl_for?'(_@2) of
         true -> _@2:'__impl__'(target);
         false -> any_impl_for()
     end.

This is where all the magic happens. Let's take a look at the double/1 function.


 %% Resolve the implementation module for the argument with 'impl_for!'/1,
 %% then call double/1 in that module with the same argument.
 double(_@1) ->
     ('impl_for!'(_@1)):double(_@1).

It finds the module that implements the protocol for the passed argument via impl_for!/1 (a wrapper around impl_for/1 that raises an error when nothing is found) and calls that module's implementation.


And how is the module for an argument found? Very simply:


- if it is a primitive or built-in type, we simply look for a module named 'Elixir.{ProtocolName}.{TypeName}', where ProtocolName is the name of the protocol and TypeName is the name of the type. The module is loaded, if it is not loaded already, via 'Elixir.Code':'ensure_compiled?'/1. We check that the module is a protocol implementation by the presence of the function '__impl__'/1, and obtain the implementation module with '__impl__'(target),
- if it is a struct, we read the __struct__ service field and look for the module 'Elixir.{ProtocolName}.{StructName}' in the same way,
- if no implementation is found, we check for a default implementation for any type or return an error.


The protocol implementation itself remains almost unchanged; only a few system functions are added. For example, here is 'Elixir.Double.Integer':


 %% Decompiled implementation module 'Elixir.Double.Integer'.
 %% Line breaks and comments were added for readability; the code itself
 %% is the decompiler output.
 -compile(no_auto_import).
 -file("lib/proto.ex", 5).
 -module('Elixir.Double.Integer').
 -behaviour('Elixir.Double').
 -impl([{protocol, 'Elixir.Double'}, {for, 'Elixir.Integer'}]).
 -spec '__impl__'(protocol) -> 'Elixir.Double';
                 (target) -> 'Elixir.Double.Integer';
                 (for) -> 'Elixir.Integer'.
 -export(['__impl__'/1, '__info__'/1, double/1]).
 -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) ->
           atom() | [{atom(), any()} | {atom(), byte(), integer()}].

 %% Module reflection.
 '__info__'(functions) ->
     [{'__impl__', 1}, {double, 1}];
 '__info__'(macros) ->
     [];
 '__info__'(info) ->
     erlang:get_module_info('Elixir.Double.Integer', info).

 %% Implementation metadata: the type this module is for, the module
 %% that holds the implementation, and the protocol it implements.
 '__impl__'(for) ->
     'Elixir.Integer';
 '__impl__'(target) ->
     'Elixir.Double.Integer';
 '__impl__'(protocol) ->
     'Elixir.Double'.

 %% The user-written function body.
 %% NOTE(review): int@1 is presumably the Elixir variable `int`, printed by
 %% the decompiler under its internal name; it is not valid Erlang variable
 %% syntax as written - confirm before recompiling this text.
 double(int@1) ->
     int@1 * 2.

In other words, all dynamic dispatch boils down to finding a module by name, given the algorithm for composing the name of a protocol implementation. This approach has one minor drawback: you cannot define several implementations of a protocol for the same type.


The overhead, however, is not negligible, especially for high-load systems, because the availability of the implementation module is checked at runtime on every call.


To eliminate this drawback, the compiler gained the ability to “bake” the routing for protocol implementations known at compile time directly into the dispatch function impl_for/1.
This compiler feature is called protocol consolidation and, since Elixir v1.2, is performed automatically when the project is built with mix.


Let's look at consolidated/Elixir.Double.erl.


 %% Decompiled, consolidated protocol module 'Elixir.Double'
 %% (from the consolidated directory). Line breaks and comments were added
 %% for readability; the code itself is the decompiler output.
 -compile(no_auto_import).
 -file("lib/proto.ex", 1).
 -module('Elixir.Double').
 -compile(debug_info).
 -compile({inline, [{any_impl_for, 0}, {struct_impl_for, 1}, {'impl_for?', 1}]}).
 -protocol([{fallback_to_any, false}]).
 -export_type([t/0]).
 -type t() :: term().
 -spec '__protocol__'('consolidated?') -> boolean();
                     (functions) -> [{double, 1}, ...];
                     (module) -> 'Elixir.Double'.
 -spec impl_for(term()) -> atom() | nil.
 -spec 'impl_for!'(term()) -> atom() | no_return().
 -callback double(t()) -> term().
 -export(['__info__'/1, '__protocol__'/1, double/1, impl_for/1, 'impl_for!'/1]).
 -spec '__info__'(attributes | compile | exports | functions | macros | md5 | module | native_addresses) ->
           atom() | [{atom(), any()} | {atom(), byte(), integer()}].

 %% Module reflection.
 '__info__'(functions) ->
     [{'__protocol__', 1}, {double, 1}, {impl_for, 1}, {'impl_for!', 1}];
 '__info__'(macros) ->
     [];
 '__info__'(info) ->
     erlang:get_module_info('Elixir.Double', info).

 %% Protocol reflection; this build reports 'consolidated?' as true.
 '__protocol__'(module) ->
     'Elixir.Double';
 '__protocol__'(functions) ->
     [{double, 1}];
 '__protocol__'('consolidated?') ->
     true.

 any_impl_for() ->
     nil.

 %% Protocol entry point: resolve the implementation module for the
 %% argument with 'impl_for!'/1, then call that module's double/1.
 double(_@1) ->
     ('impl_for!'(_@1)):double(_@1).

 %% Consolidated dispatch: the implementation module is returned directly
 %% from the guard, with no 'impl_for?'/1 availability check. Only lists
 %% and integers have clauses; everything else yields nil.
 %% NOTE(review): `x` is presumably a variable printed by the decompiler
 %% under its Elixir name; read literally as Erlang source it would be the
 %% atom x - confirm before recompiling this text.
 impl_for(#{'__struct__' := x}) when erlang:is_atom(x) ->
     struct_impl_for(x);
 impl_for(x) when erlang:is_list(x) ->
     'Elixir.Double.List';
 impl_for(x) when erlang:is_integer(x) ->
     'Elixir.Double.Integer';
 impl_for(_) ->
     nil.

 %% Like impl_for/1, but raises Protocol.UndefinedError (built from the
 %% protocol name and the offending value) when the result is nil or false.
 'impl_for!'(_@1) ->
     case impl_for(_@1) of
         _@2 when (_@2 =:= nil) or (_@2 =:= false) ->
             erlang:error('Elixir.Protocol.UndefinedError':exception([{protocol, 'Elixir.Double'}, {value, _@1}]));
         _@3 ->
             _@3
     end.

 %% Still present in the module, but not called by any impl_for/1 or
 %% struct_impl_for/1 clause in this listing.
 'impl_for?'(_@1) ->
     case 'Elixir.Code':'ensure_compiled?'(_@1) of
         true -> 'Elixir.Kernel':'function_exported?'(_@1, '__impl__', 1);
         false -> false;
         _@2 -> erlang:error({badbool, 'and', _@2})
     end.

 %% Struct dispatch in this consolidated build always yields nil.
 struct_impl_for(_) ->
     nil.

The module code is significantly smaller than the original and, importantly, impl_for works in one step without checking for the presence of the module.


Conclusion


Sometimes it is useful to look inside a tool. Doing so helps us better understand its advantages and disadvantages.


The implementation of protocols is quite simple, and with consolidated protocols it adds only a slight overhead while providing a good abstraction over data structures. A similar mechanism could easily be added to Erlang, but it would require writing the dynamic dispatch function by hand.


Whether to use Elixir or not is up to you. As for us, we are staying with Erlang.


')

Source: https://habr.com/ru/post/328528/


All Articles