JSON dataset optimization (record arrays)

By a dataset we mean an array of JavaScript records:

 var dataset = [
   {foo: 'xxx', bar: 'xxx'},
   {foo: 'yyy', bar: 'yyy'},
   ...
   {foo: 'zzz', bar: 'zzz'}
 ];

This is the usual JavaScript representation of part of a table (or of a query result) from a relational database: the property names correspond to the column names, and the property values to the values of the record's fields.

When transferring datasets between server and client, they have to be converted to JSON. The advantages of such a representation in JSON: you can omit NULL fields (do not specify at all). Disadvantages: the column names are repeated, the redundancy is very high.

The solution is simple: get rid of the repetition of column names.

The idea: remember that the JSON representation of a regular array contains only data.

Solution: let's make a record of arrays from the array of records.

 /**
  * Convert an array of records into a single record of column arrays.
  *
  * Every own property name found in any record becomes a property of the
  * result whose value is an array with one slot per input record. The value
  * of record i is stored at index i of its column; records that lack the
  * property leave a hole (read back as undefined) at their index.
  *
  * @param {Array} array  array of plain records
  * @returns {Object}     record whose properties are column arrays
  */
 function shrink (array) {
	 var hasOwn = Object.prototype.hasOwnProperty;
	 var count = array.length;
	 var r = {};
	 for (var i = 0; i < count; i++) {
		 var rec = array[i];
		 for (var name in rec) {
			 if (hasOwn.call(rec, name)) {
				 if (!hasOwn.call(r, name)) {
					 // Pre-size the column so every column spans all records.
					 r[name] = new Array(count);
				 }
				 // Store by record index, not with push(): a field missing from
				 // some record must not shift the values of the records after it.
				 r[name][i] = rec[name];
			 }
		 }
	 }
	 return r;
 }

The shrink () function turns an array of records into a single record, each property of which is an array of values. It contains a small trick that is needed to handle missing properties correctly.

Of course, we will need an inverse operation:


 /**
  * Convert a record of column arrays back into an array of records.
  *
  * If r is already an array (the dataset was never compressed) it is returned
  * unchanged, so the function can sit in a processing chain that receives both
  * representations.
  *
  * @param {Object|Array} r  record of column arrays, or an array of records
  * @returns {Array}         array of records
  */
 function unshrink (r) {
	 // The dataset was not compressed: hand it back untouched.
	 if (Object.prototype.toString.call(r) === '[object Array]') {
		 return r;
	 }

	 var hasOwn = Object.prototype.hasOwnProperty;
	 var name, i;

	 // The longest column determines how many records the table has.
	 var size = 0;
	 for (name in r) {
		 if (hasOwn.call(r, name)) {
			 size = Math.max(size, r[name].length);
		 }
	 }

	 // Create an array of empty records of the required size.
	 var array = new Array(size);
	 for (i = 0; i < size; i++) {
		 array[i] = {};
	 }

	 // Move the column values into the records.
	 for (name in r) {
		 if (hasOwn.call(r, name)) {
			 var values = r[name];
			 for (i = 0; i < values.length; i++) {
				 // An undefined slot marks a field the record never had;
				 // leave the property absent instead of storing undefined.
				 if (values[i] !== undefined) {
					 array[i][name] = values[i];
				 }
			 }
		 }
	 }
	 return array;
 }

The unshrink () function makes an array of records from a record of arrays. Its trick is that if it is given an array, it does nothing and simply returns. Thus it can be dropped straight into an existing data processing chain: datasets in the form of arrays pass through it transparently, and as soon as a dataset arrives in the form of a record of arrays, it is transformed into an array of records.

Let's take a typical dataset from a typical task of a typical IT project and check how well we managed to optimize the size.

Take the first 13 people from the list of famous people who died from alcoholism.

en.wikipedia.org/wiki/List_of_deaths_through_alcohol

 // Sample dataset: one record per person. Two records leave fields out,
 // so the dataset also exercises the handling of missing properties.
 var people = [
		 {name: 'Alan Watts', death_date: 'November 16, 1973', location: 'Chislehurst, England', cause: 'Alcohol poisoning'}, // age (58) left out
		 {name: 'John Barrymore', death_date: '29 May 1942', location: 'Hollywood, United States', cause: 'Various', age: 60},
		 {name: 'Brendan Behan', death_date: '20 March 1964', location: 'Dublin, Ireland', cause: 'Alcoholism', age: 41},
		 {name: 'Bix Beiderbecke', death_date: '6 August 1931', location: 'New York, United States'}, // cause ('Pneumonia') and age (28) left out
		 {name: 'George Best', death_date: '25 November 2005', location: 'London, United Kingdom', cause: 'Alcoholism', age: 59},
		 {name: 'John Bonham', death_date: '25 September 1980', location: 'Windsor, United Kingdom', cause: 'Asphyxiation', age: 32},
		 {name: 'Julia Bruns', death_date: '24 December 1927', location: 'New York City, United States', cause: 'Alcohol poisoning', age: 32},
		 {name: 'Rob Buck', death_date: '19 December 2000', location: 'Pittsburgh, United States', cause: 'Liver failure', age: 42},
		 {name: 'Richard Burton', death_date: '5 August 1984', location: 'Geneva, Switzerland', cause: 'Cerebral haemorrhage', age: 58},
		 {name: 'David Byron', death_date: '28 February 1985', location: 'Reading, United Kingdom', cause: 'Alcoholism', age: 38},
		 {name: 'Truman Capote', death_date: '25 August 1984', location: 'Los Angeles, United States', cause: 'Liver disease', age: 59},
		 {name: 'Leonard Chadwick', death_date: '18 May 1940', location: 'Boston, United States', cause: 'Asphyxiation', age: 61},
		 {name: 'Raymond Chandler', death_date: '26 March 1959', location: 'La Jolla, United States', cause: 'Pneumonia', age: 70}
 ];

Let's add to it the code for the forward and inverse transformations and for calculating the totals.

 /**
  * Placeholder for a serializer that returns the JSON text of obj.
  * The article leaves the implementation out; plug in your own.
  */
 function JSON (obj) {
   /* return JSON from obj */
   // ... (implementation omitted)
 }

 /**
  * Write the JSON text of obj to the page, followed by a rule and the length
  * of that text, and return the length (s.length).
  */
 function print (obj) {
	 var s = JSON(obj);
	 // NOTE(review): the markup that followed 'b.' was lost when the listing
	 // was extracted; two line breaks are assumed here - confirm.
	 document.write(s + '<HR>' + s.length + 'b.<br><br>');
	 return s.length;
 }

 // Print the dataset as an array of records, then as a record of arrays,
 // keeping the length of each representation.
 var before = print (people);
 var shrinked_people = shrink (people);
 var after = print (shrinked_people);

 // Relative size change in percent; negative when the shrunk form is smaller.
 document.write ('The winnings were: <b>' + Math.round (100 * (after - before) / before) + '% </b> <HR>');

 // Convert back and print again to inspect the round trip.
 var unshrinked_people = unshrink (shrinked_people);
 print (unshrinked_people);

We get:

 [{"name": "Alan Watts", "death_date": "November 16, 1973", "location": "Chislehurst, England", "cause": "Alcohol poisoning"}, {"name": "John Barrymore "," death_date ":" 29 May 1942 "," location ":" Hollywood, United States "," cause ":" Various "," age ": 60}, {" name ":" Brendan Behan "," death_date ":" 20 March 1964 "," location ":" Dublin, Ireland "," cause ":" Alcoholism "," age ": 41}, {" name ":" Bix Beiderbecke "," death_date ":" 6 August 1931 "," location ":" New York, United States "}, {" name ":" George Best "," death_date ":" 25 November 2005 "," location ":" London, United Kingdom "," cause " : "Alcoholism", "age": 59}, {"name": "John Bonham", "death_date": "25 September 1980", "location": "Windsor, United Kingdom", "cause": "Asphyxiation" , "age": 32}, {"name": "Julia Bruns", "death_date": "24 December 1927", "location": "New York City, United States", "cause": "Alcohol poisoning", "age": 32}, {"name": "Rob Buck", "death_date": "19 December 2000", "location": "Pittsburgh, United States", "cause": "Liver failure", "age" : 42}  {"name": "Richard Burton", "death_date": "5 August 1984", "location": "Geneva, Switzerland", "cause": "Cerebral haemorrhage", "age": 58}, {"name" : "David Byron", "death_date": "28 February 1985", "location": "Reading, United Kingdom", "cause": "Alcoholism", "age": 38}, {"name": "Truman Capote "," death_date ":" 25 August 1984 "," location ":" Los Angeles, United States "," cause ":" Liver disease "," age ": 59}, {" name ":" Leonard Chadwick ", "death_date": "18 May 1940", "location": "Boston, United States", "cause": "Asphyxiation", "age": 61}, {"name": "Raymond Chandler", "death_date": "26 March 1959", "location": "La Jolla, United States", "cause": "Pneumonia", "age": 70}]
 1658 b.

 {"name": ["Alan Watts", "John Barrymore", "Brendan Behan", "Bix Beiderbecke", "George Best", "John Bonham", "Julia Bruns", "Rob Buck", "Richard Burton" , "David Byron", "Truman Capote", "Leonard Chadwick", "Raymond Chandler"], "death_date": ["November 16, 1973", "29 May 1942", "20 March 1964", "6 August 1931 "," 25 November 2005 "," 25 September 1980 "," 24 December 1927 "," 19 December 2000 "," 5 August 1984 "," 28 February 1985 "," 25 August 1984 "," 18 May 1940 ", "26 March 1959"], "location": ["Chislehurst, England", "Hollywood, United States", "Dublin, Ireland", "New York, United States", "London, United Kingdom", "Windsor, United Kingdom "," New York City, United States "," Pittsburgh, United States "," Geneva, Switzerland "," Reading, United Kingdom "," Los Angeles, United States "," Boston, United States "," La Jolla , United States "]," cause ": [" Alcohol poisoning "," Various "," Alcoholism "," Alcoholism "," Asphyxiation "," Alcohol poisoning "," Liver failure "," Cerebral haemorrhage "," Alcohol  m "," Liver disease "," Asphyxiation "," Pneumonia "]," age ": [undefined, 60, 41, 59, 32, 32, 42, 58, 38, 59, 61, 70]}
 1117 b.

 The win was: -33%
 [{"name": "Alan Watts", "death_date": "November 16, 1973", "location": "Chislehurst, England", "cause": "Alcohol poisoning", "age": undefined}, {" name ":" John Barrymore "," death_date ":" 29 May 1942 "," location ":" Hollywood, United States "," cause ":" Various "," age ": 60}, {" name ":" Brendan Behan "," death_date ":" 20 March 1964 "," location ":" Dublin, Ireland "," cause ":" Alcoholism "," age ": 41}, {" name ":" Bix Beiderbecke "," death_date ":" 6 August 1931 "," location ":" New York, United States "," cause ":" Alcoholism "," age ": 59}, {" name ":" George Best "," death_date ": "25 November 2005", "location": "London, United Kingdom", "cause": "Asphyxiation", "age": 32}, {"name": "John Bonham", "death_date": "25 September 1980 "," location ":" Windsor, United Kingdom "," cause ":" Alcohol poisoning "," age ": 32}, {" name ":" Julia Bruns "," death_date ":" 24 December 1927 "," location ":" New York City, United States "," cause ":" Liver failure "," age ": 42}, {" name ":" Rob Buck "," death_date ":" 19 December 2000 "," location ":" Pittsburgh, U  nited States "," cause ":" Cerebral haemorrhage "," age ": 58}, {" name ":" Richard Burton "," death_date ":" 5 August 1984 "," location ":" Geneva, Switzerland ", "cause": "Alcoholism", "age": 38}, {"name": "David Byron", "death_date": "28 February 1985", "location": "Reading, United Kingdom", "cause": "Liver disease", "age": 59}, {"name": "Truman Capote", "death_date": "25 August 1984", "location": "Los Angeles, United States", "cause": "Asphyxiation "," age ": 61}, {" name ":" Leonard Chadwick "," death_date ":" 18 May 1940 "," location ":" Boston, United States "," cause ":" Pneumonia "," age ": 70}, {" name ":" Raymond Chandler "," death_date ":" 26 March 1959 "," location ":" La Jolla, United States "}]
 1675 b.

JSON size of the array of records: 1658 bytes.
JSON size of the record of arrays: 1117 bytes.
We saved a third (33%).

Minuses:
- slight loss of performance;
- poorly suited for highly sparse tables (with a large number of NULL fields).

About the last drawback. NULL cells are represented by the text “undefined” in the JSON representation of the record of arrays. If your JSON function allows you to specify the string that represents "undefined", then further optimization is possible (replacing the literal undefined with a single character, for example "U"):

 // Placeholder for a serializer variant that takes a second argument: the text
 // to emit in place of undefined values. The implementation is left out.
 // Note that the second parameter is literally named `undefined`, so inside
 // the function that name refers to the argument, not the built-in value.
 function JSON (obj, undefined) {
  ///
 }


 // Producing the payload (presumably on the sending side): serialize the
 // shrunk dataset, passing "U" as the text that stands for undefined values.
 var json = JSON (shrink (dataset), "U");
 ...

 // Consuming the payload: with U bound to undefined, the intent appears to be
 // that every bare U in the evaluated text yields undefined.
 var U = undefined;
 // NOTE(review): eval executes whatever text it is given - only use this on
 // payloads from a trusted source.
 var array = unshrink (eval ('(' + json + ')'));

We save all.

Update

I downloaded gzip, tried it.

The first presentation was packed in 588 bytes, the optimized one was packed in 564 bytes.

Update

Took another example. Here is the packet from the work program; it contains an array (in object).

 {"message": {"what": "client_context; push", "cmd_id": 1, "type": "depth", "instrument_code": "LKOH", "instrument_subcode": "EQBR", "exg": "MICEX"}, "object": [{"buysell": "B", "price": 1526.7, "qty": 47}, {"buysell": "B", "price": 1526.82, "qty" : 5}, {"buysell": "B", "price": 1526.83, "qty": 30}, {"buysell": "B", "price": 1526.86, "qty": 50}, {" buysell ":" B "," price ": 1526.9," qty ": 44}, {" buysell ":" B "," price ": 1526.94," qty ": 49}, {" buysell ":" B " , "price": 1526.97, "qty": 11}, {"buysell": "B", "price": 1527.17, "qty": 27}, {"buysell": "B", "price": 1527.21 , "qty": 68}, {"buysell": "B", "price": 1527.39, "qty": 16}, {"buysell": "B", "price": 1527.41, "qty": 8 }, {"buysell": "B", "price": 1527.45, "qty": 15}, {"buysell": "B", "price": 1527.59, "qty": 46}, {"buysell" : "B", "price": 1527.61, "qty": 38}, {"buysell": "B", "price": 1527.73, "qty": 34}, {"buysell": "B", " price ": 1527.75," qty ": 20}, {" buysell ":" B "," price ": 1527.86," qty ": 29}, {" buysell ":" B "," price ": 1527.98," qty ": 68}, {" buysell ":" B "," price ": 1528.03," qty ": 32}, {" buysell ":" B "," price ": 1553.67," qty ": 5}, {"buysell": "S", "p  rice ": 1634.15," qty ": 5}, {" buysell ":" S "," price ": 1634.23," qty ": 48}, {" buysell ":" S "," price ": 1634.24," qty ": 116}, {" buysell ":" S "," price ": 1634.29," qty ": 19}, {" buysell ":" S "," price ": 1634.39," qty ": 22}, {"buysell": "S", "price": 1634.4, "qty": 31}, {"buysell": "S", "price": 1634.59, "qty": 39}, {"buysell": " S "," price ": 1634.64," qty ": 70}, {" buysell ":" S "," price ": 1634.68," qty ": 2}, {" buysell ":" S "," price " : 1634.73, "qty": 58}, {"buysell": "S", "price": 1634.8, "qty": 35}, {"buysell": "S", "price": 1634.82, "qty" : 23}, {"buysell": "S", "price": 1634.83, "qty": 18}, {"buysell": "S", "price": 1634.92, "qty": 7}, {" buysell ":" S "," price ": 1635," qty ": 49}, {" buysell ":" S "," price ": 1635.01," qty ": 68}, {" buysell ":" S " , "price": 1635.02, "qty": 8}, {"buysell": "S", 
"price": 1635.06, "qty": 41}, {"buysell": "S", "price": 1635.08 , "qty": 1}, {"buysell": "S", "price": 1635.09, "qty": 18}]}

We take 1000 similar packets and compare the two methods.

Increasing JSON generation speed: 40% (i.e. the second method is also faster)
Reduced size: 55%.

Now we compress both resulting JSON strings with gzip.

We get:

31916 against 8713 bytes.

That is, the gzipped size became almost 4 times smaller.

If you have doubts, write me an e-mail and I will send you the corresponding files.

Source: https://habr.com/ru/post/85675/

All Articles

JSON dataset optimization (record arrays)

More articles: