⬆️ ⬇️

Natural string sorting in javascript

The sorting task is perhaps the most frequently solved problem by programmers. Despite the fact that there are not so many common algorithms and all of them have been written and optimized for any languages ​​and platforms for a long time, the SortList () methods and the like flash in the source code. Probably, each of us wrote a sorting bubble more than once and wondered why it didn’t work the first time.



However, this is not about the sorting algorithm, but about the method of string comparison. It would seem that everything is trivial here - it is enough to compare the first different characters from the beginning. And if the lines have numbers? Then this sorting ( lexicographic ) converts the sequence ['file2', 'file10', 'file1'] into ['file1', 'file10', 'file2']. But when reading a text, a person perceives numbers separately, and the same sequence, intuitively ordered, looks like this: ['file1', 'file2', 'file10']. Such sorting is called natural sort.



Under the cut - a bike detailed algorithm in JavaScript. It does not pretend to be optimal and beautiful, but it is still better than a multi-pass head-on implementation.

')





We will not write the sorting algorithm itself, but use the built-in method Array.sort (). Therefore, the “main function” will look something like this:

function naturalSort(stringArray) {

var arr = stringArray;

// ...

return arr.sort(compareStrings) //

}


It remains to write the compareStrings () function to compare strings. This can be done in different ways, but I came up with the following approach: separate the numbers from the text as part of the lines, and then go through the resulting arrays to the first difference between the lines (or numbers) with the same indices. Split strings will be splitString () (about it later), and the comparison itself will look something like the following:

function compareStrings(str1, str2) {

//

var compare = function (a, b) {

return (a < b) ? -1 : (a > b) ? 1 : 0;

};

// ; splitString()

var parts1 = splitString(str1);

var parts2 = splitString(str2);

//

var minLength = Math.min(parts1.length, parts2.length);

for ( var i = 0; i < minLength; i++) {

//

}

//

// , "", , ""

return compare(parts1.length, parts2.length);

}


However, this approach is not so smooth: most of the strings that have to be compared differ from the first characters, that is, splitString () does a lot of extra work. But this is fixable with the help of deferred calculations - let splitString () return not an array ready, but an iterator object with the methods next () and count (). Thanks to the closures in JavaScript, the implementation was not too voluminous:

( UPD at the end of the post)

function splitString(str) {

var from = 0; // slice()

var index = 0; //

var count = 0; //

var splitter = {}; //

//

splitter.count = function () {

return count;

}

//

splitter.next = function () {

// - null

if (index === str.length) {

return null ;

}

//

while (++index) {

var currentIsDigit = isDigit(str.charAt(index - 1)); // isDigit()

var nextChar = str.charAt(index);

var currentIsLast = (index === str.length);

// - ,

// ( / )

var isBorder = currentIsLast ||

xor(currentIsDigit, isDigit(nextChar)); // xor()

if (isBorder) {

var part = str.slice(from, index);

from = index;

count++;

return {

IsNumber: currentIsDigit,

Value: currentIsDigit ? Number(part) : part

};

} // end if

} // end while

}; // end next()



return splitter;

}


Let's write auxiliary functions - xor () and isDigit ():

function xor(a, b) {

return a ? !b : b;

}



function isDigit(chr) {

var charCode = function (ch) {

return ch.charCodeAt(0);

};

var code = charCode(chr);

return (code >= charCode( '0' )) && (code <= charCode( '9' ));

}


Now let's return to the compareStrings () string comparison function. With the introduction of iterators, the overall logic has hardly changed:

( UPD at the end of the post)

function compareStrings(str1, str2) {

//

var compare = function (a, b) {

return (a < b) ? -1 : (a > b) ? 1 : 0;

};

//

var splitter1 = splitString(str1);

var splitter2 = splitString(str2);

//

while ( true ) {

var first = splitter1.next();

var second = splitter2.next();

// ...

if ( null !== first && null !== second) {

// ( )

if (xor(first.IsNumber, second.IsNumber)) {

// ""

return first.IsNumber ? -1 : 1;

//

} else {

var comp = compare(first.Value, second.Value);

if (comp != 0) {

return comp;

}

} // end if

// ... - ""

} else {

return compare(splitter1.count(), splitter2.count());

}

} // end while

}


Finally, we will write the sort function, put it all together and link the code:

( UPD at the end of the post)

function naturalSort(stringArray) {

// ""

var xor = function (a, b) {

return a ? !b : b;

};

// ,

var isDigit = function (chr) {

var charCode = function (ch) {

return ch.charCodeAt(0);

};

var code = charCode(chr);

return (code >= charCode( '0' )) && (code <= charCode( '9' ));

};

//

var splitString = function (str) {

var from = 0; // slice()

var index = 0; //

var count = 0; //

var splitter = {}; //

//

splitter.count = function () {

return count;

}

//

splitter.next = function () {

// - null

if (index === str.length) {

return null ;

}

//

while (++index) {

var currentIsDigit = isDigit(str.charAt(index - 1));

var nextChar = str.charAt(index);

var currentIsLast = (index === str.length);

// - ,

// ( / )

var isBorder = currentIsLast ||

xor(currentIsDigit, isDigit(nextChar));

if (isBorder) {

var part = str.slice(from, index);

from = index;

count++;

return {

IsNumber: currentIsDigit,

Value: currentIsDigit ? Number(part) : part

};

} // end if

} // end while

}; // end next()

return splitter;

};

// ""

var compareStrings = function (str1, str2) {

//

var compare = function (a, b) {

return (a < b) ? -1 : (a > b) ? 1 : 0;

};

//

var splitter1 = splitString(str1);

var splitter2 = splitString(str2);

//

while ( true ) {

var first = splitter1.next();

var second = splitter2.next();

// ...

if ( null !== first && null !== second) {

// ( )

if (xor(first.IsNumber, second.IsNumber)) {

// ""

return first.IsNumber ? -1 : 1;

//

} else {

var comp = compare(first.Value, second.Value);

if (comp != 0) {

return comp;

}

} // end if

// ... - ""

} else {

return compare(splitter1.count(), splitter2.count());

}

} // end while

}

//

var arr = stringArray;

return arr.sort(compareStrings);

}




I would be grateful for the found bugs, tips and suggestions for the code. I hope someone was interested, and maybe even useful.



UPD:

In comments, Alex_At_Net noticed that it would be nice to cache the results of splitting lines, and standy suggested immediately splitting the lines into parts and sorting ready-made sets of fragments. The result was a revised and updated version of the script. The optional parameter extractor is added to the sorting function - a function for converting an arbitrary object into a string: now you can sort arrays of any objects. The input array is immediately converted into an array of splitters, but the splitting itself occurs only when the fragment is accessed by the index, to the right place and only once. Instead of the next () method, the splitter now has a part (i) method that returns a fragment by index. The code is a little more “combed” and “objectized”. It looks like this:



function naturalSort(array, extractor) {

"use strict" ;

//

var splitters = array.map(makeSplitter);

//

var sorted = splitters.sort(compareSplitters);

//

return sorted.map( function (splitter) {

return splitter.item;

});

//

function makeSplitter(item) {

return new Splitter(item);

}

//

// ""

function Splitter(item) {

var index = 0; //

var from = 0; //

var parts = []; //

var completed = false ; //

//

this .item = item;

// -

var key = ( typeof (extractor) === 'function' ) ?

extractor(item) :

item;

this .key = key;

//

this .count = function () {

return parts.length;

};

// ( parts[])

this .part = function (i) {

while (parts.length <= i && !completed) {

next(); //

}

return (i < parts.length) ? parts[i] : null ;

};

//

function next() {

//

if (index < key.length) {

//

while (++index) {

var currentIsDigit = isDigit(key.charAt(index - 1));

var nextChar = key.charAt(index);

var currentIsLast = (index === key.length);

// - ,

// ( / )

var isBorder = currentIsLast ||

xor(currentIsDigit, isDigit(nextChar));

if (isBorder) {

// parts[]

var partStr = key.slice(from, index);

parts.push( new Part(partStr, currentIsDigit));

from = index;

break ;

} // end if

} // end while

//

} else {

completed = true ;

} // end if

} // end next

//

function Part(text, isNumber) {

this .isNumber = isNumber;

this .value = isNumber ? Number(text) : text;

}

}

//

function compareSplitters(sp1, sp2) {

var i = 0;

do {

var first = sp1.part(i);

var second = sp2.part(i);

// ...

if ( null !== first && null !== second) {

// ( )

if (xor(first.isNumber, second.isNumber)) {

// ""

return first.isNumber ? -1 : 1;

//

} else {

var comp = compare(first.value, second.value);

if (comp != 0) {

return comp;

}

} // end if

// ... - ""

} else {

return compare(sp1.count(), sp2.count());

}

} while (++i);

//

function compare(a, b) {

return (a < b) ? -1 : (a > b) ? 1 : 0;

};

};

// ""

function xor(a, b) {

return a ? !b : b;

};

// :

function isDigit(chr) {

var code = charCode(chr);

return (code >= charCode( '0' )) && (code <= charCode( '9' ));

function charCode(ch) {

return ch.charCodeAt(0);

};

};

}




With comments

No comments



PS:

function thanks() {

alert( 'C PoiSoN Source Code Highlighter' );

};




* This source code was highlighted with Source Code Highlighter .

Source: https://habr.com/ru/post/127943/



All Articles