mirror of
https://github.com/koreader/koreader.git
synced 2025-12-13 20:36:53 +01:00
Misc: Natural sorting refactor (#10023)
* Move natural sorting algo to a dedicated sort module to avoid code duplication * Use a slightly more accurate algorithm, and speed it up by caching intermediary strings * Calibre: Use natural sorting in metadata search (fix #10009)
This commit is contained in:
@@ -219,7 +219,13 @@ function FileSearcher:showSearchResults()
|
||||
|
||||
local collate = G_reader_settings:readSetting("collate") or "strcoll"
|
||||
local reverse_collate = G_reader_settings:isTrue("reverse_collate")
|
||||
local sorting = FileChooser:getSortingFunction(collate, reverse_collate)
|
||||
-- If we have a FileChooser instance, use it, to be able to make use of its natsort cache
|
||||
local sorting
|
||||
if self.ui.file_chooser then
|
||||
sorting = self.ui.file_chooser:getSortingFunction(collate, reverse_collate)
|
||||
else
|
||||
sorting = FileChooser:getSortingFunction(collate, reverse_collate)
|
||||
end
|
||||
|
||||
table.sort(self.results, sorting)
|
||||
self.search_menu:switchItemTable(T(_("Search results (%1)"), #self.results), self.results)
|
||||
|
||||
133
frontend/sort.lua
Normal file
133
frontend/sort.lua
Normal file
@@ -0,0 +1,133 @@
|
||||
--[[--
|
||||
This module contains a collection of comparison functions (or factories for comparison functions) for `table.sort`.
|
||||
@module sort
|
||||
]]
|
||||
|
||||
local sort = {}
|
||||
|
||||
--[[
|
||||
Natural sorting functions, for use with table.sort
|
||||
<http://notebook.kulchenko.com/algorithms/alphanumeric-natural-sorting-for-humans-in-lua>
|
||||
--]]
|
||||
-- Original implementation by Paul Kulchenko
|
||||
--[[
|
||||
local function addLeadingZeroes(d)
|
||||
local dec, n = string.match(d, "(%.?)0*(.+)")
|
||||
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
|
||||
end
|
||||
function sort.natsort(a, b)
|
||||
return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b)
|
||||
< tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a)
|
||||
end
|
||||
--]]
|
||||
-- Hardened (but more expensive) implementation by Egor Skriptunoff, with an UTF-8 tweak by Paul Kulchenko
|
||||
--[[
|
||||
local function natsort_conv(s)
|
||||
local res, dot = "", ""
|
||||
for n, m, c in tostring(s):gmatch("(0*(%d*))(.?)") do
|
||||
if n == "" then
|
||||
dot, c = "", dot..c
|
||||
else
|
||||
res = res..(dot == "" and ("%03d%s"):format(#m, m)
|
||||
or "."..n)
|
||||
dot, c = c:match("(%.?)(.*)")
|
||||
end
|
||||
res = res..c:gsub("[%z\1-\127\192-\255]", "\0%0")
|
||||
end
|
||||
return res
|
||||
end
|
||||
--]]
|
||||
-- The above conversion is *fairly* expensive,
|
||||
-- and table.sort ensures that it'll be called on identical strings multiple times,
|
||||
-- so keeping a cache of massaged strings makes sense.
|
||||
-- <https://github.com/koreader/koreader/pull/10023#discussion_r1069776657>
|
||||
-- We can rely on LRU to avoid explicit cache maintenance concerns
|
||||
-- (given the type of content we massage, the memory impact is fairly insignificant).
|
||||
-- The extra persistence this affords us also happens to help with the FM use-case ;).
|
||||
|
||||
-- Dumb persistent hash-map => cold, ~200 to 250ms; hot: ~150ms (which roughly matches sorting by numerical file attributes).
|
||||
-- (Numbers are from the FM sorting 350 entries (mostly composed of author names) on an H2O; an uncached run takes ~650ms).
|
||||
--[[
|
||||
local natsort_cache = {}
|
||||
|
||||
function sort.natsort(a, b)
|
||||
local ca, cb = natsort_cache[a], natsort_cache[b]
|
||||
if not ca then
|
||||
ca = natsort_conv(a)
|
||||
natsort_cache[a] = ca
|
||||
end
|
||||
if not cb then
|
||||
cb = natsort_conv(b)
|
||||
natsort_cache[b] = cb
|
||||
end
|
||||
|
||||
return ca < cb or ca == cb and a < b
|
||||
end
|
||||
--]]
|
||||
|
||||
-- LRU => cold, ~200 to 250ms; hot ~150 to 175ms (which is barely any slower than a dumb hash-map, yay, LRU and LuaJIT magic).
|
||||
--[[
|
||||
local lru = require("ffi/lru")
|
||||
local natsort_cache = lru.new(1024, nil, false)
|
||||
|
||||
function sort.natsort(a, b)
|
||||
local ca, cb = natsort_cache:get(a), natsort_cache:get(b)
|
||||
if not ca then
|
||||
ca = natsort_conv(a)
|
||||
natsort_cache:set(a, ca)
|
||||
end
|
||||
if not cb then
|
||||
cb = natsort_conv(b)
|
||||
natsort_cache:set(b, cb)
|
||||
end
|
||||
|
||||
return ca < cb or ca == cb and a < b
|
||||
end
|
||||
--]]
|
||||
|
||||
--[[--
|
||||
Generates a natural sorting comparison function for table.sort.
|
||||
|
||||
@param cache Optional, hashmap used to cache the processed strings to speed up sorting
|
||||
@return The cmp function to feed to `table.sort`
|
||||
@return The cache used (same object as the passed one, if any; will be created if not)
|
||||
|
||||
@usage
|
||||
|
||||
-- t is an array of strings, we don't want to keep the cache around
|
||||
table.sort(t, sort.natsort_cmp())
|
||||
|
||||
-- t is an array of arrays, we want to sort the strings in the "text" field of the inner arrays, and we want to keep the cache around.
|
||||
local cmp, cache
|
||||
cmp, cache = sort.natsort_cmp(cache)
|
||||
table.sort(t, function(a, b) return cmp(a.text, b.text) end)
|
||||
]]
|
||||
function sort.natsort_cmp(cache)
|
||||
if not cache then
|
||||
cache = {}
|
||||
end
|
||||
|
||||
local function natsort_conv(s)
|
||||
local res, dot = "", ""
|
||||
for n, m, c in tostring(s):gmatch("(0*(%d*))(.?)") do
|
||||
if n == "" then
|
||||
dot, c = "", dot..c
|
||||
else
|
||||
res = res..(dot == "" and ("%03d%s"):format(#m, m)
|
||||
or "."..n)
|
||||
dot, c = c:match("(%.?)(.*)")
|
||||
end
|
||||
res = res..c:gsub("[%z\1-\127\192-\255]", "\0%0")
|
||||
end
|
||||
cache[s] = res
|
||||
return res
|
||||
end
|
||||
|
||||
local function natsort(a, b)
|
||||
local ca, cb = cache[a] or natsort_conv(a), cache[b] or natsort_conv(b)
|
||||
return ca < cb or ca == cb and a < b
|
||||
end
|
||||
return natsort, cache
|
||||
end
|
||||
|
||||
return sort
|
||||
@@ -12,6 +12,7 @@ local ffiUtil = require("ffi/util")
|
||||
local T = ffiUtil.template
|
||||
local _ = require("gettext")
|
||||
local Screen = Device.screen
|
||||
local sort = require("sort")
|
||||
local util = require("util")
|
||||
local getFileNameSuffix = util.getFileNameSuffix
|
||||
local getFriendlySize = util.getFriendlySize
|
||||
@@ -232,14 +233,18 @@ function FileChooser:getSortingFunction(collate, reverse_collate)
|
||||
return a.percent_finished < b.percent_finished
|
||||
end
|
||||
elseif collate == "natural" then
|
||||
-- adapted from: http://notebook.kulchenko.com/algorithms/alphanumeric-natural-sorting-for-humans-in-lua
|
||||
local function addLeadingZeroes(d)
|
||||
local dec, n = string.match(d, "(%.?)0*(.+)")
|
||||
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
|
||||
end
|
||||
sorting = function(a, b)
|
||||
return tostring(a.name):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b.name)
|
||||
< tostring(b.name):gsub("%.?%d+",addLeadingZeroes)..("%3d"):format(#a.name)
|
||||
local natsort
|
||||
-- Only keep the cache if we're an *instance* of FileChooser
|
||||
if self ~= FileChooser then
|
||||
natsort, self.natsort_cache = sort.natsort_cmp(self.natsort_cache)
|
||||
sorting = function(a, b)
|
||||
return natsort(a.name, b.name)
|
||||
end
|
||||
else
|
||||
natsort = sort.natsort_cmp()
|
||||
sorting = function(a, b)
|
||||
return natsort(a.name, b.name)
|
||||
end
|
||||
end
|
||||
else
|
||||
sorting = function(a, b)
|
||||
|
||||
@@ -25,6 +25,7 @@ end
|
||||
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local logger = require("logger")
|
||||
local sort = require("sort")
|
||||
local DataStorage = require("datastorage")
|
||||
|
||||
-- the directory KOReader is installed in (and runs from)
|
||||
@@ -55,16 +56,7 @@ local function runUserPatchTasks(dir, priority)
|
||||
return -- nothing to do
|
||||
end
|
||||
|
||||
local function addLeadingZeroes(d)
|
||||
local dec, n = string.match(d, "(%.?)0*(.+)")
|
||||
return #dec > 0 and ("%.12f"):format(d) or ("%s%03d%s"):format(dec, #n, n)
|
||||
end
|
||||
local function sorting(a, b)
|
||||
return tostring(a):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#b)
|
||||
< tostring(b):gsub("%.?%d+", addLeadingZeroes)..("%3d"):format(#a)
|
||||
end
|
||||
|
||||
table.sort(patches, sorting)
|
||||
table.sort(patches, sort.natsort_cmp())
|
||||
|
||||
for i, entry in ipairs(patches) do
|
||||
local fullpath = dir .. "/" .. entry
|
||||
|
||||
@@ -17,8 +17,9 @@ local WidgetContainer = require("ui/widget/container/widgetcontainer")
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local logger = require("logger")
|
||||
local rapidjson = require("rapidjson")
|
||||
local util = require("util")
|
||||
local sort = require("sort")
|
||||
local time = require("ui/time")
|
||||
local util = require("util")
|
||||
local _ = require("gettext")
|
||||
local T = require("ffi/util").template
|
||||
|
||||
@@ -159,6 +160,7 @@ end
|
||||
local CalibreSearch = WidgetContainer:extend{
|
||||
books = {},
|
||||
libraries = {},
|
||||
natsort_cache = {},
|
||||
last_scan = {},
|
||||
search_options = {
|
||||
"cache_metadata",
|
||||
@@ -271,7 +273,7 @@ function CalibreSearch:bookCatalog(t, option)
|
||||
entry.info = getBookInfo(book)
|
||||
entry.path = book.rootpath .. "/" .. book.lpath
|
||||
if series and book.series_index then
|
||||
local major, minor = string.format("%05.2f", book.series_index):match("([^.]+).([^.]+)")
|
||||
local major, minor = string.format("%05.2f", book.series_index):match("([^.]+)%.([^.]+)")
|
||||
if minor ~= "00" then
|
||||
subseries = true
|
||||
end
|
||||
@@ -448,7 +450,8 @@ function CalibreSearch:switchResults(t, title, is_child, page)
|
||||
title = _("Search results")
|
||||
end
|
||||
|
||||
table.sort(t, function(v1,v2) return v1.text < v2.text end)
|
||||
local natsort = sort.natsort_cmp(self.natsort_cache)
|
||||
table.sort(t, function(a, b) return natsort(a.text, b.text) end)
|
||||
|
||||
if is_child then
|
||||
local path_entry = {}
|
||||
@@ -549,6 +552,7 @@ end
|
||||
function CalibreSearch:invalidateCache()
|
||||
self.cache_books:delete()
|
||||
self.books = {}
|
||||
self.natsort_cache = {}
|
||||
end
|
||||
|
||||
-- get metadata from cache or calibre files
|
||||
|
||||
@@ -683,7 +683,7 @@ function CoverMenu:onCloseWidget()
|
||||
end)
|
||||
nb_drawings_since_last_collectgarbage = 0
|
||||
|
||||
-- Call original Menu:onCloseWidget (no subclass seems to override it)
|
||||
-- Call the object's original onCloseWidget (i.e., Menu's, as none our our expected subclasses currently implement it)
|
||||
Menu.onCloseWidget(self)
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user