mirror of
https://github.com/koreader/koreader.git
synced 2025-12-13 20:36:53 +01:00
Binary catalogs are more space efficient (total translations disk usage: 14MB instead of 35MB), and easier and faster (roughly 10 times) to load: no parsing, and less processing (no need to unescape strings). Not much difference on the Android APK size (a ~600KB reduction), but other TAR.GZ / ZIP distributions see a reduction of ~3.5MB.
508 lines
17 KiB
Lua
508 lines
17 KiB
Lua
--[[--
|
|
A pure Lua implementation of a gettext subset.
|
|
|
|
Example:
|
|
local _ = require("gettext") -- @{gettext.gettext|gettext}()
|
|
local C_ = _.pgettext -- @{pgettext}()
|
|
local N_ = _.ngettext -- @{ngettext}()
|
|
local NC_ = _.npgettext -- @{npgettext}()
|
|
local T = require("ffi/util").template -- @{ffi.util.template}()
|
|
|
|
-- The most common use case with regular @{gettext.gettext|gettext}().
|
|
local simple_string = _("item")
|
|
|
|
-- A more complex example. The correct plural form will be automatically
|
|
-- selected by @{ngettext}() based on the number.
|
|
local numbered_string = T(N_("1 item", "%1 items", num_items), num_items)
|
|
|
|
It's required to pass along the number twice, because @{ngettext}() doesn't do anything with placeholders.
|
|
See @{ffi.util.template}() for more information about the template function.
|
|
--]]
|
|
|
|
local isAndroid, android = pcall(require, "android")
|
|
local logger = require("logger")
|
|
local buffer = require("string.buffer")
|
|
local ffi = require("ffi")
|
|
local C = ffi.C
|
|
|
|
require "table.new"
|
|
require "ffi/posix_h"
|
|
|
|
-- Module state. The two tables are filled by addTranslation() while a catalog
-- is being loaded; the remaining fields are plain string settings.
local GetText = {
    -- msgctxt -> msgid -> translation
    context = {},
    -- msgid -> translation
    translation = {},
    -- language in effect; changeLang() leaves it at "C" unless a catalog loads
    current_lang = "C",
    -- directory containing one sub-directory per language
    dirname = "l10n",
    -- base name of the catalog file inside a language directory
    textdomain = "koreader",
    -- plural expression that maps onto getDefaultPlural()
    plural_default = "n != 1",
}

-- Metatable for the module table: fields missing on GetText are looked up in
-- this __index table, where the public functions are stored.
local GetText_mt = {
    __index = {},
}
|
|
|
|
-- wrapUntranslated() will be overridden by bidi.lua when UI language is RTL,
|
|
-- to wrap untranslated english strings as LTR-isolated segments.
|
|
-- It should do nothing when the UI language is LTR.
|
|
-- Identity wrapper: hands the text back unchanged.
function GetText.wrapUntranslated_nowrap(text)
    return text
end
-- Wrapper applied by the lookup functions to strings that have no translation.
GetText.wrapUntranslated = GetText.wrapUntranslated_nowrap
|
|
-- Note: this won't be possible if we switch from our Lua GetText to
|
|
-- GetText through FFI (but hopefully, RTL languages will be fully
|
|
-- translated by then).
|
|
|
|
--[[--
|
|
Returns a translation.
|
|
|
|
@function gettext
|
|
|
|
@string msgid
|
|
|
|
@treturn string translation
|
|
|
|
@usage
|
|
local _ = require("gettext")
|
|
local translation = _("A meaningful message.")
|
|
--]]
|
|
function GetText_mt.__call(gettext, msgid)
    local entry = gettext.translation[msgid]
    if not entry then
        -- Parenthesized so that exactly one value is returned.
        return (gettext.wrapUntranslated(msgid))
    end
    -- A message stored with plural forms is a table: use form 0 when it is
    -- there. Otherwise hand back the entry itself.
    return entry[0] or entry
end
|
|
|
|
--- Converts C logical operators to their Lua spelling.
--
-- Rewrites `&&`, `||` and `!=`; everything else is passed through untouched.
-- The keywords are padded with spaces so that an expression written without
-- whitespace (`n==1||n==2`) does not end up with `or` glued to an operand,
-- which would not compile.
--
-- @string logical_str C-style expression
-- @treturn string the expression with Lua operators
local function logicalCtoLua(logical_str)
    -- Only the first gsub result (the string) is kept; the count is dropped.
    logical_str = logical_str:gsub("&&", " and ")
    logical_str = logical_str:gsub("!=", "~=")
    logical_str = logical_str:gsub("||", " or ")
    return logical_str
end
|
|
|
|
--- Plural selector for the two-form rule `n != 1`.
--
-- @param n quantity
-- @treturn int 0 when n equals 1, 1 for anything else
local function getDefaultPlural(n)
    if n == 1 then
        return 0
    end
    return 1
end
|
|
|
|
--- Generates a Lua plural selector out of gettext's C-style plural tests.
--
-- `pl_tests` holds the pieces of the plural expression split on `:`. With
-- several pieces, each `cond ? index` becomes an `if`/`elseif` branch and a
-- bare number becomes the final `else`. With a single piece, it is either the
-- default rule, a constant index, or a condition selecting between 1 and 0.
--
-- @tparam {string,...} pl_tests pieces of the plural expression
-- @param nplurals number of plural forms (accepted but not used here)
-- @string plural_default expression that maps onto getDefaultPlural()
-- @treturn ?function selector `f(n) -> index`, or nil when the generated source
--   does not compile (the caller then falls back to the default selector)
local function getPluralFunc(pl_tests, nplurals, plural_default)
    local plural_func_str

    if #pl_tests > 1 then
        -- the return function() stuff is a bit of loadstring trickery
        local parts = { "return function(n) if " }
        for i = 1, #pl_tests do
            local pl_test = logicalCtoLua(pl_tests[i])
            if tonumber(pl_test) ~= nil then
                -- no condition, just a number
                pl_test = " else return "..pl_test
            elseif i > 1 then
                pl_test = " elseif "..pl_test
            end
            -- "?" is escaped so it is unambiguously a literal question mark.
            parts[#parts + 1] = (pl_test:gsub("%?", " then return"))
        end
        parts[#parts + 1] = " end end"
        plural_func_str = table.concat(parts)
    else
        local pl_test = pl_tests[1]
        -- Ensure JIT compiled function if we're dealing with one of the many simpler languages.
        -- After all, loadstring won't be.
        -- Potential workaround: write to file and use require.
        if pl_test == plural_default then
            return getDefaultPlural
        end
        if tonumber(pl_test) ~= nil then
            -- language with no plural forms
            plural_func_str = "return function(n) return "..pl_test.." end"
        else
            pl_test = logicalCtoLua(pl_test)
            plural_func_str = "return function(n) if "..pl_test.." then return 1 else return 0 end end"
        end
    end

    logger.dbg("gettext: plural function", plural_func_str)
    -- loadstring returns nil plus a message on a syntax error. Calling the
    -- result blindly would raise instead of letting the caller fall back.
    local chunk, err = loadstring(plural_func_str)
    if not chunk then
        logger.warn("gettext: invalid plural function", err)
        return nil
    end
    return chunk()
end
|
|
|
|
--- Stores one translated string in the module tables.
--
-- Empty translations are ignored. With a context the entry goes under
-- `GetText.context[msgctxt]`, otherwise under `GetText.translation`. With a
-- plural index `n` the entry for `msgid` is a table indexed by `n`, otherwise
-- it is the string itself.
--
-- @tparam ?string msgctxt message context (must not be empty when given)
-- @string msgid original string (must not be empty)
-- @string msgstr translated string
-- @tparam ?int n plural form index
local function addTranslation(msgctxt, msgid, msgstr, n)
    assert(not msgctxt or msgctxt ~= "")
    assert(msgid and msgid ~= "")
    assert(msgstr)
    if msgstr == "" then
        return
    end

    -- Pick the table that owns this msgid, creating the context's table on
    -- first use.
    local catalog
    if msgctxt then
        catalog = GetText.context[msgctxt]
        if not catalog then
            catalog = {}
            GetText.context[msgctxt] = catalog
        end
    else
        catalog = GetText.translation
    end

    if not n then
        catalog[msgid] = msgstr
        return
    end

    local forms = catalog[msgid]
    if not forms then
        forms = {}
        catalog[msgid] = forms
    end
    forms[n] = msgstr
end
|
|
|
|
--- Switches the active language by loading its catalog.
--
-- All state from the previous language is dropped first, including the plural
-- selector: keeping the old one would make ngettext() apply the previous
-- language's plural rule to untranslated strings.
--
-- @tparam ?string new_lang locale name, e.g. `de`, `pt_BR` or `zh_CN.utf8`
-- @treturn ?bool nothing when localization is disabled (`C`, empty, nil or
--   `en_US…`), true when the catalog was loaded, false when it was not
function GetText_mt.__index.changeLang(new_lang)
    local function reset()
        GetText.context = {}
        GetText.translation = {}
        GetText.getPlural = getDefaultPlural
        GetText.current_lang = "C"
    end
    reset()

    -- the "C" locale disables localization altogether
    -- can be various things such as `en_US` or `en_US:en`
    if new_lang == "C" or new_lang == nil or new_lang == ""
        or new_lang:match("^en_US") == "en_US" then return end

    -- strip encoding suffix in locale like "zh_CN.utf8"
    -- (the match starts on the character right before the first such dot)
    local before_dot = new_lang:find(".%.")
    if before_dot then
        new_lang = new_lang:sub(1, before_dot)
    end

    local mo = GetText.dirname .. "/" .. new_lang .. "/" .. GetText.textdomain .. ".mo"
    if not GetText.loadMO(mo) then
        -- loadMO() may have stored part of the catalog before failing; do not
        -- leave a half-translated state behind while current_lang says "C".
        reset()
        return false
    end

    GetText.current_lang = new_lang
    return true
end
|
|
|
|
--- Extracts the plural rule from a catalog's header entry and installs the
-- matching selector as `GetText.getPlural`.
--
-- Falls back to the default rule when the `Plural-Forms` header or its
-- `plural=` expression is missing, or when the expression contains characters
-- outside the accepted set.
--
-- @string headers translation of the empty msgid (one header per line)
local function parse_headers(headers)
    -- Capture the Plural-Forms line only. Capturing up to the end of the
    -- headers would let a ";" in a later header end up in the expression.
    local plural_forms = headers:match("Plural%-Forms: ([^\n]*)")
    local nplurals, plurals
    if plural_forms then
        nplurals = plural_forms:match("nplurals=([0-9]+);")
        plurals = plural_forms:match("plural=%((.*)%);")
    end
    nplurals = nplurals or 2

    -- Hardcoded workarounds for nested ternaries. The keys are what the
    -- pattern above captures, the values are flat equivalents.
    local workarounds = {
        -- Hebrew, which has 4 plural forms.
        ["n == 1) ? 0 : ((n == 2) ? 1 : ((n > 10 && n % 10 == 0) ? 2 : 3)"] =
            "n == 1 ? 0 : (n == 2) ? 1 : (n > 10 && n % 10 == 0) ? 2 : 3",
        -- Latvian.
        ["n % 10 == 0 || n % 100 >= 11 && n % 100 <= 19) ? 0 : ((n % 10 == 1 && n % 100 != 11) ? 1 : 2"] =
            "n % 10 == 0 || n % 100 >= 11 && n % 100 <= 19 ? 0 : (n % 10 == 1 && n % 100 != 11) ? 1 : 2",
        -- Romanian, which has 3 plural forms.
        ["n == 1) ? 0 : ((n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2"] =
            "n == 1 ? 0 : (n == 0 || n != 1 && n % 100 >= 1 && n % 100 <= 19) ? 1 : 2",
    }
    if plurals and workarounds[plurals] then
        plurals = workarounds[plurals]
    end

    if not plurals and plural_forms then
        -- Some languages (e.g., Arabic) may not use parentheses.
        -- However, the following more inclusive match is more likely
        -- to accidentally include junk and seldom relevant.
        -- We might also be dealing with a language without plurals.
        -- That would look like `plural=0`.
        plurals = plural_forms:match("plural=(.*);")
    end

    if not plurals or plurals:find("[^n!=%%<>&:%(%)|?0-9 ]") then
        -- nothing usable, or we don't trust this input: go with default instead
        plurals = GetText.plural_default
    end

    local pl_tests = {}
    for pl_test in plurals:gmatch("[^:]+") do
        table.insert(pl_tests, pl_test)
    end

    GetText.getPlural = getPluralFunc(pl_tests, nplurals, GetText.plural_default)
    if not GetText.getPlural then
        GetText.getPlural = getDefaultPlural
    end
end
|
|
|
|
-- for MO file format, see
|
|
-- https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
|
|
|
|
-- C layouts used by loadMO() to read a catalog:
--   * mo_header is read from offset 0 of the file;
--   * mo_string_table is one entry of the original / translated string
--     tables, giving the length of a string and its offset in the file.
ffi.cdef[[
|
|
struct __attribute__((packed)) mo_header {
|
|
uint32_t magic;
|
|
uint16_t revision_major;
|
|
uint16_t revision_minor;
|
|
uint32_t nb_strings;
|
|
uint32_t original_strings_table_offset;
|
|
uint32_t translated_strings_table_offset;
|
|
uint32_t hash_table_size;
|
|
uint32_t hash_table_offset;
|
|
};
|
|
|
|
struct __attribute__((packed)) mo_string_table {
|
|
uint32_t length;
|
|
uint32_t offset;
|
|
};
|
|
]]
|
|
-- Value the header's magic field must have; loadMO() rejects any other value.
-- NOTE(review): presumably a catalog written with the opposite byte order
-- would show up byte-swapped and be rejected too — confirm against the MO spec.
local MO_MAGIC = 0x950412de
|
|
|
|
--- Loads a binary gettext catalog (MO file) into the translation tables.
--
-- The table of original strings is read first, then the table of translated
-- strings. Each pair is handed to addTranslation(), except for the header
-- entry (first string, empty msgid), whose translation goes to
-- parse_headers(). Strings added before a read failure are kept.
--
-- @string file path of the MO file
-- @treturn bool true when every string was read, false otherwise
function GetText_mt.__index.loadMO(file)
    local fd = C.open(file, C.O_RDONLY)
    if fd < 0 then
        logger.dbg(string.format("cannot open translation file: %s", file))
        return false
    end

    -- Shared failure exit, so that the descriptor is released on every path.
    local function fail()
        C.close(fd)
        return false
    end

    local strerror = function()
        return ffi.string(C.strerror(ffi.errno()))
    end

    -- Reads exactly `len` bytes located at offset `off` into `ptr`.
    -- Logs and returns false on a failed seek, a read error or a short read.
    local seek_and_read = function(off, ptr, len)
        local ret
        ret = C.lseek(fd, off, C.SEEK_SET)
        if ret ~= off then
            logger.err(string.format("loading translation file failed: %s [%s]", file, ret < 0 and strerror() or "lseek"))
            return false
        end
        ret = C.read(fd, ptr, len)
        if ret ~= len then
            -- Both values belong inside string.format: the format has two
            -- placeholders and raises when the second value is missing.
            logger.err(string.format("loading translation file failed: %s [%s]", file, ret < 0 and strerror() or "short read"))
            return false
        end
        return true
    end

    local mo_hdr = ffi.new("struct mo_header")
    if not seek_and_read(0, mo_hdr, ffi.sizeof(mo_hdr)) then
        return fail()
    end
    if mo_hdr.magic ~= MO_MAGIC then
        logger.err(string.format("bad translation file: %s [magic]", file))
        return fail()
    end
    if mo_hdr.revision_major ~= 0 then
        logger.err(string.format("bad translation file: %s [revision]", file))
        return fail()
    end

    -- One buffer holds whichever string table (original or translated) is
    -- currently being walked.
    local table_buf = buffer.new()
    local table_size = mo_hdr.nb_strings * ffi.sizeof("struct mo_string_table")
    local table_ptr = table_buf:reserve(table_size)

    -- Number of strings fully read by the current iterator. It stays below
    -- nb_strings when a read fails part-way through.
    local read_strings_count

    -- Returns an iterator over the strings of the table loaded in table_ptr.
    -- Each step yields: 1-based index, string (or array of strings when the
    -- entry holds several NUL-separated parts), and the context, if any.
    local read_strings = function(check_for_context)
        local m_str_tbl = ffi.cast("struct mo_string_table *", table_ptr)
        local str_buf = buffer.new()
        read_strings_count = -1
        return function()
            read_strings_count = read_strings_count + 1
            if read_strings_count >= mo_hdr.nb_strings then
                return
            end
            local str_len = m_str_tbl[read_strings_count].length
            local str_off = m_str_tbl[read_strings_count].offset
            local str_ptr = str_buf:reserve(str_len)
            if not seek_and_read(str_off, str_ptr, str_len) then
                return
            end
            local ctx
            local pos = 0
            if check_for_context then
                -- Byte 4 (End of Transmission) ends the context, when present.
                local p = C.memchr(str_ptr, 4, str_len)
                if p ~= nil then
                    local l = ffi.cast("ssize_t", p) - ffi.cast("ssize_t", str_ptr)
                    ctx = ffi.string(str_ptr, l)
                    pos = l + 1
                end
            end
            local l = C.strnlen(str_ptr + pos, str_len - pos)
            if l + pos < str_len then
                -- Plurals! A NUL inside the entry separates the forms.
                local strings = {ffi.string(str_ptr + pos, l)}
                pos = pos + l + 1
                while pos < str_len do
                    l = C.strnlen(str_ptr + pos, str_len - pos)
                    table.insert(strings, ffi.string(str_ptr + pos, l))
                    pos = pos + l + 1
                end
                return read_strings_count + 1, strings, ctx
            else
                return read_strings_count + 1, ffi.string(str_ptr + pos, str_len - pos), ctx
            end
        end
    end

    -- Read original strings.
    if not seek_and_read(mo_hdr.original_strings_table_offset, table_ptr, table_size) then
        return fail()
    end
    local original_context = {}
    local original_strings = table.new(mo_hdr.nb_strings, 0)
    for n, s, ctx in read_strings(true) do
        if ctx then
            original_context[n] = ctx
        end
        original_strings[n] = s
    end
    if read_strings_count ~= mo_hdr.nb_strings then
        return fail()
    end

    -- Read translated strings.
    if not seek_and_read(mo_hdr.translated_strings_table_offset, table_ptr, table_size) then
        return fail()
    end
    for n, ts in read_strings() do
        local ctx = original_context[n]
        local os = original_strings[n]
        if type(os) == "table" then
            -- Plural message: it is keyed by its singular msgid.
            if type(ts) == "table" then
                for pn, pts in ipairs(ts) do
                    addTranslation(ctx, os[1], pts, pn - 1)
                end
            else
                addTranslation(ctx, os[1], ts, 0)
            end
        elseif type(ts) == "table" then
            logger.warn(string.format("bad translation file: %s [singular / plurals mismatch]", file))
        else
            if n == 1 and #os == 0 then
                -- Header entry: empty msgid in first position.
                parse_headers(ts)
            else
                addTranslation(ctx, os, ts)
            end
        end
    end

    local ok = read_strings_count == mo_hdr.nb_strings
    C.close(fd)
    return ok
end
|
|
|
|
-- Fallback plural selector, found through the metatable for as long as
-- parse_headers() has not stored a catalog-specific one on GetText itself.
GetText_mt.__index.getPlural = getDefaultPlural
|
|
|
|
--[[--
|
|
Returns a plural form.
|
|
|
|
Many languages have more forms than just singular and plural. This function
|
|
abstracts the complexity away. The translation can contain as many
|
|
pluralizations as it requires.
|
|
|
|
See [gettext plural forms](https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html)
|
|
and [translating plural forms](https://www.gnu.org/software/gettext/manual/html_node/Translating-plural-forms.html)
|
|
for more information.
|
|
|
|
It's required to pass along the number twice, because @{ngettext}() doesn't do anything with placeholders.
|
|
See @{ffi.util.template}() for more information about the template function.
|
|
|
|
@function ngettext
|
|
|
|
@string msgid
|
|
@string msgid_plural
|
|
@int n
|
|
|
|
@treturn string translation
|
|
|
|
@usage
|
|
local _ = require("gettext")
|
|
local N_ = _.ngettext
|
|
local T = require("ffi/util").template
|
|
|
|
local items_string = T(N_("1 item", "%1 items", num_items), num_items)
|
|
--]]
|
|
function GetText_mt.__index.ngettext(msgid, msgid_plural, n)
    local form = GetText.getPlural(n)

    local forms = GetText.translation[msgid]
    local translated = forms and forms[form]
    if translated then
        return translated
    end

    -- No translation for this form: fall back to the matching source string.
    -- The calls are parenthesized so that exactly one value is returned.
    if form == 0 then
        return (GetText.wrapUntranslated(msgid))
    end
    return (GetText.wrapUntranslated(msgid_plural))
end
|
|
|
|
--[[--
|
|
Returns a context-disambiguated plural form.
|
|
|
|
This is the logical combination between @{ngettext}() and @{pgettext}().
|
|
Please refer there for more information.
|
|
|
|
@function npgettext
|
|
|
|
@string msgctxt
|
|
@string msgid
|
|
@string msgid_plural
|
|
@int n
|
|
|
|
@treturn string translation
|
|
|
|
@usage
|
|
local _ = require("gettext")
|
|
local NC_ = _.npgettext
|
|
local T = require("ffi/util").template
|
|
|
|
local statistics_items_string = T(NC_("Statistics", "1 item", "%1 items", num_items), num_items)
|
|
local books_items_string = T(NC_("Books", "1 item", "%1 items", num_items), num_items)
|
|
--]]
|
|
function GetText_mt.__index.npgettext(msgctxt, msgid, msgid_plural, n)
    local form = GetText.getPlural(n)

    -- Walk context -> msgid -> plural form, stopping at the first miss.
    local in_context = GetText.context[msgctxt]
    local forms = in_context and in_context[msgid]
    local translated = forms and forms[form]
    if translated then
        return translated
    end

    -- No translation for this form: fall back to the matching source string.
    -- The calls are parenthesized so that exactly one value is returned.
    if form == 0 then
        return (GetText.wrapUntranslated(msgid))
    end
    return (GetText.wrapUntranslated(msgid_plural))
end
|
|
|
|
--[[--
|
|
Returns a context-disambiguated translation.
|
|
|
|
The same string might occur multiple times, but require a different translation based on context.
|
|
An example within KOReader is **Pages** meaning *page styles* (within the context of style tweaks)
|
|
and **Pages** meaning *number of pages*.
|
|
|
|
We generally don't apply context unless a conflict is known. This is only likely to occur with
|
|
short strings, of which of course there are many.
|
|
|
|
See [gettext contexts](https://www.gnu.org/software/gettext/manual/html_node/Contexts.html) for more information.
|
|
|
|
@function pgettext
|
|
|
|
@string msgctxt
|
|
@string msgid
|
|
|
|
@treturn string translation
|
|
|
|
@usage
|
|
local _ = require("gettext")
|
|
local C_ = _.pgettext
|
|
|
|
local copy_file = C_("File", "Copy")
|
|
local copy_text = C_("Text", "Copy")
|
|
--]]
|
|
function GetText_mt.__index.pgettext(msgctxt, msgid)
    local in_context = GetText.context[msgctxt]
    local translated = in_context and in_context[msgid]
    if translated then
        return translated
    end
    -- Parenthesized so that exactly one value is returned.
    return (GetText.wrapUntranslated(msgid))
end
|
|
|
|
setmetatable(GetText, GetText_mt)

-- Pick the startup language from the environment: the first variable that is
-- set, in this order, decides, and the remaining ones are not consulted.
for _, name in ipairs({ "LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG" }) do
    local value = os.getenv(name)
    if value then
        GetText.changeLang(value)
        break
    end
end
|
|
|
|
-- On Android, the language comes from the application's configuration.
if isAndroid then
    -- ffi.new zero-fills the buffer, so ffi.string() stops after whatever
    -- characters the calls below write at its start.
    local buf = ffi.new("char[?]", 16)
    android.lib.AConfiguration_getLanguage(android.app.config, buf)
    local lang = ffi.string(buf)
    android.lib.AConfiguration_getCountry(android.app.config, buf)
    local country = ffi.string(buf)
    -- ffi.string() always returns a string and "" is truthy in Lua, so the
    -- values have to be compared against "" explicitly. Without that, an unset
    -- value would call changeLang() with a name such as "_", which clears the
    -- translations loaded so far and then fails to find a catalog.
    -- NOTE(review): presumably an unset language/country leaves the leading
    -- bytes at zero — confirm against the NDK documentation.
    if lang ~= "" and country ~= "" then
        GetText.changeLang(lang.."_"..country)
    end
end
|
|
|
|
return GetText
|