mirror of
https://github.com/koreader/koreader.git
synced 2025-12-13 20:36:53 +01:00
Book information: refactored and additional features
- Factored out duplicate code from filemanager.lua and filemanagerhistory.lua into the new filemanagerbookinfo.lua (and other common code into filemanagerutil.lua). - Uses the sidecar files' new doc_props and doc_pages settings, falling back to the old 'stats' settings, or to opening the document. - Shows filename, file type and directory. - Shows description (hold to see the whole text when it is truncated), keywords, and cover image (tap to extract the image from the document and display it, if available). - Book information is now available from the reader menu, to display info about the currently opened book. - Converts a possibly-HTML description to plain text via the added util.htmlToPlainTextIfHtml() (for simple HTML conversion).
This commit is contained in:
@@ -6,6 +6,7 @@ local DocSettings = require("docsettings")
|
||||
local DocumentRegistry = require("document/documentregistry")
|
||||
local Event = require("ui/event")
|
||||
local FileChooser = require("ui/widget/filechooser")
|
||||
local FileManagerBookInfo = require("apps/filemanager/filemanagerbookinfo")
|
||||
local FileManagerConverter = require("apps/filemanager/filemanagerconverter")
|
||||
local FileManagerHistory = require("apps/filemanager/filemanagerhistory")
|
||||
local FileManagerMenu = require("apps/filemanager/filemanagermenu")
|
||||
@@ -15,7 +16,6 @@ local Geom = require("ui/geometry")
|
||||
local InfoMessage = require("ui/widget/infomessage")
|
||||
local InputContainer = require("ui/widget/container/inputcontainer")
|
||||
local InputDialog = require("ui/widget/inputdialog")
|
||||
local KeyValuePage = require("ui/widget/keyvaluepage")
|
||||
local PluginLoader = require("pluginloader")
|
||||
local ReaderDictionary = require("apps/reader/modules/readerdictionary")
|
||||
local ReaderUI = require("apps/reader/readerui")
|
||||
@@ -25,37 +25,13 @@ local TextWidget = require("ui/widget/textwidget")
|
||||
local VerticalGroup = require("ui/widget/verticalgroup")
|
||||
local VerticalSpan = require("ui/widget/verticalspan")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local filemanagerutil = require("apps/filemanager/filemanagerutil")
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local logger = require("logger")
|
||||
local util = require("ffi/util")
|
||||
local _ = require("gettext")
|
||||
local Screen = Device.screen
|
||||
|
||||
-- Return the directory the file manager starts in when no other
-- directory is known, chosen by device type.
local function getDefaultDir()
    if Device:isKindle() then
        return "/mnt/us/documents"
    end
    if Device:isKobo() then
        return "/mnt/onboard"
    end
    if Device:isAndroid() then
        return "/sdcard"
    end
    -- Any other device: the current working directory.
    return "."
end
|
||||
|
||||
-- Shorten a path for display: when a "home_dir_display_name" setting exists
-- and the path starts with the home directory, that leading part is replaced
-- by the display name. Otherwise the path is returned unchanged.
local function abbreviate(path)
    local display_name = G_reader_settings:readSetting("home_dir_display_name")
    if display_name == nil then
        return path
    end
    local home = G_reader_settings:readSetting("home_dir") or getDefaultDir()
    local home_len = home:len()
    if path:sub(1, home_len) ~= home then
        return path
    end
    return display_name .. path:sub(home_len + 1)
end
|
||||
|
||||
local function restoreScreenMode()
|
||||
local screen_mode = G_reader_settings:readSetting("fm_screen_mode")
|
||||
if Screen:getScreenMode() ~= screen_mode then
|
||||
@@ -80,7 +56,7 @@ function FileManager:init()
|
||||
|
||||
self.path_text = TextWidget:new{
|
||||
face = Font:getFace("xx_smallinfofont"),
|
||||
text = abbreviate(self.root_path),
|
||||
text = filemanagerutil.abbreviate(self.root_path),
|
||||
}
|
||||
|
||||
self.banner = FrameContainer:new{
|
||||
@@ -122,7 +98,7 @@ function FileManager:init()
|
||||
self.file_chooser = file_chooser
|
||||
|
||||
function file_chooser:onPathChanged(path) -- luacheck: ignore
|
||||
FileManager.instance.path_text:setText(abbreviate(path))
|
||||
FileManager.instance.path_text:setText(filemanagerutil.abbreviate(path))
|
||||
UIManager:setDirty(FileManager.instance, function()
|
||||
return "ui", FileManager.instance.banner.dimen
|
||||
end)
|
||||
@@ -259,20 +235,9 @@ function FileManager:init()
|
||||
},
|
||||
{
|
||||
text = _("Book information"),
|
||||
enabled = lfs.attributes(file, "mode") == "file"
|
||||
and not DocumentRegistry:getProvider(file).is_pic and true or false,
|
||||
enabled = FileManagerBookInfo:isSupported(file),
|
||||
callback = function()
|
||||
local book_info_metadata = FileManager:bookInformation(file)
|
||||
if book_info_metadata then
|
||||
UIManager:show(KeyValuePage:new{
|
||||
title = _("Book information"),
|
||||
kv_pairs = book_info_metadata,
|
||||
})
|
||||
else
|
||||
UIManager:show(InfoMessage:new{
|
||||
text = _("Cannot fetch information for a selected book"),
|
||||
})
|
||||
end
|
||||
FileManagerBookInfo:show(file)
|
||||
UIManager:close(self.file_dialog)
|
||||
end,
|
||||
},
|
||||
@@ -346,23 +311,6 @@ function FileManager:init()
|
||||
self:handleEvent(Event:new("SetDimensions", self.dimen))
|
||||
end
|
||||
|
||||
-- Build the key/value rows describing a book.
-- Returns false when `file` is not a regular file; otherwise returns whatever
-- FileManagerHistory:buildBookInformationTable() returns for the properties
-- taken from the sidecar 'stats' setting or, failing that, from the document.
function FileManager:bookInformation(file)
    if lfs.attributes(file, "mode") ~= "file" then
        return false
    end
    local props = DocSettings:open(file):readSetting('stats')
    if props == nil then
        -- No stored stats: open the document and query it directly.
        local doc = DocumentRegistry:openDocument(file)
        if doc.loadDocument then
            doc:loadDocument()
            doc:render()
        end
        props = doc:getProps()
        props.pages = doc:getPageCount()
    end
    return FileManagerHistory:buildBookInformationTable(props)
end
|
||||
|
||||
function FileManager:reinit(path)
|
||||
self.dimen = Screen:getSize()
|
||||
-- backup the root path and path items
|
||||
@@ -536,7 +484,7 @@ function FileManager:getSortingMenuTable()
|
||||
end
|
||||
|
||||
function FileManager:showFiles(path)
|
||||
path = path or G_reader_settings:readSetting("lastdir") or getDefaultDir()
|
||||
path = path or G_reader_settings:readSetting("lastdir") or filemanagerutil.getDefaultDir()
|
||||
G_reader_settings:saveSetting("lastdir", path)
|
||||
restoreScreenMode()
|
||||
local file_manager = FileManager:new{
|
||||
|
||||
183
frontend/apps/filemanager/filemanagerbookinfo.lua
Normal file
183
frontend/apps/filemanager/filemanagerbookinfo.lua
Normal file
@@ -0,0 +1,183 @@
|
||||
--[[--
|
||||
This module provides a way to display book information (filename and book metadata)
|
||||
]]
|
||||
|
||||
local DocSettings = require("docsettings")
|
||||
local DocumentRegistry = require("document/documentregistry")
|
||||
local ImageViewer = require("ui/widget/imageviewer")
|
||||
local InfoMessage = require("ui/widget/infomessage")
|
||||
local InputContainer = require("ui/widget/container/inputcontainer")
|
||||
local KeyValuePage = require("ui/widget/keyvaluepage")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local filemanagerutil = require("apps/filemanager/filemanagerutil")
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local util = require("util")
|
||||
local _ = require("gettext")
|
||||
|
||||
-- BookInfo: an InputContainer-derived module that shows a book's file
-- details and metadata in a key/value page.
local BookInfo = InputContainer:extend({
    -- Label of the entry added to the reader main menu (see addToMainMenu).
    bookinfo_menu_title = _("Book information"),
})
|
||||
|
||||
-- Register this module with the main menu when it was created with a `ui`
-- (i.e. as a Reader module); without a `ui` there is nothing to do.
function BookInfo:init()
    if not self.ui then
        return
    end
    self.ui.menu:registerToMainMenu(self)
end
|
||||
|
||||
--- Add the "Book information" entry to the reader main menu.
-- The entry's callback shows information about the currently opened book,
-- using the properties held in ReaderUI's doc_settings.
-- @param menu_items table of menu items; its `book_info` field is set here
function BookInfo:addToMainMenu(menu_items)
    menu_items.book_info = {
        text = self.bookinfo_menu_title,
        callback = function()
            local doc_settings = self.ui.doc_settings
            -- Get them directly from ReaderUI's doc_settings.
            -- The setting may be absent (readSetting then yields nil), and
            -- pairs(nil) would raise, so fall back to an empty table.
            local doc_props = doc_settings:readSetting("doc_props") or {}
            -- Make a copy, so we don't add "pages" to the original doc_props
            -- that will be saved at some point by ReaderUI.
            local book_props = {}
            for k, v in pairs(doc_props) do
                book_props[k] = v
            end
            book_props.pages = doc_settings:readSetting("doc_pages")
            self:show(self.document.file, book_props)
        end,
    }
end
|
||||
|
||||
-- Tell whether book information can be shown for `file`:
-- true only when lfs reports it as a regular file.
function BookInfo:isSupported(file)
    local mode = lfs.attributes(file, "mode")
    return mode == "file"
end
|
||||
|
||||
--- Show a "Book information" page for a file.
-- Displays filename, format and directory, followed by the book metadata
-- (title, authors, series, pages, language, keywords, description) and an
-- entry that displays the cover image when tapped.
-- @string file path to the book
-- @param book_props optional table of already-known metadata; when missing
-- (or lacking `pages`), the sidecar settings are consulted, and the document
-- itself is opened only as a last resort
function BookInfo:show(file, book_props)
    -- Replace a missing or empty value with the localized "N/A".
    local function valueOrNA(value)
        if value == nil or value == "" then
            return _("N/A")
        end
        return value
    end

    local kv_pairs = {}

    local directory, filename = util.splitFilePathName(file)
    -- Only the suffix (second return value) is needed here.
    local filetype = select(2, util.splitFileNameSuffix(filename))
    table.insert(kv_pairs, { _("Filename:"), filename })
    table.insert(kv_pairs, { _("Format:"), filetype:upper() })
    table.insert(kv_pairs, { _("Directory:"), filemanagerutil.abbreviate(directory) })
    table.insert(kv_pairs, "----")

    -- book_props may be provided if caller already has them available
    -- but it may lack 'pages', that we may get from sidecar file
    if not book_props or not book_props.pages then
        local doc_settings = DocSettings:open(file)
        if doc_settings then
            if not book_props then
                -- Files opened after 20170701 have a 'doc_props' setting with
                -- complete metadata and 'doc_pages' with accurate nb of pages
                book_props = doc_settings:readSetting('doc_props')
            end
            if not book_props then
                -- File last opened before 20170701 may have a 'stats' setting
                -- with partial metadata, or empty metadata if statistics plugin
                -- was not enabled when book was read (we can guess that from
                -- the fact that stats.pages = 0)
                local stats = doc_settings:readSetting('stats')
                if stats and stats.pages ~= 0 then
                    -- Let's use them as is (which was what was done before), even if
                    -- incomplete, to avoid expensive book opening
                    book_props = stats
                end
            end
            -- Files opened after 20170701 have an accurate 'doc_pages' setting
            local doc_pages = doc_settings:readSetting('doc_pages')
            if doc_pages and book_props then
                book_props.pages = doc_pages
            end
        end
    end

    -- If still no book_props (book never opened or empty 'stats'), open the
    -- document to get them
    if not book_props then
        book_props = {}
        local document = DocumentRegistry:openDocument(file)
        -- The document may fail to open (the cover callback below guards
        -- against the same thing); in that case every field shows "N/A".
        if document then
            local pages
            if document.loadDocument then -- needed for crengine
                document:loadDocument()
                -- document:render()
                -- It would be needed to get nb of pages, but the nb obtained
                -- by simply calling here document:getPageCount() is wrong,
                -- often 2 to 3 times the nb of pages we see when opening
                -- the document (may be some other cre settings should be applied
                -- before calling render() ?)
            else
                -- for all others than crengine, we seem to get an accurate nb of pages
                pages = document:getPageCount()
            end
            -- via pcall because picdocument:getProps() always fails (we could
            -- check document.is_pic, but this way, we'll catch any other error)
            local ok, props = pcall(document.getProps, document)
            if ok and type(props) == "table" then
                book_props = props
            end
            book_props.pages = pages
            DocumentRegistry:closeDocument(file)
        end
    end

    -- Plain metadata fields, in display order: { label, key in book_props }
    local simple_fields = {
        { _("Title:"), "title" },
        { _("Authors:"), "authors" },
        { _("Series:"), "series" },
        { _("Pages:"), "pages" },
        { _("Language:"), "language" },
        { _("Keywords:"), "keywords" },
    }
    for i = 1, #simple_fields do
        local label, key = simple_fields[i][1], simple_fields[i][2]
        table.insert(kv_pairs, { label, valueOrNA(book_props[key]) })
    end

    local description = book_props.description
    if description == nil or description == "" then
        description = _("N/A")
    else
        -- Description may (often in EPUB, but not always) or may not (rarely
        -- in PDF) be HTML.
        description = util.htmlToPlainTextIfHtml(description)
    end
    table.insert(kv_pairs, { _("Description:"), description })

    -- Cover image: extracted from the document only when the entry is tapped
    local viewCoverImage = function()
        local widget
        local document = DocumentRegistry:openDocument(file)
        if document then
            local cover_bb = document:getCoverPageImage()
            if cover_bb then
                widget = ImageViewer:new{
                    image = cover_bb,
                    with_title_bar = false,
                    fullscreen = true,
                }
            end
            DocumentRegistry:closeDocument(file)
        end
        if not widget then
            widget = InfoMessage:new{
                text = _("No cover image available"),
            }
        end
        UIManager:show(widget)
    end
    table.insert(kv_pairs, { _("Cover image:"), _("Tap to display"), callback=viewCoverImage })

    local widget = KeyValuePage:new{
        title = _("Book information"),
        kv_pairs = kv_pairs,
    }
    UIManager:show(widget)
end
|
||||
|
||||
return BookInfo
|
||||
@@ -1,10 +1,8 @@
|
||||
local ButtonDialog = require("ui/widget/buttondialog")
|
||||
local CenterContainer = require("ui/widget/container/centercontainer")
|
||||
local DocSettings = require("docsettings")
|
||||
local FileManagerBookInfo = require("apps/filemanager/filemanagerbookinfo")
|
||||
local Font = require("ui/font")
|
||||
local InfoMessage = require("ui/widget/infomessage")
|
||||
local InputContainer = require("ui/widget/container/inputcontainer")
|
||||
local KeyValuePage = require("ui/widget/keyvaluepage")
|
||||
local Menu = require("ui/widget/menu")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local RenderText = require("ui/rendertext")
|
||||
@@ -39,48 +37,6 @@ function FileManagerHistory:onSetDimensions(dimen)
|
||||
self.dimen = dimen
|
||||
end
|
||||
|
||||
-- Turn a table of book properties into the list of { label, value } rows
-- shown on the book information page. Returns false when book_props is nil.
-- Missing or empty fields are replaced, in book_props itself, by the
-- localized "N/A".
function FileManagerHistory:buildBookInformationTable(book_props)
    if book_props == nil then
        return false
    end

    local keys = { "authors", "title", "series", "pages", "language" }
    for i = 1, #keys do
        local key = keys[i]
        local value = book_props[key]
        if value == nil or value == "" then
            book_props[key] = _("N/A")
        end
    end

    local rows = {}
    rows[1] = { _("Title:"), book_props.title }
    rows[2] = { _("Authors:"), book_props.authors }
    rows[3] = { _("Series:"), book_props.series }
    rows[4] = { _("Pages:"), book_props.pages }
    rows[5] = { _("Language:"), string.upper(book_props.language) }
    return rows
end
|
||||
|
||||
-- Build the book information rows for `file` from its sidecar 'stats'
-- setting. Returns false when `file` is not a regular file or has no
-- 'stats' setting.
function FileManagerHistory:bookInformation(file)
    if lfs.attributes(file, "mode") ~= "file" then
        return false
    end
    local stats = DocSettings:open(file):readSetting('stats')
    if stats == nil then
        return false
    end
    return self:buildBookInformationTable(stats)
end
|
||||
|
||||
function FileManagerHistory:onMenuHold(item)
|
||||
local font_size = Font:getFace("tfont")
|
||||
local text_remove_hist = _("Remove \"%1\" from history")
|
||||
@@ -113,18 +69,9 @@ function FileManagerHistory:onMenuHold(item)
|
||||
{
|
||||
{
|
||||
text = _("Book information"),
|
||||
enabled = FileManagerBookInfo:isSupported(item.file),
|
||||
callback = function()
|
||||
local book_info_metadata = FileManagerHistory:bookInformation(item.file)
|
||||
if book_info_metadata then
|
||||
UIManager:show(KeyValuePage:new{
|
||||
title = _("Book information"),
|
||||
kv_pairs = book_info_metadata,
|
||||
})
|
||||
else
|
||||
UIManager:show(InfoMessage:new{
|
||||
text = _("Cannot fetch information for a selected book"),
|
||||
})
|
||||
end
|
||||
FileManagerBookInfo:show(item.file)
|
||||
UIManager:close(self.histfile_dialog)
|
||||
end,
|
||||
},
|
||||
|
||||
34
frontend/apps/filemanager/filemanagerutil.lua
Normal file
34
frontend/apps/filemanager/filemanagerutil.lua
Normal file
@@ -0,0 +1,34 @@
|
||||
--[[--
|
||||
This module contains miscellaneous helper functions for FileManager
|
||||
]]
|
||||
|
||||
local Device = require("device")
|
||||
|
||||
local filemanagerutil = {}
|
||||
|
||||
-- Return the default directory for the current device type
-- ("." when the device is none of Kindle, Kobo or Android).
function filemanagerutil.getDefaultDir()
    local default_dir = "."
    if Device:isKindle() then
        default_dir = "/mnt/us/documents"
    elseif Device:isKobo() then
        default_dir = "/mnt/onboard"
    elseif Device:isAndroid() then
        default_dir = "/sdcard"
    end
    return default_dir
end
|
||||
|
||||
--- Shorten a path for display.
-- When a "home_dir_display_name" setting exists and `path` is the home
-- directory or lies inside it, the home directory part is replaced by that
-- display name. Any other path is returned unchanged.
-- @string path path to abbreviate
-- @treturn string abbreviated path
function filemanagerutil.abbreviate(path)
    local home_dir_name = G_reader_settings:readSetting("home_dir_display_name")
    if home_dir_name == nil then
        return path
    end
    local home_dir = G_reader_settings:readSetting("home_dir") or filemanagerutil.getDefaultDir()
    local len = home_dir:len()
    if path:sub(1, len) ~= home_dir then
        return path
    end
    -- A bare prefix match is not enough: "/mnt/onboard2/x" starts with
    -- "/mnt/onboard" but is not inside it. Require the match to stop at a
    -- path boundary (end of path, a "/" right after, or home_dir itself
    -- ending with "/").
    local next_char = path:sub(len + 1, len + 1)
    if next_char ~= "" and next_char ~= "/" and home_dir:sub(-1) ~= "/" then
        return path
    end
    return home_dir_name .. path:sub(len + 1)
end
|
||||
|
||||
return filemanagerutil
|
||||
@@ -10,6 +10,7 @@ local Device = require("device")
|
||||
local DocSettings = require("docsettings")
|
||||
local DocumentRegistry = require("document/documentregistry")
|
||||
local Event = require("ui/event")
|
||||
local FileManagerBookInfo = require("apps/filemanager/filemanagerbookinfo")
|
||||
local FileManagerHistory = require("apps/filemanager/filemanagerhistory")
|
||||
local Geom = require("ui/geometry")
|
||||
local InfoMessage = require("ui/widget/infomessage")
|
||||
@@ -317,6 +318,12 @@ function ReaderUI:init()
|
||||
dialog = self.dialog,
|
||||
ui = self,
|
||||
})
|
||||
-- book info
|
||||
self:registerModule("bookinfo", FileManagerBookInfo:new{
|
||||
dialog = self.dialog,
|
||||
document = self.document,
|
||||
ui = self,
|
||||
})
|
||||
-- koreader plugins
|
||||
for _, plugin_module in ipairs(PluginLoader:loadPlugins()) do
|
||||
local ok, plugin_or_err = PluginLoader:createPluginInstance(
|
||||
|
||||
@@ -81,6 +81,7 @@ local order = {
|
||||
main = {
|
||||
"history",
|
||||
"book_status",
|
||||
"book_info",
|
||||
"----------------------------",
|
||||
"ota_update", --[[ if Device:isKindle() or Device:isKobo() or
|
||||
Device:isPocketBook() or Device:isAndroid() ]]--
|
||||
|
||||
@@ -400,4 +400,105 @@ function util.splitToArray(str, splitter, capture_empty_entity)
|
||||
return result
|
||||
end
|
||||
|
||||
--- Convert a Unicode codepoint (number) to UTF8 char
--
-- Anything that is not a valid Unicode scalar value (not a number, NaN,
-- negative, above U+10FFFF, or in the surrogate range U+D800..U+DFFF) is
-- converted to U+FFFD REPLACEMENT CHARACTER.
--- @int c Unicode codepoint
--- @treturn string UTF8 char
function util.unicodeCodepointToUtf8(c)
    local floor = math.floor
    if type(c) ~= "number" or c ~= c -- not a number, or NaN
            or c < 0 or c >= 1114112 -- outside U+0000..U+10FFFF
            or (c >= 55296 and c <= 57343) then -- surrogates have no UTF8 form
        return "\239\191\189" -- U+FFFD REPLACEMENT CHARACTER
    end
    -- Work on integers only: string.char() rejects fractional values on
    -- Lua versions that have an integer subtype.
    c = floor(c)
    if c < 128 then
        return string.char(c)
    elseif c < 2048 then
        return string.char(192 + floor(c / 64), 128 + c % 64)
    elseif c < 65536 then
        return string.char(224 + floor(c / 4096), 128 + floor(c / 64) % 64, 128 + c % 64)
    else
        return string.char(240 + floor(c / 262144), 128 + floor(c / 4096) % 64,
                           128 + floor(c / 64) % 64, 128 + c % 64)
    end
end
|
||||
|
||||
-- Ordered list of { Lua pattern, replacement } pairs.
-- This is an array rather than a map on purpose: pairs() visits keys in an
-- unspecified order, and "&amp;" has to be handled after everything else so
-- that text like "&amp;lt;" is decoded only once (to "&lt;").
local HTML_ENTITIES_TO_UTF8 = {
    { "&lt;", "<" },
    { "&gt;", ">" },
    { "&quot;", '"' },
    { "&apos;", "'" },
    { "&nbsp;", "\xC2\xA0" },
    { "&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end },
    { "&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x,16)) end },
    { "&amp;", "&" }, -- must be last
}

--- Replace HTML entities with their UTF8 equivalent in text
--
-- Supports only basic ones and those with numbers (no support
-- for named entities like &eacute;)
--- @string text text with HTML entities
--- @treturn string UTF8 text
function util.htmlEntitiesToUtf8(text)
    for i = 1, #HTML_ENTITIES_TO_UTF8 do
        local entity = HTML_ENTITIES_TO_UTF8[i]
        -- plain assignment keeps only gsub's first result (the new string)
        text = text:gsub(entity[1], entity[2])
    end
    return text
end
|
||||
|
||||
--- Convert simple HTML to plain text
-- Line breaks and paragraph tags become newlines, every other tag is dropped,
-- entities are decoded and surrounding whitespace is trimmed.
-- This may fail on complex HTML (with styles, scripts, comments), but should
-- be fine enough with simple HTML as found in EPUB's <dc:description>.
--
--- @string text HTML text
--- @treturn string plain text
function util.htmlToPlainText(text)
    -- Tags (with any whitespace around them) that are turned into "\n"
    local newline_patterns = {
        "%s*<%s*br%s*/?>%s*", -- <br> and <br/>
        "%s*<%s*p%s*>%s*", -- <p>
        "%s*</%s*p%s*>%s*", -- </p>
        "%s*<%s*p%s*/>%s*", -- standalone <p/>
    }
    for i = 1, #newline_patterns do
        text = text:gsub(newline_patterns[i], "\n")
    end
    -- Drop every remaining tag, then decode the entities
    local plain = text:gsub("<[^>]*>", "")
    plain = util.htmlEntitiesToUtf8(plain)
    -- Strip leading and trailing spaces and new lines
    plain = plain:gsub("^[\n%s]*", "")
    plain = plain:gsub("[\n%s]*$", "")
    return plain
end
|
||||
|
||||
--- Convert HTML to plain text if text seems to be HTML
-- Detection of HTML is simple and may raise false positives
-- or negatives, but seems quite good at guessing content type
-- of text found in EPUB's <dc:description>.
--
--- @string text the string with possibly some HTML
--- @treturn string cleaned text
function util.htmlToPlainTextIfHtml(text)
    -- Quick way to check if text is some HTML: look for html tags
    local is_html = text:find("<%w+.->") ~= nil
    if not is_html and text:find("&lt;%a+&gt;") then
        -- No <tag> found, but the text holds entity-encoded tags such as
        -- "&lt;br&gt;": it is HTML that has been encoded twice.
        is_html = true
        -- decode one of the two encodes
        text = util.htmlEntitiesToUtf8(text)
    end

    if is_html then
        text = util.htmlToPlainText(text)
    else
        -- if text ends with ]]>, it probably comes from <![CDATA[ .. ]]> that
        -- crengine has extracted correctly, but let the ending tag in, so
        -- let's remove it
        text = text:gsub("]]>%s*$", "")
    end
    return text
end
|
||||
|
||||
return util
|
||||
|
||||
@@ -283,4 +283,17 @@ describe("util module", function()
|
||||
assert.are_same(util.splitToArray("100 abc def ghi200 ", " ", false),
|
||||
{"100", "abc", "def", "ghi200"})
|
||||
end)
|
||||
|
||||
it("should guess it is not HTML and let is as is", function()
|
||||
local s = "if (i < 0 && j < 0) j = i&"
|
||||
assert.is_equal(util.htmlToPlainTextIfHtml(s), s)
|
||||
end)
|
||||
it("should guess it is HTML and convert it to text", function()
|
||||
assert.is_equal(util.htmlToPlainTextIfHtml("<div> <br> Making <b>unit tests</b> is <i class='notreally'>fun & nécéssaire</i><br/> </div>"),
|
||||
"Making unit tests is fun & nécéssaire")
|
||||
end)
|
||||
it("should guess it is double encoded HTML and convert it to text", function()
|
||||
assert.is_equal(util.htmlToPlainTextIfHtml("Deux parties.<br>Prologue.Désespérée, elle le tue...<br>Première partie. Sur la route &amp; dans la nuit"),
|
||||
"Deux parties.\nPrologue.Désespérée, elle le tue...\nPremière partie. Sur la route & dans la nuit")
|
||||
end)
|
||||
end)
|
||||
|
||||
Reference in New Issue
Block a user