mirror of
https://github.com/koreader/koreader.git
synced 2025-12-13 20:36:53 +01:00
htmlToPlainText(): Format paragraphs with indentation (#7027)
Cf. <https://www.mobileread.com/forums/showthread.php?p=4072308#post4072308>.
This commit is contained in:
@@ -1002,7 +1002,7 @@ This may fail on complex HTML (with styles, scripts, comments), but should be fi
|
|||||||
function util.htmlToPlainText(text)
|
function util.htmlToPlainText(text)
|
||||||
-- Replace <br> and <p> with \n
|
-- Replace <br> and <p> with \n
|
||||||
text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
|
text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
|
||||||
text = text:gsub("%s*<%s*p%s*>%s*", "\n") -- <p>
|
text = text:gsub("%s*<%s*p%s*>%s*", "\n\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0") -- <p>
|
||||||
text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
|
text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
|
||||||
text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
|
text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
|
||||||
-- Remove all HTML tags
|
-- Remove all HTML tags
|
||||||
@@ -1012,6 +1012,8 @@ function util.htmlToPlainText(text)
|
|||||||
-- Trim spaces and new lines at start and end
|
-- Trim spaces and new lines at start and end
|
||||||
text = text:gsub("^[\n%s]*", "")
|
text = text:gsub("^[\n%s]*", "")
|
||||||
text = text:gsub("[\n%s]*$", "")
|
text = text:gsub("[\n%s]*$", "")
|
||||||
|
-- Trim non-breaking spaces from the start
|
||||||
|
text = text:gsub("^\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0", "")
|
||||||
return text
|
return text
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user