Add hash-based document metadata storage option (#10945)

This option saves metadata sidecar (sdr) directories not next to the book or in koreader/docsettings/, but in koreader/hashdocsettings/ using the partial md5 hash of each documents, allowing users to move, rename, and copy their documents outside of KOReader without accidentally losing their highlights/notes/progress. Included are various warnings and info to users of the benefits and drawbacks of this non-default option.

Closes #10892.
This commit is contained in:
Ryan W West
2023-10-11 04:39:33 -04:00
committed by GitHub
parent 4eac18f9b9
commit 27104ea011
5 changed files with 344 additions and 88 deletions

View File

@@ -9,6 +9,7 @@ local Math = require("optmath")
local TileCacheItem = require("document/tilecacheitem")
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
local util = require("util")
--[[
This is an abstract interface to a document
@@ -145,16 +146,6 @@ end
-- calculate partial digest of the document and store in its docsettings to avoid document saving
-- feature to change its checksum.
--
-- To the calculating mechanism itself.
-- since only PDF documents could be modified by KOReader by appending data
-- at the end of the files when highlighting, we use a non-even sampling
-- algorithm which samples with larger weight at file head and much smaller
-- weight at file tail, thus reduces the probability that appended data may change
-- the digest value.
-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144
-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data
-- by highlighting in KOReader may change the digest value.
function Document:fastDigest(docsettings)
if not self.file then return end
local file = io.open(self.file, 'rb')
@@ -167,21 +158,7 @@ function Document:fastDigest(docsettings)
local result = docsettings:readSetting("partial_md5_checksum")
if not result then
logger.dbg("computing and storing partial_md5_checksum")
local bit = require("bit")
local md5 = require("ffi/sha2").md5
local lshift = bit.lshift
local step, size = 1024, 1024
local update = md5()
for i = -1, 10 do
file:seek("set", lshift(step, 2*i))
local sample = file:read(size)
if sample then
update(sample)
else
break
end
end
result = update()
result = util.partialMD5(file)
docsettings:saveSetting("partial_md5_checksum", result)
end
if tmp_docsettings then