mirror of
https://github.com/koreader/koreader.git
synced 2025-12-13 20:36:53 +01:00
NewsDownloader: add date time to filename (#3863)
* Download all files from ftp server (not only epub). Show failed download number * add date and time to filename * optimization - decrease max redirect number * remove new external lib from luacheck * add new lib licence info
This commit is contained in:
@@ -98,8 +98,10 @@ read_globals = {
|
|||||||
exclude_files = {
|
exclude_files = {
|
||||||
"frontend/luxl.lua",
|
"frontend/luxl.lua",
|
||||||
"plugins/newsdownloader.koplugin/lib/handler.lua",
|
"plugins/newsdownloader.koplugin/lib/handler.lua",
|
||||||
"plugins/newsdownloader.koplugin/lib/LICENSE",
|
"plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML",
|
||||||
"plugins/newsdownloader.koplugin/lib/xml.lua",
|
"plugins/newsdownloader.koplugin/lib/xml.lua",
|
||||||
|
"plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser",
|
||||||
|
"plugins/newsdownloader.koplugin/lib/dateparser.lua",
|
||||||
}
|
}
|
||||||
|
|
||||||
-- don't balk on busted stuff in spec
|
-- don't balk on busted stuff in spec
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ local socket = require('socket')
|
|||||||
local socket_url = require("socket.url")
|
local socket_url = require("socket.url")
|
||||||
|
|
||||||
local InternalDownloadBackend = {}
|
local InternalDownloadBackend = {}
|
||||||
local max_redirects = 10; --prevent infinite redirects
|
local max_redirects = 5; --prevent infinite redirects
|
||||||
|
|
||||||
function InternalDownloadBackend:getResponseAsString(url, redirectCount)
|
function InternalDownloadBackend:getResponseAsString(url, redirectCount)
|
||||||
if not redirectCount then
|
if not redirectCount then
|
||||||
|
|||||||
28
plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
Normal file
28
plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
feedparser is available under the (new) BSD license. it uses a
|
||||||
|
portion of LuaSocket code (copyright 2007 Diego Nehab)
|
||||||
|
(http://www.keplerproject.org/luaexpat/), which is under the MIT license.
|
||||||
|
|
||||||
|
Copyright (c) 2009 Leo Ponomarev.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the <organization> nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY <copyright holder> ''AS IS'' AND ANY
|
||||||
|
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
192
plugins/newsdownloader.koplugin/lib/dateparser.lua
Normal file
192
plugins/newsdownloader.koplugin/lib/dateparser.lua
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
local difftime, time, date = os.difftime, os.time, os.date
|
||||||
|
local format = string.format
|
||||||
|
local tremove, tinsert = table.remove, table.insert
|
||||||
|
local pcall, pairs, ipairs, tostring, tonumber, type, setmetatable = pcall, pairs, ipairs, tostring, tonumber, type, setmetatable
|
||||||
|
|
||||||
|
local dateparser={}
|
||||||
|
|
||||||
|
-- Time conversion is delegated to the host's os.time rather than done by hand
-- (leap seconds and friends are the OS's problem).
--
-- unix_timestamp(t, offset_sec)
--   t          : date table handed to os.time
--   offset_sec : number of seconds subtracted from the result (the parsers
--                below pass the date's UTC offset here)
-- Returns the corrected timestamp, or nil plus a message when os.time raises
-- or yields nothing.
local unix_timestamp
do
	local reference = time()
	-- os.time reads a date table as local time, so feeding it the UTC fields
	-- of "reference" and comparing with the local fields gives the shift that
	-- has to be removed again after every conversion.
	local utc_fields_stamp = time(date("!*t", reference))
	local local_fields_stamp = time(date("*t", reference))
	local local_UTC_offset_sec = difftime(utc_fields_stamp, local_fields_stamp)

	unix_timestamp = function(t, offset_sec)
		-- pcall: os.time raises on tables with missing fields
		local ok, stamp = pcall(time, t)
		if ok and stamp then
			return stamp - local_UTC_offset_sec - offset_sec
		end
		-- on a raised error "stamp" holds the error message
		return nil, "invalid date. os.time says: " .. (stamp or "nothing")
	end
end
|
||||||
|
|
||||||
|
local formats = {} -- format names
|
||||||
|
local format_func = setmetatable({}, {__mode='v'}) --format functions
|
||||||
|
|
||||||
|
---Register a parsing function under a format name.
--@param format_name string key used to look the parser up later
--@param format_function function called with the date string
--@return true on success; nil, error message when the arguments have the wrong types
function dateparser.register_format(format_name, format_function)
	local args_ok = type(format_name) == "string" and type(format_function) == 'function'
	if not args_ok then
		return nil, "improper arguments, can't register format handler"
	end

	-- format_func is the ordered list walked during auto-detection; a function
	-- registered under several names must appear in it only once.
	local idx = 1
	local known = format_func[idx]
	while known ~= nil and known ~= format_function do
		idx = idx + 1
		known = format_func[idx]
	end
	if known == nil then
		tinsert(format_func, format_function)
	end

	formats[format_name] = format_function
	return true
end
|
||||||
|
|
||||||
|
---Unregister a date format by name.
-- The parsing function is also dropped from the ordered auto-detection list,
-- unless it is still registered under another name (an alias).
--@param format_name string name previously passed to register_format
--@return true on success (also when the name was not registered); nil, error message if format_name is not a string
function dateparser.unregister_format(format_name)
	if type(format_name)~="string" then return nil, "format name must be a string" end

	local format_function = formats[format_name]
	formats[format_name] = nil
	if format_function == nil then return true end

	-- keep the function for auto-detection while any alias still points at it
	for _, other in pairs(formats) do
		if other == format_function then return true end
	end

	-- register_format stores each function at most once, so one removal is
	-- enough; tremove shifts later entries down and keeps the list a sequence
	-- that ipairs can walk completely.
	for i, f in ipairs(format_func) do
		if f == format_function then
			tremove(format_func, i)
			break
		end
	end
	return true
end
|
||||||
|
|
||||||
|
---Return the function responsible for handling format_name date strings.
--@param format_name name the format was registered under
--@return the parsing function; or nil, error message when no such format is registered
function dateparser.get_format_function(format_name)
	local format_function = formats[format_name]
	if format_function then
		-- success: no error message alongside the function
		return format_function
	end
	-- tostring keeps the message buildable for nil or other non-string names
	return nil, ("format %s not registered"):format(tostring(format_name))
end
|
||||||
|
|
||||||
|
---try to parse date string
--@param str date string
--@param date_format optional date format name, if known
--@return unix timestamp if str can be parsed; nil, error otherwise.
function dateparser.parse(str, date_format)
	if date_format then
		local format_function = formats[date_format]
		if not format_function then
			-- nil first, so the message can never be mistaken for a timestamp
			return nil, 'unknown date format: ' .. tostring(date_format)
		end
		-- format functions may raise on malformed input, hence pcall
		local success, res, err = pcall(format_function, str)
		if success and res then return res end
		if not success then return nil, tostring(res) end -- res is the raised error
		return nil, err or ("date does not match format " .. tostring(date_format))
	end

	-- no format given: try every registered parser in registration order
	for _, func in ipairs(format_func) do
		local success, res = pcall(func, str)
		if success and res then return res end
	end
	return nil, "date does not match any registered format"
end
|
||||||
|
|
||||||
|
-- W3CDTF parser: YYYY[-MM[-DD[Thh:mm[:ss[.fraction]]TZD]]] where TZD is "Z" or
-- a signed hh[:mm] offset. The dashes and colons are optional, as before.
-- Returns a unix timestamp (with the fractional seconds added when present),
-- or nil when the string does not have this shape; a failed os.time
-- conversion yields nil plus the message from unix_timestamp.
dateparser.register_format('W3CDTF', function(rest)
	if type(rest) ~= "string" then return nil end

	local year, month, day
	local hour, minute, second, second_fraction
	local offset_hours = 0
	-- remainder after an optional component; adopted only if that component matched
	local tail

	year, rest = rest:match("^(%d%d%d%d)%-?(.*)$")
	if not year then return nil end

	-- month and day are optional: missing ones default to 1 below
	month, tail = rest:match("^(%d%d)%-?(.*)$")
	if month then
		rest = tail
		day, tail = rest:match("^(%d%d)(.*)$")
		if day then rest = tail end
	end

	if #rest>0 then
		-- a time of day may only follow a complete date
		if not day then return nil end
		rest = rest:match("^T(.*)$")
		if not rest then return nil end

		hour, rest = rest:match("^([0-2][0-9]):?(.*)$")
		if not hour then return nil end
		minute, rest = rest:match("^([0-6][0-9]):?(.*)$")
		if not minute then return nil end

		-- seconds (and their fraction) are optional
		second, tail = rest:match("^([0-6][0-9])(.*)$")
		if second then
			rest = tail
			second_fraction, tail = rest:match("^%.(%d+)(.*)$")
			if second_fraction then rest = tail end
		end

		-- a zone designator is required once a time is present
		if rest=="Z" then
			rest=""
		else
			local sign, offset_h, offset_m
			sign, offset_h, rest = rest:match("^([+-])(%d%d)%:?(.*)$")
			if not sign then return nil end
			offset_m, tail = rest:match("^(%d%d)(.*)$")
			if offset_m then rest = tail end
			-- the sign applies to hours AND minutes: -05:30 is -5.5h, not -4.5h
			local magnitude = tonumber(offset_h) + (tonumber(offset_m) or 0)/60
			offset_hours = (sign == "-") and -magnitude or magnitude
		end
		if #rest>0 then return nil end -- trailing garbage
	end

	year = tonumber(year)
	local d = {
		-- values up to 100 go through the two-digit pivot (<50 -> 20xx, else 19xx)
		year = year and (year > 100 and year or (year < 50 and (year + 2000) or (year + 1900))),
		month = tonumber(month) or 1,
		day = tonumber(day) or 1,
		hour = tonumber(hour) or 0,
		min = tonumber(minute) or 0,
		sec = tonumber(second) or 0,
		isdst = false
	}
	local t, err = unix_timestamp(d, offset_hours * 3600)
	if not t then return nil, err end
	if second_fraction then
		return t + tonumber("0."..second_fraction)
	end
	return t
end)
|
||||||
|
|
||||||
|
|
||||||
|
do
	-- zone abbreviation -> offset from UTC in hours
	local tz_table = { --taken from http://www.timeanddate.com/library/abbreviations/timezones/
		A = 1, B = 2, C = 3, D = 4, E=5, F = 6, G = 7, H = 8, I = 9,
		K = 10, L = 11, M = 12, N = -1, O = -2, P = -3, Q = -4, R = -5,
		S = -6, T = -7, U = -8, V = -9, W = -10, X = -11, Y = -12,
		Z = 0,

		EST = -5, EDT = -4, CST = -6, CDT = -5,
		MST = -7, MDT = -6, PST = -8, PDT = -7,

		GMT = 0, UT = 0, UTC = 0
	}

	local month_val = {Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12}

	-- RFC 2822 parser: "[Www, ]D[D] Mon YY[YY] H[H]:M[M][:SS] ZONE" where ZONE
	-- is an upper-case abbreviation or a signed hhmm offset.
	-- Returns a unix timestamp, or nil when the string does not have this
	-- shape; a failed os.time conversion yields nil plus the message from
	-- unix_timestamp.
	dateparser.register_format('RFC2822', function(rest)
		if type(rest) ~= "string" then return nil end

		local year, month, day
		local hour, minute, second
		local offset_hours
		-- remainder after an optional component; adopted only if that component matched
		local tail

		-- optional weekday prefix; its value is not needed
		local weekday
		weekday, tail = rest:match("^(%w%w%w),%s+(.*)$")
		if weekday then rest=tail end

		day, rest = rest:match("^(%d%d?)%s+(.*)$")
		if not day then return nil end

		month, rest = rest:match("^(%w%w%w)%s+(.*)$")
		month = month and month_val[month]
		if not month then return nil end -- missing or unknown month name

		year, rest = rest:match("^(%d%d%d?%d?)%s+(.*)$")
		if not year then return nil end

		hour, rest = rest:match("^(%d%d?):(.*)$")
		if not hour then return nil end
		minute, rest = rest:match("^(%d%d?)(.*)$")
		if not minute then return nil end

		-- seconds are optional
		second, tail = rest:match("^:(%d%d)(.*)$")
		if second then rest = tail end

		local tz
		tz, tail = rest:match("^%s+(%u+)(.*)$")
		if tz then
			rest = tail
			-- RFC 2822 section 4.3: zones whose meaning is not known are to be
			-- treated as -0000
			offset_hours = tz_table[tz] or 0
		else
			local offset_sign, offset_h, offset_m
			offset_sign, offset_h, offset_m, rest = rest:match("^%s+([+-])(%d%d)(%d%d)%s*(.*)$")
			if not offset_sign then return nil end
			-- the sign applies to hours AND minutes: -0330 is -3.5h, not -2.5h
			local magnitude = tonumber(offset_h) + tonumber(offset_m)/60
			offset_hours = (offset_sign == "-") and -magnitude or magnitude
		end

		if #rest>0 then return nil end -- trailing garbage

		year = tonumber(year)
		local d = {
			-- values up to 100 go through the two-digit pivot (<50 -> 20xx, else 19xx)
			year = year and ((year > 100) and year or (year < 50 and (year + 2000) or (year + 1900))),
			month = month,
			day = tonumber(day),

			hour= tonumber(hour) or 0,
			min = tonumber(minute) or 0,
			sec = tonumber(second) or 0,
			isdst = false
		}
		return unix_timestamp(d, offset_hours * 3600)
	end)
end
|
||||||
|
|
||||||
|
dateparser.register_format('RFC822', formats.RFC2822) -- RFC 2822 supersedes RFC 822 but is not a strict superset of it; the RFC2822 parser is reused here because it is good enough for our purposes
|
||||||
|
dateparser.register_format('RFC3339', formats.W3CDTF) --RFC3339 is a subset of W3CDTF
|
||||||
|
|
||||||
|
|
||||||
|
return dateparser
|
||||||
@@ -8,6 +8,7 @@ local LuaSettings = require("frontend/luasettings")
|
|||||||
local UIManager = require("ui/uimanager")
|
local UIManager = require("ui/uimanager")
|
||||||
local NetworkMgr = require("ui/network/manager")
|
local NetworkMgr = require("ui/network/manager")
|
||||||
local WidgetContainer = require("ui/widget/container/widgetcontainer")
|
local WidgetContainer = require("ui/widget/container/widgetcontainer")
|
||||||
|
local dateparser = require("lib.dateparser")
|
||||||
local ffi = require("ffi")
|
local ffi = require("ffi")
|
||||||
local logger = require("logger")
|
local logger = require("logger")
|
||||||
local util = require("util")
|
local util = require("util")
|
||||||
@@ -245,6 +246,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
|
|||||||
-- uses LuaXML https://github.com/manoelcampos/LuaXML
|
-- uses LuaXML https://github.com/manoelcampos/LuaXML
|
||||||
-- The MIT License (MIT)
|
-- The MIT License (MIT)
|
||||||
-- Copyright (c) 2016 Manoel Campos da Silva Filho
|
-- Copyright (c) 2016 Manoel Campos da Silva Filho
|
||||||
|
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
|
||||||
local treehdl = require("lib/handler")
|
local treehdl = require("lib/handler")
|
||||||
local libxml = require("lib/xml")
|
local libxml = require("lib/xml")
|
||||||
|
|
||||||
@@ -297,10 +299,30 @@ function NewsDownloader:processRSS(feeds, limit, download_full_article)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Turn a feed date string into a filename prefix "yy-mm-dd_HH-MM_"
-- (formatted by os.date in local time).
-- When the string cannot be parsed the current time is used instead, so a
-- malformed date never aborts the download.
local function parseDate(dateTime)
-- uses lua-feedparser https://github.com/slact/lua-feedparser
-- feedparser is available under the (new) BSD license.
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
local date = dateparser.parse(dateTime)
if type(date) ~= "number" then
-- parse signals failure with nil/false (or other non-numbers); os.date
-- raises on anything but a number or nil, and nil means "now"
date = nil
end
return os.date("%y-%m-%d_%H-%M_", date)
end
|
||||||
|
|
||||||
|
-- Build the file title for a feed entry: the sanitized feed title, prefixed
-- with a date stamp taken from the first of feed.updated, feed.pubDate or
-- feed.published that is set. Without any of them the bare title is returned.
local function getTitleWithDate(feed)
local title = util.replaceInvalidChars(getFeedTitle(feed.title))
local stamp = feed.updated or feed.pubDate or feed.published
if not stamp then
return title
end
return parseDate(stamp) .. title
end
|
||||||
|
|
||||||
function NewsDownloader:downloadFeed(feed, feed_output_dir)
|
function NewsDownloader:downloadFeed(feed, feed_output_dir)
|
||||||
local link = getFeedLink(feed.link)
|
local link = getFeedLink(feed.link)
|
||||||
local news_dl_path = ("%s%s%s"):format(feed_output_dir,
|
local news_dl_path = ("%s%s%s"):format(feed_output_dir,
|
||||||
util.replaceInvalidChars(getFeedTitle(feed.title)),
|
getTitleWithDate(feed),
|
||||||
file_extension)
|
file_extension)
|
||||||
logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
|
logger.dbg("NewsDownloader: News file will be stored to :", news_dl_path)
|
||||||
|
|
||||||
@@ -309,7 +331,7 @@ end
|
|||||||
|
|
||||||
function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
|
function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
|
||||||
local news_file_path = ("%s%s%s"):format(feed_output_dir,
|
local news_file_path = ("%s%s%s"):format(feed_output_dir,
|
||||||
util.replaceInvalidChars(getFeedTitle(feed.title)),
|
getTitleWithDate(feed),
|
||||||
file_extension)
|
file_extension)
|
||||||
logger.dbg("NewsDownloader: News file will be created :", news_file_path)
|
logger.dbg("NewsDownloader: News file will be created :", news_file_path)
|
||||||
local file = io.open(news_file_path, "w")
|
local file = io.open(news_file_path, "w")
|
||||||
|
|||||||
Reference in New Issue
Block a user