Files
mailspring-mirror/app/internal_packages/translation/lib/service.ts
Ben Gotow cff437e900 Upgrade to Electron 8, improve TS usage and TS errors outside calendar [requires re- npm install] (#2284)
* Shfit away from default exports and PropTypes for better TS support

* localize strings and expand use of types in WeekView, create new EventOccurence distinct from Event

* Remove calendar wrap, use TS enum for view type + consistent prop interface

* Bump Typescript to 3.8.3 and improve query / attribute / search typings

* Re-use the Autolinker for calendar event descriptions with aggressive phone detection

* Clean up WeekView and the editing popover, lots of cruft here

* Update ScrollRegion to initialize scrollbar provided by external ref

* Expose ScrollRegion’s resizeObserver to clean up tick interval tracking

* Simply tickGenerator and move it to a helper

* Bump to Electron 8.x for Chrome 75+ CSS features

* Bump Handlebars dep to fix annoying npm audit noise

* Remove electron-remote from electron-spellchecker

* Explicitly add node-gyp, why is this necessary?

* Fix lesslint issues

* Bump eslint and let it fix 133 issues

* Satisfy remaining eslint@2020 errors by hand

* Add tsc-watch npm script and fix all TS errors outside calendar

* Configure appveyor to publish all the pdb files it gets

* Log sync exit codes and signals for easier triage on Windows

* Upgrade npm, mark that the build process supports Node 11+ not just Node 11

* Resolve more errors

* Upgrade sqlite to be a context-aware native module

* Fix: Tab key no longer navigating into contenteditable because tabIndex not inferred

* Fix: Bad print styles because Chrome now adds more CSS of it’s own when doctype is missing

* Fix: before-navigate is now called after beforeunload
2021-02-14 15:58:22 -06:00

282 lines
7.9 KiB
TypeScript

import LRU from 'lru-cache';
import {
QuotedHTMLTransformer,
localized,
Actions,
MailspringAPIRequest,
RegExpUtils,
FeatureLexicon,
} from 'mailspring-exports';
export const TranslatePopupOptions = {
English: 'en',
Spanish: 'es',
Russian: 'ru',
Chinese: 'zh',
Arabic: 'ar',
French: 'fr',
German: 'de',
Italian: 'it',
Japanese: 'ja',
Portuguese: 'pt',
Hindi: 'hi',
Korean: 'ko',
};
export const AllLanguages = {
az: 'Azerbaijan',
ml: 'Malayalam',
sq: 'Albanian',
mt: 'Maltese',
am: 'Amharic',
mk: 'Macedonian',
en: 'English',
mi: 'Maori',
ar: 'Arabic',
mr: 'Marathi',
hy: 'Armenian',
mhr: 'Mari',
af: 'Afrikaans',
mn: 'Mongolian',
eu: 'Basque',
de: 'German',
ba: 'Bashkir',
ne: 'Nepali',
be: 'Belarusian',
no: 'Norwegian',
bn: 'Bengali',
pa: 'Punjabi',
my: 'Burmese',
pap: 'Papiamento',
bg: 'Bulgarian',
fa: 'Persian',
bs: 'Bosnian',
pl: 'Polish',
cy: 'Welsh',
pt: 'Portuguese',
hu: 'Hungarian',
ro: 'Romanian',
vi: 'Vietnamese',
ru: 'Russian',
ht: 'Haitian (Creole)',
ceb: 'Cebuano',
gl: 'Galician',
sr: 'Serbian',
nl: 'Dutch',
si: 'Sinhala',
mrj: 'Hill Mari',
sk: 'Slovakian',
el: 'Greek',
sl: 'Slovenian',
ka: 'Georgian',
sw: 'Swahili',
gu: 'Gujarati',
su: 'Sundanese',
da: 'Danish',
tg: 'Tajik',
he: 'Hebrew',
th: 'Thai',
yi: 'Yiddish',
tl: 'Tagalog',
id: 'Indonesian',
ta: 'Tamil',
ga: 'Irish',
tt: 'Tatar',
it: 'Italian',
te: 'Telugu',
is: 'Icelandic',
tr: 'Turkish',
es: 'Spanish',
udm: 'Udmurt',
kk: 'Kazakh',
uz: 'Uzbek',
kn: 'Kannada',
uk: 'Ukrainian',
ca: 'Catalan',
ur: 'Urdu',
ky: 'Kyrgyz',
fi: 'Finnish',
zh: 'Chinese',
fr: 'French',
ko: 'Korean',
hi: 'Hindi',
xh: 'Xhosa',
hr: 'Croatian',
km: 'Khmer',
cs: 'Czech',
lo: 'Laotian',
sv: 'Swedish',
la: 'Latin',
gd: 'Scottish',
lv: 'Latvian',
et: 'Estonian',
lt: 'Lithuanian',
eo: 'Esperanto',
lb: 'Luxembourgish',
jv: 'Javanese',
mg: 'Malagasy',
ja: 'Japanese',
ms: 'Malay',
};
export const TranslationsUsedLexicon: FeatureLexicon = {
headerText: localized('All Translations Used'),
rechargeText: `${localized(
'You can translate up to %1$@ emails each %2$@ with Mailspring Basic.'
)} ${localized('Upgrade to Pro today!')}`,
iconUrl: 'mailspring://translation/assets/ic-translation-modal@2x.png',
};
function forEachTranslatableText(doc: Document, callback: (el: Node, text: string) => void) {
const textWalker = document.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT);
const urlRegexp = RegExpUtils.urlRegex({ matchStartOfString: true, matchTailOfString: true });
const usaAddressRegexp = /, [A-Z]{2},? [A-Z]{2,3},? [\d]{5}/; // matches ", CA 94114"
while (textWalker.nextNode()) {
const node = textWalker.currentNode;
if (['SCRIPT', 'STYLE', 'LINK', 'META', 'TITLE'].includes(node.parentElement.tagName)) {
continue;
}
if (node.parentElement.closest('.notranslate')) {
continue; // the HTML explicitly requests this text not be translated
}
const text = node.textContent.replace(/\s{2,}/g, ' ');
if (!/[A-Za-z\p{L}]+/u.test(text)) {
continue; // there are no latin or unicode letters in the string
}
if (urlRegexp.test(text.trim())) {
continue; // purely a plaintext link
}
if (text.length < 250 && usaAddressRegexp.test(text)) {
continue; // looks very much like a US address. Don't awkwardly translate these
}
const closestWithFont = node.parentElement.closest('[style*=font]');
if (closestWithFont instanceof HTMLElement) {
const family = closestWithFont.style.fontFamily || closestWithFont.style.font || '';
if (
family.includes('monospace') ||
family.includes('Monaco') ||
family.includes('Lucida Console') ||
family.includes('Courier')
) {
continue; // the text is in a monospace font and is probably a meaningful "value / variable"
}
const size = Number((closestWithFont.style.fontSize || '').replace(/[A-Za-z]+/g, ''));
if (size > 0 && size < 12) {
continue; // the text is tiny or hidden, don't waste translation characters on footer stuff
}
}
if (text.length > 2) {
// we will translate this text. But trim out large blocks of whitespace that may
// exist in the raw textContent to avoid those counting as translation characters.
// NOTE: We use node.textContent here, not the trimmed version. The leading/trailing
// space are necessary for things like <span>this is a <a>link</a></span>
callback(node, text);
}
}
}
// We maintain a cache of blocks of text we've translated. Because the user may have a whole
// mailbox of very similar or templated emails, this can cut down on the amount of text we
// need to translate for a new email dramatically.
const translatedSnippetCache = new LRU<string, string>({ max: 1000 });
let translatedSnippetLang = '';
export async function translateMessageBody(
html: string,
targetLang?: string,
silent = false
): Promise<string | false> {
if (translatedSnippetLang !== targetLang) {
translatedSnippetCache.reset();
translatedSnippetLang = targetLang;
}
const replyHtml = QuotedHTMLTransformer.removeQuotedHTML(html);
// break the document down into text blocks to translate. We don't send the HTML
// because translation services bill by the character and it's unclear if a giant
// <style> tag counts.
const domParser = new DOMParser();
const doc = domParser.parseFromString(replyHtml, 'text/html');
const translationDoc = document.createElement('div');
// Iterate over the HTML document and pull out text blocks we need to translate via the API
// because LRU cache can change while the req is in flight
const skipped: { [text: string]: string } = {};
let totalChars = 0;
forEachTranslatableText(doc, (node, text) => {
const t = translatedSnippetCache.get(text);
if (t) {
skipped[text] = t;
return;
}
if (totalChars > 5000) {
return;
}
const b = document.createElement('b');
b.textContent = text;
totalChars += text.length + 7; // <b></b>;
translationDoc.appendChild(b);
});
let resultElements = [];
// Make the translation request
if (translationDoc.childElementCount > 0) {
const translationHTML = translationDoc.innerHTML.replace(/&nbsp;/g, ' '); // nbsp char
let response = null;
try {
response = await MailspringAPIRequest.makeRequest({
server: 'identity',
method: 'POST',
path: `/api/translate`,
json: true,
body: { lang: targetLang, text: translationHTML, format: 'html' },
timeout: 5000,
});
} catch (error) {
Actions.closePopover();
if (!silent) {
const dialog = require('electron').remote.dialog;
dialog.showErrorBox(localized('Language Conversion Failed'), error.toString());
}
return false;
}
const resultDoc = domParser.parseFromString(response.result, 'text/html');
resultElements = Array.from(resultDoc.querySelectorAll('b'));
}
// Merge the results back in to the original document by traversing it in the same order
// and inserting the result back in. Note that we have saved our cache hits into `skipped`
// and read them from there to be 100% sure the translation indexes still align with the doc.
forEachTranslatableText(doc, (node, text) => {
if (skipped[text]) {
node.textContent = skipped[text];
} else {
const resultElement = resultElements.shift();
if (resultElement) {
const translated = resultElement.textContent;
node.textContent = translated;
translatedSnippetCache.set(text, translated);
} else {
// we may have hit the `totalChars` per-email translation limit.
// nothing more to do here!
}
}
});
// Put the quoted text back in!
return QuotedHTMLTransformer.appendQuotedHTML(doc.body.innerHTML, html);
}