Moved dictionaries manifest into an MTP-fetched pinned post.

2026-06-02 03:53:42 +02:00 · 2026-04-20 18:34:56 +03:00
parent 56851519da
commit 5d768d81b0
5 changed files with 921 additions and 81 deletions
@@ -344,24 +344,44 @@ void Inner::setupContent(
 	const auto queryStream = content->lifetime()
 		.make_state<rpl::event_stream<QStringView>>();

-	for (const auto &dict : Spellchecker::Dictionaries()) {
-		const auto id = dict.id;
-		const auto row = AddButtonWithLoader(
-			content,
-			session,
-			dict,
-			ranges::contains(enabledDictionaries, id),
-			queryStream->events());
-		row->toggledValue(
-		) | rpl::on_next([=](auto enabled) {
-			if (enabled) {
-				_enabledRows.push_back(id);
-			} else {
-				auto &rows = _enabledRows;
-				rows.erase(ranges::remove(rows, id), end(rows));
-			}
-		}, row->lifetime());
-	}
+	// Rows are created once, when Spellchecker::Dictionaries() becomes
+	// non-empty. Manifest is fetched lazily and may arrive after the box
+	// opens, so we subscribe to DictionariesChanged and populate rows
+	// then if we haven't already.
+	const auto built = content->lifetime().make_state<bool>(false);
+	const auto buildRows = [=] {
+		if (*built) {
+			return;
+		}
+		const auto dicts = Spellchecker::Dictionaries();
+		if (dicts.empty()) {
+			return;
+		}
+		*built = true;
+		for (const auto &dict : dicts) {
+			const auto id = dict.id;
+			const auto row = AddButtonWithLoader(
+				content,
+				session,
+				dict,
+				ranges::contains(enabledDictionaries, id),
+				queryStream->events());
+			row->toggledValue(
+			) | rpl::on_next([=](auto enabled) {
+				if (enabled) {
+					_enabledRows.push_back(id);
+				} else {
+					auto &rows = _enabledRows;
+					rows.erase(ranges::remove(rows, id), end(rows));
+				}
+			}, row->lifetime());
+		}
+	};
+
+	buildRows();
+	Spellchecker::DictionariesChanged(
+	) | rpl::on_next(buildRows, content->lifetime());
+	Spellchecker::RefreshDictionariesManifest(session);

 	_queryCallback = [=](const QString &query) {
 		if (query.size() >= kMaxQueryLength) {
@@ -10,6 +10,7 @@ https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
 #ifndef TDESKTOP_DISABLE_SPELLCHECK

 #include "base/platform/base_platform_info.h"
+#include "base/weak_ptr.h"
 #include "base/zlib_help.h"
 #include "data/data_session.h"
 #include "lang/lang_instance.h"
@@ -18,12 +19,16 @@ https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
 #include "main/main_domain.h"
 #include "main/main_session.h"
 #include "mainwidget.h"
+#include "mtproto/dedicated_file_loader.h"
 #include "spellcheck/platform/platform_spellcheck.h"
 #include "spellcheck/spellcheck_utils.h"
 #include "spellcheck/spellcheck_value.h"
 #include "core/application.h"
 #include "core/core_settings.h"

+#include <QtCore/QJsonArray>
+#include <QtCore/QJsonDocument>
+#include <QtCore/QJsonObject>
 #include <QtGui/QGuiApplication>
 #include <QtGui/QInputMethod>

@@ -59,53 +64,187 @@ inline auto LanguageFromLocale(QLocale loc) {
 			: int(locLang);
 }

-const auto kDictionaries = {
-	Dict{{ QLocale::English,                               649,   174'516, "English" }}, // en_US
-	Dict{{ QLocale::Bulgarian,                             594,   229'658, "\xd0\x91\xd1\x8a\xd0\xbb\xd0\xb3\xd0\xb0\xd1\x80\xd1\x81\xd0\xba\xd0\xb8" }}, // bg_BG
-	Dict{{ QLocale::Catalan,                               595,   417'611, "\x43\x61\x74\x61\x6c\xc3\xa0" }}, // ca_ES
-	Dict{{ QLocale::Czech,                                 596,   860'286, "\xc4\x8c\x65\xc5\xa1\x74\x69\x6e\x61" }}, // cs_CZ
-	Dict{{ QLocale::Welsh,                                 597,   177'305, "\x43\x79\x6d\x72\x61\x65\x67" }}, // cy_GB
-	Dict{{ QLocale::Danish,                                598,   345'874, "\x44\x61\x6e\x73\x6b" }}, // da_DK
-	Dict{{ QLocale::German,                                599, 2'412'780, "\x44\x65\x75\x74\x73\x63\x68" }}, // de_DE
-	Dict{{ QLocale::Greek,                                 600, 1'389'160, "\xce\x95\xce\xbb\xce\xbb\xce\xb7\xce\xbd\xce\xb9\xce\xba\xce\xac" }}, // el_GR
-	Dict{{ LWC(QLocale::English, QLocale::Australia),      601,   175'266, "English (Australia)" }}, // en_AU
-	Dict{{ LWC(QLocale::English, QLocale::Canada),         602,   174'295, "English (Canada)" }}, // en_CA
-	Dict{{ LWC(QLocale::English, QLocale::UnitedKingdom),  603,   174'433, "English (United Kingdom)" }}, // en_GB
-	Dict{{ QLocale::Spanish,                               604,   264'717, "\x45\x73\x70\x61\xc3\xb1\x6f\x6c" }}, // es_ES
-	Dict{{ QLocale::Estonian,                              605,   757'394, "\x45\x65\x73\x74\x69" }}, // et_EE
-	Dict{{ QLocale::Persian,                               606,   333'911, "\xd9\x81\xd8\xa7\xd8\xb1\xd8\xb3\xdb\x8c" }}, // fa_IR
-	Dict{{ QLocale::French,                                607,   321'391, "\x46\x72\x61\x6e\xc3\xa7\x61\x69\x73" }}, // fr_FR
-	Dict{{ QLocale::Hebrew,                                608,   622'550, "\xd7\xa2\xd7\x91\xd7\xa8\xd7\x99\xd7\xaa" }}, // he_IL
-	Dict{{ QLocale::Hindi,                                 609,    56'105, "\xe0\xa4\xb9\xe0\xa4\xbf\xe0\xa4\xa8\xe0\xa5\x8d\xe0\xa4\xa6\xe0\xa5\x80" }}, // hi_IN
-	Dict{{ QLocale::Croatian,                              610,   668'876, "\x48\x72\x76\x61\x74\x73\x6b\x69" }}, // hr_HR
-	Dict{{ QLocale::Hungarian,                             611,   660'402, "\x4d\x61\x67\x79\x61\x72" }}, // hu_HU
-	Dict{{ QLocale::Armenian,                              612,   928'746, "\xd5\x80\xd5\xa1\xd5\xb5\xd5\xa5\xd6\x80\xd5\xa5\xd5\xb6" }}, // hy_AM
-	Dict{{ QLocale::Indonesian,                            613,   100'134, "\x49\x6e\x64\x6f\x6e\x65\x73\x69\x61" }}, // id_ID
-	Dict{{ QLocale::Italian,                               614,   324'613, "\x49\x74\x61\x6c\x69\x61\x6e\x6f" }}, // it_IT
-	Dict{{ QLocale::Korean,                                615, 1'256'987, "\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4" }}, // ko_KR
-	Dict{{ QLocale::Lithuanian,                            616,   267'427, "\x4c\x69\x65\x74\x75\x76\x69\xc5\xb3" }}, // lt_LT
-	Dict{{ QLocale::Latvian,                               617,   641'602, "\x4c\x61\x74\x76\x69\x65\xc5\xa1\x75" }}, // lv_LV
-	Dict{{ QLocale::NorwegianBokmal,                       618,   588'650, "\x4e\x6f\x72\x73\x6b" }}, // nb_NO
-	Dict{{ QLocale::Dutch,                                 619,   743'406, "\x4e\x65\x64\x65\x72\x6c\x61\x6e\x64\x73" }}, // nl_NL
-	Dict{{ QLocale::Polish,                                620, 1'015'747, "\x50\x6f\x6c\x73\x6b\x69" }}, // pl_PL
-	Dict{{ QLocale::Portuguese,                            621, 1'231'999, "\x50\x6f\x72\x74\x75\x67\x75\xc3\xaa\x73 (Brazil)" }}, // pt_BR
-	Dict{{ LWC(QLocale::Portuguese, QLocale::Portugal),    622,   138'571, "\x50\x6f\x72\x74\x75\x67\x75\xc3\xaa\x73" }}, // pt_PT
-	Dict{{ QLocale::Romanian,                              623,   455'643, "\x52\x6f\x6d\xc3\xa2\x6e\xc4\x83" }}, // ro_RO
-	Dict{{ QLocale::Russian,                               624,   463'194, "\xd0\xa0\xd1\x83\xd1\x81\xd1\x81\xd0\xba\xd0\xb8\xd0\xb9" }}, // ru_RU
-	Dict{{ QLocale::Slovak,                                625,   525'328, "\x53\x6c\x6f\x76\x65\x6e\xc4\x8d\x69\x6e\x61" }}, // sk_SK
-	Dict{{ QLocale::Slovenian,                             626, 1'143'710, "\x53\x6c\x6f\x76\x65\x6e\xc5\xa1\xc4\x8d\x69\x6e\x61" }}, // sl_SI
-	Dict{{ QLocale::Albanian,                              627,   583'412, "\x53\x68\x71\x69\x70" }}, // sq_AL
-	Dict{{ QLocale::Swedish,                               628,   593'877, "\x53\x76\x65\x6e\x73\x6b\x61" }}, // sv_SE
-	Dict{{ QLocale::Tamil,                                 629,   323'193, "\xe0\xae\xa4\xe0\xae\xae\xe0\xae\xbf\xe0\xae\xb4\xe0\xaf\x8d" }}, // ta_IN
-	Dict{{ QLocale::Tajik,                                 630,   369'931, "\xd0\xa2\xd0\xbe\xd2\xb7\xd0\xb8\xd0\xba\xd3\xa3" }}, // tg_TG
-	Dict{{ QLocale::Turkish,                               631, 4'301'099, "\x54\xc3\xbc\x72\x6b\xc3\xa7\x65" }}, // tr_TR
-	Dict{{ QLocale::Ukrainian,                             632,   445'711, "\xd0\xa3\xd0\xba\xd1\x80\xd0\xb0\xd1\x97\xd0\xbd\xd1\x81\xd1\x8c\xd0\xba\xd0\xb0" }}, // uk_UA
-	Dict{{ QLocale::Vietnamese,                            633,    12'949, "\x54\x69\xe1\xba\xbf\x6e\x67\x20\x56\x69\xe1\xbb\x87\x74" }}, // vi_VN
-	// The Tajik code is 'tg_TG' in Chromium, but QT has only 'tg_TJ'.
+constexpr auto kDictionariesManifestChannel = "tdhbcfeed"_cs;
+constexpr auto kDictionariesManifestPostId = 0;
+
+// Runtime-loaded dictionaries manifest. Kept in memory only: fetched from
+// the pinned JSON post the first time something actually needs it
+// (Manage Dictionaries, auto-download). Nothing persists to disk, so an
+// install whose enabled dictionaries are all on disk never hits the net.
+std::vector<Dict> DictionariesList;
+rpl::event_stream<> DictionariesListChanged;
+
+void EnsurePath();
+
+// Splits a "<channel>#<postId>" manifest location into its two parts.
+// Succeeds only when '#' is neither the first nor the last character and
+// the text after it converts to a positive integer. On failure both
+// output arguments are left untouched.
+bool ParseLocation(const QString &text, QString &channel, int &postId) {
+	const auto hash = text.indexOf('#');
+	const auto hasChannel = (hash > 0);
+	const auto hasPostId = (hash != text.size() - 1);
+	if (!hasChannel || !hasPostId) {
+		return false;
+	}
+	auto converted = false;
+	const auto id = text.mid(hash + 1).toInt(&converted);
+	if (converted && id > 0) {
+		channel = text.left(hash);
+		postId = id;
+		return true;
+	}
+	return false;
+}
+
+// Parses the manifest JSON: a root object whose "dictionaries" array
+// holds entries with "id", "size", "name" and "location"
+// ("<channel>#<postId>"). Entries with a zero/missing id, an empty name
+// or a location rejected by ParseLocation() are skipped. Returns an
+// empty vector when the JSON itself is unusable.
+std::vector<Dict> ParseManifest(const QByteArray &bytes) {
+	auto result = std::vector<Dict>();
+	auto err = QJsonParseError();
+	const auto doc = QJsonDocument::fromJson(bytes, &err);
+	if (err.error != QJsonParseError::NoError) {
+		LOG(("Spellcheck Error: manifest JSON parse failed: %1"
+			).arg(err.errorString()));
+		return result;
+	}
+	if (!doc.isObject()) {
+		// Well-formed JSON with a non-object root carries no parse
+		// error, so logging errorString() here would be misleading.
+		LOG(("Spellcheck Error: manifest JSON root is not an object."));
+		return result;
+	}
+	const auto list = doc.object().value(u"dictionaries"_q).toArray();
+	result.reserve(list.size());
+	for (const auto &v : list) {
+		const auto obj = v.toObject();
+		auto d = Dict();
+		d.id = obj.value(u"id"_q).toInt();
+		// JSON numbers are doubles; the size is truncated to int64.
+		d.size = int64(obj.value(u"size"_q).toDouble());
+		d.name = obj.value(u"name"_q).toString();
+		const auto location = obj.value(u"location"_q).toString();
+		if (!d.id
+			|| d.name.isEmpty()
+			|| !ParseLocation(location, d.channel, d.postId)) {
+			continue;
+		}
+		result.push_back(std::move(d));
+	}
+	return result;
+}
+
+// One-shot fetcher for the dictionaries manifest. start() resolves
+// kDictionariesManifestChannel, resolved() requests the manifest post,
+// received() hands the message text to apply(), and every path ends in
+// finish(), which releases the global ActiveManifestLoader reference.
+class DictManifestLoader final : public base::has_weak_ptr {
+public:
+	explicit DictManifestLoader(base::weak_ptr<Main::Session> session);
+
+	// Begins the fetch; goes straight to finish() if _mtp is not valid.
+	void start();
+
+private:
+	// Channel was resolved: request the manifest message from it.
+	void resolved(const MTPInputChannel &channel);
+	// Response arrived: extract the message and apply its text.
+	void received(const MTPmessages_Messages &result);
+	// Parse the manifest and, if non-empty, publish it.
+	void apply(const QByteArray &bytes);
+	// Schedule the reset of ActiveManifestLoader.
+	void finish();
+
+	MTP::WeakInstance _mtp;
+
+};

+std::shared_ptr<DictManifestLoader> ActiveManifestLoader;
+
+// Only stores the weak session inside _mtp; no request is sent until
+// start() is called.
+DictManifestLoader::DictManifestLoader(base::weak_ptr<Main::Session> session)
+: _mtp(session) {
+}
+
+// Kicks off the fetch by resolving the manifest channel username.
+// Success continues in resolved(); failure (or an invalid _mtp) ends the
+// attempt through finish().
+void DictManifestLoader::start() {
+	if (!_mtp.valid()) {
+		finish();
+		return;
+	}
+	// Callbacks go through a weak pointer so they become no-ops if this
+	// loader is destroyed before the reply arrives.
+	const auto weak = base::make_weak(this);
+	MTP::ResolveChannel(&_mtp, kDictionariesManifestChannel.utf16(), [=](
+			const MTPInputChannel &channel) {
+		if (const auto strong = weak.get()) {
+			strong->resolved(channel);
+		}
+	}, [=] {
+		if (const auto strong = weak.get()) {
+			strong->finish();
+		}
+	});
+}
+
+// Requests the single message kDictionariesManifestPostId from the
+// resolved channel. The reply is forwarded to received(); any MTP error
+// ends the attempt through finish().
+void DictManifestLoader::resolved(const MTPInputChannel &channel) {
+	// Same weak-pointer guard as in start(): ignore replies that arrive
+	// after this loader is gone.
+	const auto weak = base::make_weak(this);
+	_mtp.send(
+		MTPchannels_GetMessages(
+			channel,
+			MTP_vector<MTPInputMessage>(1,
+				MTP_inputMessageID(
+					MTP_int(kDictionariesManifestPostId)))),
+		[=](const MTPmessages_Messages &result) {
+			if (const auto strong = weak.get()) {
+				strong->received(result);
+			}
+		},
+		[=](const MTP::Error &) {
+			if (const auto strong = weak.get()) {
+				strong->finish();
+			}
+		});
+}
+
+// Handles the GetMessages reply: a regular message has its text passed
+// to apply(), anything else is logged. Either way the attempt ends with
+// finish().
+void DictManifestLoader::received(const MTPmessages_Messages &result) {
+	const auto message = MTP::GetMessagesElement(result);
+	if (message && message->type() == mtpc_message) {
+		apply(message->c_message().vmessage().v);
+	} else {
+		LOG(("Spellcheck Error: manifest message not found."));
+	}
+	finish();
+}
+
+// Parses the manifest text. A non-empty result replaces DictionariesList
+// and is announced through DictionariesListChanged; an empty result only
+// produces a log entry and leaves the current list untouched.
+void DictManifestLoader::apply(const QByteArray &bytes) {
+	auto dictionaries = ParseManifest(bytes);
+	if (!dictionaries.empty()) {
+		DictionariesList = std::move(dictionaries);
+		DictionariesListChanged.fire({});
+		return;
+	}
+	LOG(("Spellcheck Error: manifest empty or unparseable."));
+}
+
+// Ends this fetch attempt by clearing the global ActiveManifestLoader.
+// The reset is posted through crl::on_main instead of running inline;
+// NOTE(review): presumably so the loader is not destroyed while one of
+// its own methods is still executing - confirm.
+void DictManifestLoader::finish() {
+	crl::on_main([] {
+		ActiveManifestLoader.reset();
+	});
+}
+
+// Starts a manifest fetch for the given session unless one is already
+// tracked in ActiveManifestLoader.
+void StartManifestRefresh(not_null<Main::Session*> session) {
+	if (!ActiveManifestLoader) {
+		ActiveManifestLoader = std::make_shared<DictManifestLoader>(
+			base::make_weak(session));
+		ActiveManifestLoader->start();
+	}
+}
+
+// Callers waiting for the first manifest arrival in this session.
+// We don't cache manifest to disk, so each cold start begins empty;
+// queued callbacks fire once when the MTP fetch lands.
+std::vector<Fn<void()>> ManifestPending;
+rpl::lifetime ManifestPendingSubscription;
+
+// Runs `callback` once a manifest is available. With a non-empty
+// DictionariesList it runs immediately. Otherwise it is queued in
+// ManifestPending, a fetch is requested, and the whole queue is flushed
+// on the next DictionariesListChanged event.
+void EnsureManifestThen(
+		not_null<Main::Session*> session,
+		Fn<void()> callback) {
+	if (!DictionariesList.empty()) {
+		callback();
+		return;
+	}
+	// Only the caller that finds the queue empty installs the one-shot
+	// subscription; later callers just join the queue.
+	const auto needsSubscription = ManifestPending.empty();
+	ManifestPending.push_back(std::move(callback));
+	if (needsSubscription) {
+		DictionariesListChanged.events(
+		) | rpl::take(1) | rpl::on_next([] {
+			// Detach the queue first so it is already empty while the
+			// queued callbacks run.
+			auto waiting = std::vector<Fn<void()>>();
+			waiting.swap(ManifestPending);
+			for (auto &notify : waiting) {
+				notify();
+			}
+		}, ManifestPendingSubscription);
+	}
+	StartManifestRefresh(session);
+}
+
 inline auto IsSupportedLang(int lang) {
-	return ranges::contains(kDictionaries, lang, &Dict::id);
+	return ranges::contains(DictionariesList, lang, &Dict::id);
 }

 void EnsurePath() {
@@ -144,9 +283,12 @@ void DownloadDictionaryInBackground(

 		if (DictionaryExists(id)) {
 			auto dicts = Core::App().settings().dictionariesEnabled();
-			if (!ranges::contains(dicts, id)) {
+			if (ranges::contains(dicts, id)) {
+				Platform::Spellchecker::UpdateLanguages(dicts);
+			} else {
 				dicts.push_back(id);
-				Core::App().settings().setDictionariesEnabled(std::move(dicts));
+				Core::App().settings().setDictionariesEnabled(
+					std::move(dicts));
 				Core::App().saveSettingsDelayed();
 			}
 		}
@@ -230,17 +372,28 @@ void DictLoader::fail() {
 }

 std::vector<Dict> Dictionaries() {
-	return kDictionaries | ranges::to_vector;
+	return DictionariesList;
+}
+
+// Event stream fired (with no payload) each time a freshly parsed
+// manifest replaces the in-memory dictionaries list.
+rpl::producer<> DictionariesChanged() {
+	return DictionariesListChanged.events();
+}
+
+void RefreshDictionariesManifest(not_null<Main::Session*> session) {
+	StartManifestRefresh(session);
 }

 int64 GetDownloadSize(int id) {
-	return ranges::find(kDictionaries, id, &Spellchecker::Dict::id)->size;
+	const auto i = ranges::find(DictionariesList, id, &Dict::id);
+	return (i == end(DictionariesList)) ? 0 : i->size;
 }

 MTP::DedicatedLoader::Location GetDownloadLocation(int id) {
-	const auto username = kCloudLocationUsername.utf16();
-	const auto i = ranges::find(kDictionaries, id, &Spellchecker::Dict::id);
-	return MTP::DedicatedLoader::Location{ username, i->postId };
+	const auto i = ranges::find(DictionariesList, id, &Dict::id);
+	if (i == end(DictionariesList)) {
+		return MTP::DedicatedLoader::Location{};
+	}
+	return MTP::DedicatedLoader::Location{ i->channel, i->postId };
 }

 QString DictPathByLangId(int langId) {
@@ -316,26 +469,26 @@ rpl::producer<QString> ButtonManageDictsState(
 		if (!Core::App().settings().spellcheckerEnabled()) {
 			return QString();
 		}
-		if (!Core::App().settings().dictionariesEnabled().size()) {
+		const auto dicts = Core::App().settings().dictionariesEnabled();
+		if (dicts.empty()) {
 			return QString();
 		}
-		const auto dicts = Core::App().settings().dictionariesEnabled();
 		const auto filtered = ranges::views::all(
 			dicts
 		) | ranges::views::filter(
 			DictionaryExists
 		) | ranges::to_vector;
-		const auto active = Platform::Spellchecker::ActiveLanguages();

-		return (active.size() == filtered.size())
-			? QString::number(filtered.size())
-			: tr::lng_contacts_loading(tr::now);
+		return (filtered.size() < dicts.size())
+			? tr::lng_contacts_loading(tr::now)
+			: QString::number(filtered.size());
 	};
 	return rpl::single(
 		computeString()
 	) | rpl::then(
 		rpl::merge(
 			Spellchecker::SupportedScriptsChanged(),
+			Spellchecker::DictionariesChanged(),
 			Core::App().settings().dictionariesEnabledChanges(
 			) | rpl::to_empty,
 			Core::App().settings().spellcheckerEnabledChanges(
@@ -418,12 +571,18 @@ void Start(not_null<Main::Session*> session) {
 				return;
 			}
 			const auto l = LanguageFromLocale(method->locale());
-			if (!IsSupportedLang(l) || DictionaryExists(l)) {
+			// Avoid pulling the manifest just because an input-method
+			// locale flipped; only fetch if we'd actually need to
+			// download something for the new locale.
+			if (DictionaryExists(l)) {
 				return;
 			}
-			crl::on_main(session, [=] {
+			EnsureManifestThen(session, crl::guard(session, [=] {
+				if (!IsSupportedLang(l) || DictionaryExists(l)) {
+					return;
+				}
 				DownloadDictionaryInBackground(session, 0, { l });
-			});
+			}));
 		};
 		QObject::connect(
 			method,
@@ -437,8 +596,17 @@ void Start(not_null<Main::Session*> session) {
 			if (!loaded) {
 				return;
 			}
-
-			DownloadDictionaryInBackground(session, 0, DefaultLanguages());
+			const auto enabled = settings->dictionariesEnabled();
+			if (!enabled.empty()
+				&& ranges::all_of(enabled, &DictionaryExists)) {
+				// Every previously-enabled dictionary is already on
+				// disk; no manifest fetch, no network traffic.
+				return;
+			}
+			EnsureManifestThen(session, crl::guard(session, [=] {
+				DownloadDictionaryInBackground(
+					session, 0, DefaultLanguages());
+			}));
 		}, lifetime);

 		connectInput();
@@ -19,6 +19,7 @@ class Session;
 namespace Spellchecker {

 struct Dict : public Storage::CloudBlob::Blob {
+	QString channel;
 };

 int64 GetDownloadSize(int id);
@@ -33,6 +34,8 @@ bool RemoveDictionary(int langId);

 bool WriteDefaultDictionary();
 std::vector<Dict> Dictionaries();
+rpl::producer<> DictionariesChanged();
+void RefreshDictionariesManifest(not_null<Main::Session*> session);

 void Start(not_null<Main::Session*> session);
 [[nodiscard]] rpl::producer<QString> ButtonManageDictsState(
@@ -0,0 +1,649 @@
+#!/usr/bin/env python3
+# This file is part of Telegram Desktop,
+# the official desktop application for the Telegram messaging service.
+#
+# For license and copyright information please follow this link:
+# https://github.com/telegramdesktop/tdesktop/blob/master/LEGAL
+"""
+Update the Hunspell dictionaries manifest consumed by Telegram Desktop.
+
+One end-to-end run:
+  1. Shallow-clone Chromium's hunspell_dictionaries repo into --cache-dir.
+  2. If --manifest-post-id is not provided, send an empty placeholder
+     message and use its id (printed so you can hardcode it in the client).
+  3. For each language: read raw .dic and .aff, recode to UTF-8 if needed
+     (rewriting the SET line), zip them under the Qt-side locale name,
+     upload via Bot API `sendDocument` to --channel.
+  4. Build a JSON manifest (each entry carries `location` =
+     "<blobs-channel>#<message_id>") and write it into the manifest post
+     via `editMessageText`.
+  5. Delete --cache-dir unless --keep-cache is passed.
+
+State file tracks sha256 of the UTF-8 dic/aff pair per language so
+subsequent runs re-upload only changed dictionaries.
+
+Usage:
+  # Full one-shot run on a fresh channel: creates the manifest post,
+  # uploads every dictionary to the same channel, edits the post, and
+  # deletes the chromium clone.
+  python3 update_dictionaries.py \\
+      --bot-token $TG_BOT_TOKEN \\
+      --channel @my_test_channel \\
+      --state-file ./dict_state.json
+
+  # Manifest and blobs in separate channels. Blobs go to @my_blobs_channel
+  # (where clients will fetch them), the JSON manifest post lives in
+  # --channel (here @my_test_channel; a numeric id like -100...1438 works too).
+  python3 update_dictionaries.py \\
+      --bot-token $TG_BOT_TOKEN \\
+      --channel @my_test_channel \\
+      --blobs-channel @my_blobs_channel \\
+      --state-file ./dict_state.json
+
+  # Re-run against an existing manifest post (incremental if state file
+  # matches; unchanged languages are carried over without re-upload).
+  python3 update_dictionaries.py \\
+      --bot-token $TG_BOT_TOKEN \\
+      --channel @my_test_channel \\
+      --manifest-post-id 1234 \\
+      --state-file ./dict_state.json
+
+  # Subset / troubleshooting; keep the chromium clone between runs.
+  python3 update_dictionaries.py \\
+      --bot-token $TG_BOT_TOKEN \\
+      --channel @my_test_channel \\
+      --manifest-post-id 1234 \\
+      --languages en_US,ru_RU \\
+      --keep-cache
+
+  # Preview: fetch, recode and zip locally; no network upload, no
+  # manifest edit, no cache cleanup.
+  python3 update_dictionaries.py \\
+      --channel @anything \\
+      --dry-run \\
+      --languages en_US,sr
+
+Flags:
+  --channel <id|@name>         where the manifest post lives
+                               (sendMessage / editMessageText target).
+  --blobs-channel <@name>      where blob zips go (sendDocument target);
+                               defaults to --channel. Must be @username
+                               since the client resolves blob locations
+                               by public username.
+  --manifest-post-id <N>       reuse an existing manifest post; omit to
+                               create a new placeholder automatically.
+  --cache-dir <path>           where the shallow chromium clone lives
+                               (default: .chromium_hunspell_cache).
+  --keep-cache                 keep --cache-dir after completion
+                               (default: delete it).
+  --state-file <path>          sha256/post_id/size per language for
+                               incremental uploads.
+
+The bot must be admin (with post/edit rights) in both --channel and
+--blobs-channel. When they coincide, one admin suffices.
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import io
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+import zipfile
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+CHROMIUM_REPO = (
+    "https://chromium.googlesource.com/chromium/deps/hunspell_dictionaries"
+)
+BOT_API = "https://api.telegram.org/bot{token}/{method}"
+
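+# QLocale::Language values as numbered in Qt 6 (confirmed against
+# qtbase/src/corelib/text/qlocale.h for Qt 6.2 and 6.11).
+# NOTE(review): Qt 5 appears to number this enum differently - verify
+# before using these ids with a Qt 5 build.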
+LANG = {
+    "Afrikaans":        4,
+    "Albanian":         9,
+    "Armenian":        17,
+    "Bulgarian":       45,
+    "Catalan":         48,
+    "Croatian":        66,
+    "Czech":           67,
+    "Danish":          68,
+    "Dutch":           72,
+    "English":         75,
+    "Estonian":        78,
+    "Faroese":         81,
+    "French":          85,
+    "Galician":        90,
+    "German":          94,
+    "Greek":           96,
+    "Hebrew":         103,
+    "Hindi":          105,
+    "Hungarian":      107,
+    "Indonesian":     112,
+    "Italian":        119,
+    "Korean":         142,
+    "Latvian":        155,
+    "Lithuanian":     160,
+    "NorwegianBokmal":209,
+    "Persian":        228,
+    "Polish":         230,
+    "Portuguese":     231,
+    "Romanian":       235,
+    "Russian":        239,
+    "Serbian":        252,
+    "Slovak":         262,
+    "Slovenian":      263,
+    "Spanish":        270,
+    "Swedish":        275,
+    "Tajik":          282,
+    "Tamil":          283,
+    "Turkish":        298,
+    "Ukrainian":      303,
+    "Vietnamese":     310,
+    "Welsh":          316,
+}
+
+# QLocale::Country values.
+COUNTRY = {
+    "Australia":       15,
+    "Brazil":          32,
+    "Canada":          41,
+    "Portugal":       188,
+    "UnitedKingdom":  246,
+    "UnitedStates":   248,
+}
+
+# Matches LWC() in spellchecker_common.cpp: default country collapses to
+# the bare language id; otherwise language*1000 + country.
+_DEFAULT_COUNTRIES = {COUNTRY["UnitedStates"], COUNTRY["Brazil"]}
+
+
+def lwc(language: int, country: int) -> int:
+    """Combine a language id and a country id into one dictionary id.
+
+    A country listed in _DEFAULT_COUNTRIES collapses to the bare language
+    id; any other country yields language * 1000 + country.
+    """
+    if country in _DEFAULT_COUNTRIES:
+        return language
+    return language * 1000 + country
+
+
+# Each entry: (chromium_stem, id, qt_name, display_name)
+# - chromium_stem: the filename in chromium/deps/hunspell_dictionaries (without ext)
+# - id: primary key used by settings / UI (QLocale::Language or LWC())
+# - qt_name: must equal QLocale(id).name() at runtime — this is what the
+#   client uses for both the unpack folder and the <qt_name>.dic/.aff
+#   lookups. Double-check when adding new entries.
+# - display_name: shown in "Manage dictionaries" UI.
+LANGUAGES = [
+    ("en_US", LANG["English"],                            "en_US",      "English"),
+    ("bg_BG", LANG["Bulgarian"],                          "bg_BG",      "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438"),
+    ("ca_ES", LANG["Catalan"],                            "ca_ES",      "Catal\u00e0"),
+    ("cs_CZ", LANG["Czech"],                              "cs_CZ",      "\u010ce\u0161tina"),
+    ("cy_GB", LANG["Welsh"],                              "cy_GB",      "Cymraeg"),
+    ("da_DK", LANG["Danish"],                             "da_DK",      "Dansk"),
+    ("de_DE", LANG["German"],                             "de_DE",      "Deutsch"),
+    ("el_GR", LANG["Greek"],                              "el_GR",      "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac"),
+    ("en_AU", lwc(LANG["English"], COUNTRY["Australia"]),      "en_AU", "English (Australia)"),
+    ("en_CA", lwc(LANG["English"], COUNTRY["Canada"]),         "en_CA", "English (Canada)"),
+    ("en_GB", lwc(LANG["English"], COUNTRY["UnitedKingdom"]),  "en_GB", "English (United Kingdom)"),
+    ("es_ES", LANG["Spanish"],                            "es_ES",      "Espa\u00f1ol"),
+    ("et_EE", LANG["Estonian"],                           "et_EE",      "Eesti"),
+    ("fa_IR", LANG["Persian"],                            "fa_IR",      "\u0641\u0627\u0631\u0633\u06cc"),
+    ("fr_FR", LANG["French"],                             "fr_FR",      "Fran\u00e7ais"),
+    ("he_IL", LANG["Hebrew"],                             "he_IL",      "\u05e2\u05d1\u05e8\u05d9\u05ea"),
+    ("hi_IN", LANG["Hindi"],                              "hi_IN",      "\u0939\u093f\u0928\u094d\u0926\u0940"),
+    ("hr_HR", LANG["Croatian"],                           "hr_HR",      "Hrvatski"),
+    ("hu-HU", LANG["Hungarian"],                          "hu_HU",      "Magyar"),
+    ("hy",    LANG["Armenian"],                           "hy_AM",      "\u0540\u0561\u0575\u0565\u0580\u0565\u0576"),
+    ("id_ID", LANG["Indonesian"],                         "id_ID",      "Indonesia"),
+    ("it_IT", LANG["Italian"],                            "it_IT",      "Italiano"),
+    ("ko",    LANG["Korean"],                             "ko_KR",      "\ud55c\uad6d\uc5b4"),
+    ("lt_LT", LANG["Lithuanian"],                         "lt_LT",      "Lietuvi\u0173"),
+    ("lv_LV", LANG["Latvian"],                            "lv_LV",      "Latvie\u0161u"),
+    ("nb_NO", LANG["NorwegianBokmal"],                    "nb_NO",      "Norsk"),
+    ("nl_NL", LANG["Dutch"],                              "nl_NL",      "Nederlands"),
+    ("pl_PL", LANG["Polish"],                             "pl_PL",      "Polski"),
+    ("pt_BR", LANG["Portuguese"],                         "pt_BR",      "Portugu\u00eas (Brazil)"),
+    ("pt_PT", lwc(LANG["Portuguese"], COUNTRY["Portugal"]),    "pt_PT", "Portugu\u00eas"),
+    ("ro_RO", LANG["Romanian"],                           "ro_RO",      "Rom\u00e2n\u0103"),
+    ("ru_RU", LANG["Russian"],                            "ru_RU",      "\u0420\u0443\u0441\u0441\u043a\u0438\u0439"),
+    ("sk_SK", LANG["Slovak"],                             "sk_SK",      "Sloven\u010dina"),
+    ("sl_SI", LANG["Slovenian"],                          "sl_SI",      "Sloven\u0161\u010dina"),
+    ("sq",    LANG["Albanian"],                           "sq_AL",      "Shqip"),
+    ("sv_SE", LANG["Swedish"],                            "sv_SE",      "Svenska"),
+    ("ta_IN", LANG["Tamil"],                              "ta_IN",      "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd"),
+    ("tg_TG", LANG["Tajik"],                              "tg_TJ",      "\u0422\u043e\u04b7\u0438\u043a\u04e3"),
+    ("tr",    LANG["Turkish"],                            "tr_TR",      "T\u00fcrk\u00e7e"),
+    ("uk_UA", LANG["Ukrainian"],                          "uk_UA",      "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430"),
+    ("vi_VN", LANG["Vietnamese"],                         "vi_VN",      "Ti\u1ebfng Vi\u1ec7t"),
+    ("gl",    LANG["Galician"],                           "gl_ES",      "Galego"),
+    ("sr",    LANG["Serbian"],                            "sr_Cyrl_RS", "\u0421\u0440\u043f\u0441\u043a\u0438"),
+    # Afrikaans (af-ZA) and Faroese (fo-FO) are shipped by Chromium only
+    # as compiled .bdic — raw .dic/.aff are not checked in. Add them when
+    # an upstream Hunspell source is picked (LibreOffice, etc.).
+]
+
+
+def ensure_chromium_clone(cache_dir: Path) -> Path:
+    """Return the path to an up-to-date shallow clone of Chromium's
+    hunspell repo inside cache_dir.
+
+    An existing clone is refreshed in place (fetch + hard reset to
+    FETCH_HEAD). If the refresh fails, or the target path exists without
+    a .git directory, it is removed and cloned from scratch.
+
+    Raises subprocess.CalledProcessError when the fresh clone fails.
+    """
+    clone = cache_dir / "hunspell_dictionaries"
+    if clone.exists() and (clone / ".git").exists():
+        print(f"  using existing clone at {clone}", flush=True)
+        try:
+            subprocess.run(
+                ["git", "-C", str(clone), "fetch", "--depth=1", "origin", "main"],
+                check=True, capture_output=True, text=True,
+            )
+            subprocess.run(
+                ["git", "-C", str(clone), "reset", "--hard", "FETCH_HEAD"],
+                check=True, capture_output=True, text=True,
+            )
+            return clone
+        except subprocess.CalledProcessError as e:
+            print(f"  refresh failed ({e.stderr.strip()}), recloning",
+                  flush=True)
+    # Anything still sitting at the target path (broken clone, interrupted
+    # previous run, stray file) would make `git clone` refuse to run.
+    if clone.is_dir():
+        shutil.rmtree(clone)
+    elif clone.exists():
+        clone.unlink()
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    print(f"  cloning {CHROMIUM_REPO} (shallow) → {clone}", flush=True)
+    subprocess.run(
+        ["git", "clone", "--depth=1", CHROMIUM_REPO, str(clone)],
+        check=True,
+    )
+    return clone
+
+
+def read_chromium_file(clone: Path, stem: str, ext: str) -> bytes:
+    """Return the raw bytes of `<stem>.<ext>` from the clone directory.
+
+    Raises FileNotFoundError when the file is not present.
+    """
+    filename = f"{stem}.{ext}"
+    candidate = clone / filename
+    if candidate.exists():
+        return candidate.read_bytes()
+    raise FileNotFoundError(f"{filename} not found at chromium")
+
+
+# Chromium SET names → Python codec names when they differ.
+_PY_CODEC_ALIAS = {
+    "windows-1251": "cp1251",
+    "windows-1252": "cp1252",
+    # Spelling listed in hunspell(5); Python's codec lookup does not
+    # recognise it, so decoding would fail with LookupError without this.
+    "microsoft-cp1251": "cp1251",
+}
+
+
+def _parse_aff_charset(aff: bytes) -> str:
+    """Return the SET charset declared in an .aff file. Default per Hunspell
+    docs is ISO-8859-1 when SET is absent.
+
+    The first line whose first whitespace-separated field is exactly SET
+    wins; its second field is the charset name. Any whitespace (spaces or
+    tabs) may separate the two, matching the SET-line regex used when the
+    file is rewritten. A leading UTF-8 BOM on the line is ignored.
+    """
+    for raw in aff.splitlines():
+        line = raw.strip()
+        if line.startswith(b"\xef\xbb\xbf"):  # BOM
+            line = line[3:].strip()
+        fields = line.split()
+        if len(fields) >= 2 and fields[0] == b"SET":
+            return fields[1].decode("ascii", errors="replace")
+    return "ISO-8859-1"
+
+
+def _normalize_charset_name(name: str) -> str:
+    """Canonicalise a charset name for comparison: upper-case it, turn
+    underscores into hyphens and drop spaces."""
+    upper = name.upper()
+    hyphenated = upper.replace("_", "-")
+    return hyphenated.replace(" ", "")
+
+
+def normalize_to_utf8(dic: bytes, aff: bytes) -> tuple[bytes, bytes]:
+    """Re-encode a .dic/.aff pair as UTF-8.
+
+    The source charset is taken from the .aff SET line. Input already
+    declared as UTF-8 is returned unchanged. Otherwise both files are
+    decoded with that charset and encoded as UTF-8, and the first SET
+    line of the .aff is rewritten to "SET UTF-8" (or one is prepended
+    when no SET line matches).
+
+    Raises RuntimeError when the charset is unknown to Python or the
+    bytes do not decode with it.
+    """
+    declared = _parse_aff_charset(aff)
+    if _normalize_charset_name(declared) in ("UTF-8", "UTF8"):
+        return dic, aff
+    python_codec = _PY_CODEC_ALIAS.get(declared, declared)
+    try:
+        dic_text, aff_text = dic.decode(python_codec), aff.decode(python_codec)
+    except (LookupError, UnicodeDecodeError) as e:
+        raise RuntimeError(
+            f"cannot decode dictionary as {declared!r}: {e}") from None
+
+    set_line = re.compile(r"^SET\s+\S+\s*$", re.MULTILINE)
+    if set_line.search(aff_text) is None:
+        aff_text = "SET UTF-8\n" + aff_text
+    else:
+        aff_text = set_line.sub("SET UTF-8", aff_text, count=1)
+    return dic_text.encode("utf-8"), aff_text.encode("utf-8")
+
+
+def make_zip(qt_name: str, dic: bytes, aff: bytes) -> bytes:
+    """Build an in-memory deflated zip holding ``<qt_name>.dic`` followed by
+    ``<qt_name>.aff`` and return the archive bytes."""
+    members = (("dic", dic), ("aff", aff))
+    out = io.BytesIO()
+    with zipfile.ZipFile(
+            out, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
+        for ext, payload in members:
+            archive.writestr(f"{qt_name}.{ext}", payload)
+    return out.getvalue()
+
+
+def sha256_pair(dic: bytes, aff: bytes) -> str:
+    """Return the hex SHA-256 of the pair, hashed as the byte sequence
+    ``b"dic:" + dic + b"aff:" + aff``."""
+    digest = hashlib.sha256(b"dic:")
+    for chunk in (dic, b"aff:", aff):
+        digest.update(chunk)
+    return digest.hexdigest()
+
+
+def bot_call(token: str, method: str, *, data=None, files=None, json_body=None):
+    """POST one Bot API method call and return the ``result`` field.
+
+    The request is sent as JSON when ``json_body`` is given, otherwise as a
+    form with optional ``files``. HTTP 429 responses are retried, up to five
+    attempts in total, sleeping for the server-provided
+    ``parameters.retry_after`` (5 seconds when absent) plus one second.
+
+    Raises RuntimeError when the API reports a failure, when the response
+    is not JSON, or when every attempt was rate-limited. For API failures
+    the message contains the decoded response body; bot_edit_message_text
+    relies on that to recognise "message is not modified".
+    """
+    url = BOT_API.format(token=token, method=method)
+    max_attempts = 5
+    for attempt in range(max_attempts):
+        if json_body is not None:
+            r = requests.post(url, json=json_body, timeout=120)
+        else:
+            r = requests.post(url, data=data, files=files, timeout=300)
+        try:
+            body = r.json()
+        except ValueError:
+            # Not JSON, e.g. an HTML error page from a proxy.
+            body = None
+        if r.status_code == 429:
+            if attempt == max_attempts - 1:
+                # No further attempt follows, so sleeping would be wasted.
+                break
+            params = body.get("parameters") if isinstance(body, dict) else None
+            wait = (params or {}).get("retry_after", 5)
+            print(f"  rate-limited, sleeping {wait}s", flush=True)
+            time.sleep(wait + 1)
+            continue
+        if not isinstance(body, dict):
+            # Deliberately not r.raise_for_status(): its message embeds the
+            # request URL, and the URL contains the bot token.
+            raise RuntimeError(
+                f"Bot API {method} failed ({r.status_code}):"
+                f" response is not a JSON object"
+            )
+        if r.ok and body.get("ok"):
+            return body["result"]
+        raise RuntimeError(
+            f"Bot API {method} failed ({r.status_code}): {body}"
+        )
+    raise RuntimeError(f"Bot API {method}: too many retries")
+
+
+def bot_send_document(token, chat_id, filename, blob):
+    """Upload ``blob`` to ``chat_id`` through sendDocument, silently, under
+    the given filename and with the application/zip content type.
+
+    Returns ``(message_id, file_size)`` taken from the API result.
+    """
+    form_fields = {"chat_id": chat_id, "disable_notification": "true"}
+    upload = {"document": (filename, blob, "application/zip")}
+    sent = bot_call(token, "sendDocument", data=form_fields, files=upload)
+    return sent["message_id"], sent["document"]["file_size"]
+
+
+def bot_edit_message_text(token, chat_id, message_id, text):
+    """Replace the text of an existing message through editMessageText.
+
+    A RuntimeError from bot_call whose message contains "message is not
+    modified" is treated as success and only logged; any other error
+    propagates to the caller.
+    """
+    payload = {
+        "chat_id": chat_id,
+        "message_id": message_id,
+        "text": text,
+    }
+    try:
+        bot_call(token, "editMessageText", json_body=payload)
+    except RuntimeError as err:
+        if "message is not modified" not in str(err):
+            raise
+        print("manifest post unchanged, skip edit", flush=True)
+
+
+def bot_send_placeholder(token, chat_id):
+    """Post a silent ``{}`` text message to ``chat_id`` and return its
+    message_id, so the manifest can later be edited into that post."""
+    payload = {
+        "chat_id": chat_id,
+        "text": "{}",
+        "disable_notification": True,
+    }
+    sent = bot_call(token, "sendMessage", json_body=payload)
+    return sent["message_id"]
+
+
+def load_state(path: Optional[Path]) -> dict:
+    """Load the persisted per-language upload state.
+
+    Returns the parsed JSON content of ``path``, or an empty dict when no
+    path was given or the file does not exist yet. The file is read as
+    UTF-8 regardless of the platform's locale encoding.
+    """
+    if path and path.exists():
+        return json.loads(path.read_text(encoding="utf-8"))
+    return {}
+
+
+def save_state(path: Optional[Path], state: dict) -> None:
+    """Persist ``state`` as indented JSON at ``path``; no-op without a path.
+
+    Missing parent directories are created. The content is written as
+    UTF-8 (non-ASCII characters are emitted unescaped) to a sibling
+    ``<name>.tmp`` file that is then moved over ``path``, so an interrupted
+    run does not leave a truncated state file behind.
+    """
+    if not path:
+        return
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(state, ensure_ascii=False, indent=2) + "\n"
+    tmp = path.with_name(path.name + ".tmp")
+    tmp.write_text(payload, encoding="utf-8")
+    tmp.replace(path)
+
+
+# Default value of --client-source: spellchecker_common.cpp under
+# SourceFiles/chat_helpers, located one directory above the directory that
+# contains this script (symlinks in the script path are resolved first).
+_DEFAULT_CLIENT_SOURCE = (
+    Path(__file__).resolve().parent.parent
+    / "SourceFiles" / "chat_helpers" / "spellchecker_common.cpp"
+)
+
+
+def _client_channel_username(channel: str) -> Optional[str]:
+    """Extract a channel username from a --channel style value.
+
+    Leading ``@`` characters and then leading ``+`` characters are removed.
+    Returns None when nothing is left, or when the remainder starts with
+    ``-`` or consists only of digits (i.e. looks like a numeric chat id).
+    """
+    name = str(channel).lstrip("@").lstrip("+")
+    looks_like_chat_id = name.startswith("-") or name.isdigit()
+    if name and not looks_like_chat_id:
+        return name
+    return None
+
+
+def patch_client_source(
+    path: Path,
+    channel_username: Optional[str],
+    post_id: int,
+) -> None:
+    """Rewrite the manifest constants inside the client source file.
+
+    The first ``kDictionariesManifestPostId`` definition always gets
+    ``post_id``. When ``channel_username`` is not None, the first
+    ``kDictionariesManifestChannel`` string literal gets that username too.
+    The file is written only if its text actually changed.
+
+    Raises RuntimeError when a constant that should be patched cannot be
+    found in the file.
+    """
+    before = path.read_text(encoding="utf-8")
+    updated = before
+
+    if channel_username is not None:
+        channel_re = (
+            r'(constexpr auto kDictionariesManifestChannel\s*=\s*)'
+            r'"[^"]*"(_cs\s*;)'
+        )
+
+        # Callable replacements keep backslashes in the value literal.
+        def put_channel(m):
+            return f'{m.group(1)}"{channel_username}"{m.group(2)}'
+
+        updated, hits = re.subn(channel_re, put_channel, updated, count=1)
+        if not hits:
+            raise RuntimeError(
+                f"patch: kDictionariesManifestChannel not found in {path}")
+
+    post_id_re = (
+        r'(constexpr auto kDictionariesManifestPostId\s*=\s*)'
+        r'\d+(\s*;)'
+    )
+
+    def put_post_id(m):
+        return f'{m.group(1)}{post_id}{m.group(2)}'
+
+    updated, hits = re.subn(post_id_re, put_post_id, updated, count=1)
+    if not hits:
+        raise RuntimeError(
+            f"patch: kDictionariesManifestPostId not found in {path}")
+
+    if updated == before:
+        print(f"  {path}: constants already up to date", flush=True)
+        return
+
+    path.write_text(updated, encoding="utf-8")
+    summary = f"postId={post_id}"
+    if channel_username is not None:
+        summary += f", channel={channel_username}"
+    print(f"  patched {path}: {summary}", flush=True)
+
+
+def format_manifest(entries: list[dict]) -> str:
+    """Serialize the manifest as JSON text with one dictionary entry per
+    line, keys sorted and non-ASCII characters left unescaped."""
+    # One entry per line for readable diffs and to keep message size small
+    # enough for editMessageText (4096-char limit).
+    rows = [
+        json.dumps(entry, ensure_ascii=False, sort_keys=True)
+        for entry in entries
+    ]
+    # Every row but the last carries a trailing comma.
+    body = [row + "," for row in rows[:-1]] + rows[-1:]
+    return "\n".join(['{"version":1,"dictionaries":[', *body, "]}"])
+
+
+def main():
+    """Command-line entry point.
+
+    For every entry of LANGUAGES: read the dic/aff pair from the chromium
+    clone, normalize it to UTF-8, zip it and upload it to the blobs channel
+    unless the state file already records an upload with the same digest
+    and qt_name. Then write the resulting manifest into the manifest post,
+    optionally patch the client constants, save the state and remove the
+    clone. With --dry-run nothing is uploaded, edited or removed and the
+    manifest is only printed.
+
+    The state file is saved after each successful upload, so a later
+    failure does not lose the post ids of blobs that were already sent.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--bot-token", default=os.environ.get("TG_BOT_TOKEN"),
+                    help="Bot API token (or via TG_BOT_TOKEN env)")
+    ap.add_argument("--channel", required=True,
+                    help="@username or numeric chat_id of the channel"
+                         " that holds the manifest post (sendMessage /"
+                         " editMessageText target)")
+    ap.add_argument("--blobs-channel", default=None,
+                    help="@username of the channel blobs are uploaded"
+                         " into via sendDocument; defaults to --channel"
+                         " with any leading @ stripped. Must be a public"
+                         " username — clients resolve locations by it")
+    ap.add_argument("--manifest-post-id", type=int, default=None,
+                    help="reuse this message_id; if omitted, sends a new"
+                         " placeholder first and uses its id (prints so"
+                         " you can hardcode it in the client)")
+    ap.add_argument("--state-file", type=Path, default=None,
+                    help="path to persist sha/post_id/size per language"
+                         " for incremental re-uploads")
+    ap.add_argument("--languages", default="",
+                    help="comma-separated chromium stems to restrict to")
+    ap.add_argument("--dry-run", action="store_true",
+                    help="fetch and zip but do not upload, edit, or clean")
+    ap.add_argument("--cache-dir", type=Path,
+                    default=Path(".chromium_hunspell_cache"),
+                    help="directory for the shallow chromium clone")
+    ap.add_argument("--keep-cache", action="store_true",
+                    help="keep --cache-dir after completion (default:"
+                         " delete the chromium clone when done)")
+    ap.add_argument("--client-source", type=Path,
+                    default=_DEFAULT_CLIENT_SOURCE,
+                    help="path to spellchecker_common.cpp; after a"
+                         " successful manifest edit the script rewrites"
+                         " kDictionariesManifestChannel and"
+                         " kDictionariesManifestPostId in place")
+    ap.add_argument("--skip-client-patch", action="store_true",
+                    help="do not rewrite manifest constants in"
+                         " --client-source")
+    args = ap.parse_args()
+
+    if not args.dry_run and not args.bot_token:
+        sys.exit("error: --bot-token or TG_BOT_TOKEN env required")
+
+    # Same rule as for the client constant: anything that looks like a
+    # numeric chat_id (negative or purely digits) is not a username.
+    blobs_target = args.blobs_channel or args.channel
+    blobs_username = _client_channel_username(blobs_target)
+    if blobs_username is None:
+        sys.exit("error: blobs channel must be @username (clients resolve"
+                 " locations by public username, not chat_id). Pass"
+                 " --blobs-channel @name when --channel is numeric.")
+
+    manifest_post_id = args.manifest_post_id
+    if manifest_post_id is None and not args.dry_run:
+        manifest_post_id = bot_send_placeholder(
+            args.bot_token, args.channel)
+        print(f"created manifest placeholder, message_id="
+              f"{manifest_post_id}", flush=True)
+        print(f"hardcode in client: kDictionariesManifestPostId = "
+              f"{manifest_post_id}", flush=True)
+
+    def location(post_id: int) -> str:
+        return f"{blobs_username}#{post_id}"
+
+    def manifest_entry(lang_id, display, post_id, size) -> dict:
+        # Shape of one element of the manifest "dictionaries" array.
+        return {
+            "id": lang_id,
+            "name": display,
+            "location": location(post_id),
+            "size": size,
+        }
+
+    # Bot uploads go to the blobs channel. The Bot API addresses a public
+    # channel as "@username", so the prefix is added back here; this makes
+    # uploads work whether or not the option was typed with the "@".
+    blobs_chat = f"@{blobs_username}"
+
+    # Tolerate spaces around the commas ("en-US, ru-RU").
+    filter_set = {s.strip() for s in args.languages.split(",") if s.strip()}
+    state = load_state(args.state_file)
+    manifest_entries = []
+
+    clone = ensure_chromium_clone(args.cache_dir)
+
+    for stem, lang_id, qt_name, display in LANGUAGES:
+        if filter_set and stem not in filter_set:
+            # Not selected in this run: keep its previous upload, if any,
+            # in the manifest.
+            prev = state.get(stem)
+            if prev:
+                manifest_entries.append(manifest_entry(
+                    lang_id, display, prev["post_id"], prev["size"]))
+            continue
+
+        print(f"[{stem} → {qt_name}]", flush=True)
+        try:
+            dic_raw = read_chromium_file(clone, stem, "dic")
+            aff_raw = read_chromium_file(clone, stem, "aff")
+        except FileNotFoundError as e:
+            print(f"  skip: {e}", flush=True)
+            continue
+        try:
+            dic, aff = normalize_to_utf8(dic_raw, aff_raw)
+        except RuntimeError as e:
+            print(f"  skip: {e}", flush=True)
+            continue
+        # normalize_to_utf8 returns its inputs unchanged for UTF-8 pairs,
+        # so a different object means the pair was recoded.
+        if dic is not dic_raw:
+            print(f"  recoded to UTF-8 from "
+                  f"{_parse_aff_charset(aff_raw)}", flush=True)
+        digest = sha256_pair(dic, aff)
+
+        prev = state.get(stem)
+        if (prev
+                and prev.get("sha256") == digest
+                and prev.get("qt_name") == qt_name
+                and not args.dry_run):
+            print(f"  unchanged (sha {digest[:8]}), carrying postId="
+                  f"{prev['post_id']}", flush=True)
+            manifest_entries.append(manifest_entry(
+                lang_id, display, prev["post_id"], prev["size"]))
+            continue
+
+        blob = make_zip(qt_name, dic, aff)
+        print(f"  zipped: dic={len(dic):,}  aff={len(aff):,}  "
+              f"zip={len(blob):,}", flush=True)
+
+        if args.dry_run:
+            # Nothing is uploaded: show the previous post id, or 0.
+            manifest_entries.append(manifest_entry(
+                lang_id, display,
+                prev["post_id"] if prev else 0, len(blob)))
+            continue
+
+        post_id, size = bot_send_document(
+            args.bot_token, blobs_chat, qt_name, blob)
+        print(f"  uploaded: postId={post_id} size={size}", flush=True)
+
+        state[stem] = {
+            "sha256": digest,
+            "post_id": post_id,
+            "size": size,
+            "qt_name": qt_name,
+        }
+        # Persist right away: if a later upload, the manifest edit or the
+        # client patch fails, the next run can still reuse this post.
+        save_state(args.state_file, state)
+        manifest_entries.append(
+            manifest_entry(lang_id, display, post_id, size))
+
+    manifest_text = format_manifest(manifest_entries)
+    print(f"\nmanifest: {len(manifest_entries)} entries, "
+          f"{len(manifest_text):,} chars", flush=True)
+
+    if args.dry_run:
+        print("--- manifest (dry-run) ---")
+        print(manifest_text)
+        return
+
+    bot_edit_message_text(
+        args.bot_token, args.channel,
+        manifest_post_id, manifest_text)
+    print(f"manifest post {manifest_post_id} updated", flush=True)
+
+    if not args.skip_client_patch:
+        channel_username = _client_channel_username(args.channel)
+        if channel_username is None:
+            print(f"  --channel {args.channel!r} is not a @username;"
+                  f" updating only kDictionariesManifestPostId in"
+                  f" {args.client_source}", flush=True)
+        patch_client_source(
+            args.client_source, channel_username, manifest_post_id)
+
+    save_state(args.state_file, state)
+
+    if not args.keep_cache and args.cache_dir.exists():
+        print(f"removing {args.cache_dir}", flush=True)
+        shutil.rmtree(args.cache_dir)
+
+
+# Run the uploader only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()