Support for languages using Diacritics as multi-codepoint joiners (#11806)

* Diacritics support - achieving 1-Char-Per-Glyph via a fake alphabet

* Diacritics support - Redesign state engine and polishing

* Diacritics support - Unit test

* Diacritics support - Expand unit test to cover more cases

* Diacritics support - Expand unit test to cover more cases

* Clarify a function name

* Change format of diacritic definitions

* Refactor DiacriticSupport to per-language class with statics in Companion

* Update DiacriticSupport to use CharCategory and enable support of surrogate pairs

* Documentation
This commit is contained in:
SomeTroglodyte 2024-06-29 22:39:46 +02:00 committed by GitHub
parent 0f2a697ba6
commit b5622df92d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 432 additions and 32 deletions

View File

@ -9,6 +9,15 @@
# Don't translate these words to your language, only put 'true' or 'false'. # Don't translate these words to your language, only put 'true' or 'false'.
StartWithCapitalLetter = StartWithCapitalLetter =
# Diacritics Support:
# See https://yairm210.github.io/Unciv/Other/Translating/#diacritics-support for details!
# Most languages will not need these, feel free to ignore, or use "" to avoid the "requires translation" mark. Do NOT translate the "key" to the left of the equals sign!
diacritics_support =
unicode_block_start_character =
unicode_block_end_character =
left_joining_diacritics =
right_joining_diacritics =
left_and_right_joiners =
# Fastlane # Fastlane
# These will be automatically copied to the fastlane descriptions used by F-Droid. Their keys are not as usual the english original, please read those directly as linked. # These will be automatically copied to the fastlane descriptions used by F-Droid. Their keys are not as usual the english original, please read those directly as linked.

View File

@ -90,9 +90,9 @@ class AndroidFont : FontImplementation {
return paint.textSize.toInt() return paint.textSize.toInt()
} }
override fun getCharPixmap(char: Char): Pixmap { override fun getCharPixmap(symbolString: String): Pixmap {
val metric = getMetrics() // Use our interpretation instead of paint.fontMetrics because it fixes some bad metrics val metric = getMetrics() // Use our interpretation instead of paint.fontMetrics because it fixes some bad metrics
var width = paint.measureText(char.toString()).toInt() var width = paint.measureText(symbolString).toInt()
var height = ceil(metric.height).toInt() var height = ceil(metric.height).toInt()
if (width == 0) { if (width == 0) {
height = getFontSize() height = getFontSize()
@ -101,7 +101,7 @@ class AndroidFont : FontImplementation {
val bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888) val bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888)
val canvas = Canvas(bitmap) val canvas = Canvas(bitmap)
canvas.drawText(char.toString(), 0f, metric.leading + metric.ascent + 1f, paint) canvas.drawText(symbolString, 0f, metric.leading + metric.ascent + 1f, paint)
val pixmap = Pixmap(width, height, Pixmap.Format.RGBA8888) val pixmap = Pixmap(width, height, Pixmap.Format.RGBA8888)
val data = IntArray(width * height) val data = IntArray(width * height)

View File

@ -7,10 +7,12 @@ import com.unciv.models.ruleset.RulesetCache
import com.unciv.models.ruleset.unique.Unique import com.unciv.models.ruleset.unique.Unique
import com.unciv.models.stats.Stat import com.unciv.models.stats.Stat
import com.unciv.models.stats.Stats import com.unciv.models.stats.Stats
import com.unciv.ui.components.fonts.DiacriticSupport
import com.unciv.ui.components.fonts.FontRulesetIcons import com.unciv.ui.components.fonts.FontRulesetIcons
import com.unciv.utils.Log import com.unciv.utils.Log
import com.unciv.utils.debug import com.unciv.utils.debug
import java.util.Locale import java.util.Locale
import org.jetbrains.annotations.VisibleForTesting
/** /**
* This collection holds all translations for the game. * This collection holds all translations for the game.
@ -41,7 +43,6 @@ class Translations : LinkedHashMap<String, TranslationEntry>() {
// used by tr() whenever GameInfo not initialized (allowing new game screen to use mod translations) // used by tr() whenever GameInfo not initialized (allowing new game screen to use mod translations)
var translationActiveMods = LinkedHashSet<String>() var translationActiveMods = LinkedHashSet<String>()
/** /**
* Searches for the translation entry of a given [text] for a given [language]. * Searches for the translation entry of a given [text] for a given [language].
* This includes translations provided by mods from [activeMods] * This includes translations provided by mods from [activeMods]
@ -118,22 +119,27 @@ class Translations : LinkedHashMap<String, TranslationEntry>() {
debug("Loading translation file for %s - %sms", language, System.currentTimeMillis() - translationStart) debug("Loading translation file for %s - %sms", language, System.currentTimeMillis() - translationStart)
} }
private fun createTranslations(language: String, languageTranslations: HashMap<String,String>) { @VisibleForTesting
for (translation in languageTranslations) { fun createTranslations(language: String, languageTranslations: HashMap<String, String>) {
val hashKey = if (translation.key.contains('[') && !translation.key.contains('<')) val diacriticSupport = DiacriticSupport(languageTranslations).takeIf { it.isEnabled() }
translation.key.getPlaceholderText() for ((key, value) in languageTranslations) {
else translation.key val hashKey = if (key.contains('[') && !key.contains('<'))
key.getPlaceholderText()
else key
var entry = this[hashKey] var entry = this[hashKey]
if (entry == null) { if (entry == null) {
entry = TranslationEntry(translation.key) entry = TranslationEntry(key)
this[hashKey] = entry this[hashKey] = entry
} }
entry[language] = translation.value entry[language] = diacriticSupport?.remapDiacritics(value) ?: value
} }
} }
fun tryReadTranslationForCurrentLanguage() { fun tryReadTranslationForCurrentLanguage() {
DiacriticSupport.reset()
tryReadTranslationForLanguage(UncivGame.Current.settings.language) tryReadTranslationForLanguage(UncivGame.Current.settings.language)
DiacriticSupport.freeTranslationData()
} }
/** Get a list of supported languages for [readAllLanguagesTranslation] */ /** Get a list of supported languages for [readAllLanguagesTranslation] */
@ -174,9 +180,11 @@ class Translations : LinkedHashMap<String, TranslationEntry>() {
val translationStart = System.currentTimeMillis() val translationStart = System.currentTimeMillis()
DiacriticSupport.reset()
for (language in getLanguagesWithTranslationFile()) { for (language in getLanguagesWithTranslationFile()) {
tryReadTranslationForLanguage(language) tryReadTranslationForLanguage(language)
} }
DiacriticSupport.freeTranslationData()
debug("Loading translation files - %sms", System.currentTimeMillis() - translationStart) debug("Loading translation files - %sms", System.currentTimeMillis() - translationStart)
} }

View File

@ -0,0 +1,251 @@
package com.unciv.ui.components.fonts
import com.unciv.utils.Log
import org.jetbrains.annotations.VisibleForTesting
/**
 *  ## An engine to support languages with heavy diacritic usage through Gdx Scene2D
 *
 *  ### Concepts
 *  - This is not needed for diacritics where Unicode already defines the combined glyphs as individual codepoints
 *  - Gdx text rendering assumes one Char one Glyph (and left-to-right)
 *  - The underlying OS **does** have the capability to render glyphs created by combining diacritic joiners with other characters (if not, this fails with ugly output but hopefully no exceptions).
 *  - We'll deal with one glyph at a time arranged left to right, and expect a finite number of combination glyphs (all fit into the Unicode Private Use Area **together** with [FontRulesetIcons]).
 *  - We'll recognize these combos in the translated texts at translation loading time and map each combo into a fake alphabet, which fulfills the "one Char one Glyph" tenet.
 *  - Conversely, the loader will build a map of distinct combinations -codepoint sequences- that map into a single glyph and correlate each with their fake alphabet codepoint.
 *  - At render time, only the map of fake alphabet codepoints to their original codepoint sequences is needed.
 *      * Remember, NativeBitmapFontData renders and caches glyphs on demand
 *      * GlyphLayout (a Gdx class) needs a Glyph that's not yet cached, then:
 *      * If it's in the fake alphabet, we'll ask the OS to render the original codepoint sequence instead
 *      * Otherwise render the single Char as before
 *
 *  ### Usage
 *  - Call [reset] when translation loading starts over
 *  - Instantiate [DiacriticSupport] through the constructor-like factory [invoke] once a translation file is read (its map of key (left of =) to translation (right of =) is in memory, pass that as argument)
 *  - Check [isEnabled] - if false, the rest of that language load need not bother with diacritics
 *  - Call [remapDiacritics] on each translation and store the result instead of the original value
 *  - If you wish to save some memory, call [freeTranslationData] after all required languages are done
 *  - Later, [NativeBitmapFontData.createAndCacheGlyph] will use [getStringFor] to map the fake alphabet back to codepoint sequences
 *
 *  ### Notes
 *  - [FontRulesetIcons] initialize ***after*** Translation loading. If this ever changes, this might need some tweaking.
 *  - The primary constructor is only used from the [Companion.invoke] factory and for testing.
 *  - The fake alphabet and the next free codepoint live in the companion, i.e. they are shared by all instances (languages).
 *
 *  @param enabled Whether this language uses the engine at all; when false no character classes are built and [remapDiacritics] refuses to work
 *  @param range The characters to classify via their Unicode category; an empty range selects the default (U+0021..U+FFEE)
 *  @param leftDiacritics Characters that join with the character to their left (overrides the category-based class)
 *  @param rightDiacritics Characters that join with the character to their right (overrides the above)
 *  @param joinerDiacritics Characters that join with both their left and right neighbours (overrides the above)
 */
class DiacriticSupport(
    private val enabled: Boolean = false,
    range: CharRange,
    leftDiacritics: String,
    rightDiacritics: String,
    joinerDiacritics: String
) {
    /** The translation file keys (left of the equals sign) carrying the diacritic settings of a language */
    private object TranslationKeys {
        const val enable = "diacritics_support"
        const val rangeStart = "unicode_block_start_character"
        const val rangeEnd = "unicode_block_end_character"
        const val left = "left_joining_diacritics"
        const val right = "right_joining_diacritics"
        const val joiner = "left_and_right_joiners"
    }

    companion object {
        /** Start at end of Unicode Private Use Area and go down from there: UShort is the preferred Char() constructor parameter! */
        private const val startingReplacementCodepoint: UShort = 63743u // 0xF8FF

        /** Use stdlib CharCategory to determine which codepoints represent combining diacritical marks.
         *  We're defaulting all punctuation, currency & other symbols, and nonprinting to None,
         *  meaning they won't combine even when followed by a diacritic. */
        private val charCategoryToClass = mapOf(
            CharCategory.UPPERCASE_LETTER to CharClass.Base,
            CharCategory.LOWERCASE_LETTER to CharClass.Base,
            CharCategory.TITLECASE_LETTER to CharClass.Base,
            CharCategory.OTHER_LETTER to CharClass.Base,
            CharCategory.MODIFIER_LETTER to CharClass.Base,
            CharCategory.DECIMAL_DIGIT_NUMBER to CharClass.Base,
            CharCategory.LETTER_NUMBER to CharClass.Base,
            CharCategory.OTHER_NUMBER to CharClass.Base,
            CharCategory.COMBINING_SPACING_MARK to CharClass.LeftJoiner,
            CharCategory.NON_SPACING_MARK to CharClass.LeftJoiner,
            CharCategory.ENCLOSING_MARK to CharClass.LeftJoiner,
            CharCategory.SURROGATE to CharClass.Surrogate
        )

        private const val defaultRangeStart = '\u0021'
        private const val defaultRangeEnd = '\uFFEE'

        /** Strips optional surrounding double quotes and an optional trailing `# comment` from a definition.
         *  Compiled once here instead of once per loaded language. */
        private val stripCommentRegex = """^"?(.*?)"?(?:\s*#.*)?$""".toRegex()

        /** Next unassigned fake alphabet codepoint, counting downwards from [startingReplacementCodepoint] */
        private var nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
        /** Fake alphabet codepoint to the original codepoint sequence - needed at render time */
        private val fakeAlphabet = mutableMapOf<Char, String>()
        /** Original codepoint sequence to fake alphabet codepoint - only needed while loading translations */
        private val inverseMap = mutableMapOf<String, Char>()

        /** Prepares this for a complete start-over, expecting a language load to instantiate a DiacriticSupport next */
        fun reset() {
            fakeAlphabet.clear()
            freeTranslationData()
            nextFreeDiacriticReplacementCodepoint = startingReplacementCodepoint
        }

        /** This is the main engine for rendering text glyphs after the translation loader has filled up this `object`
         *  @param char The real or "fake alphabet" char stored by [remapDiacritics] to render
         *  @return The one to many (probably 8 max) codepoint string to be rendered into a single glyph by native font services
         */
        fun getStringFor(char: Char): String = fakeAlphabet[char] ?: char.toString()

        /** Call when use of [remapDiacritics] is finished to save some memory */
        fun freeTranslationData() {
            // Debug aid: log one example combination per distinct length before discarding them
            for ((length, examples) in inverseMap.keys.groupBy { it.length }.toSortedMap()) {
                Log.debug("Length %d - example %s", length, examples.first())
            }
            inverseMap.clear()
        }

        /** Other "fake" alphabets can use Unicode Private Use Areas from U+E000 up to including... */
        fun getCurrentFreeCode(): Char = Char(nextFreeDiacriticReplacementCodepoint)

        /** If this is true, no need to bother [remapping chars at render time][getStringFor] */
        fun isEmpty(): Boolean = fakeAlphabet.isEmpty()

        /** Parses one diacritic setting: strips quotes and comment, then accepts either one run of characters,
         *  or space-separated tokens that are each a single character or a "U+xxxx" hex notation.
         *  @return The defined characters as one String, empty for a missing or empty [entry]
         *  @throws IllegalArgumentException for a definition that cannot be parsed
         */
        private fun parseDiacriticEntry(entry: String?): String {
            if (entry.isNullOrEmpty()) return ""
            val match = stripCommentRegex.matchEntire(entry)
                ?: throw IllegalArgumentException("Invalid diacritic definition: \"$entry\" could not be parsed")
            val tokens = match.groupValues[1].split(' ')
            return tokens.joinToString("") { token ->
                when {
                    token.length == 1 -> token
                    token.startsWith("u+", ignoreCase = true) -> Char(token.drop(2).toInt(16)).toString()
                    // A single multi-character token is a plain list of characters without separators
                    tokens.size == 1 -> token
                    else -> throw IllegalArgumentException("Invalid diacritic definition: \"$token\" is not a single character or unicode codepoint notation")
                }
            }
        }

        /** Factory that gets the primary constructor parameters by extracting the translation entries for [TranslationKeys] */
        operator fun invoke(translations: HashMap<String, String>): DiacriticSupport {
            val enable = parseDiacriticEntry(translations[TranslationKeys.enable]) == "true"
            val rangeStart = parseDiacriticEntry(translations[TranslationKeys.rangeStart])
            val rangeEnd = parseDiacriticEntry(translations[TranslationKeys.rangeEnd])
            // An incomplete range definition becomes an empty range, which the constructor replaces with the default
            val range = if (rangeStart.isEmpty() || rangeEnd.isEmpty()) CharRange.EMPTY
                else rangeStart.first()..rangeEnd.first()
            val leftDiacritics = parseDiacriticEntry(translations[TranslationKeys.left])
            val rightDiacritics = parseDiacriticEntry(translations[TranslationKeys.right])
            val joinerDiacritics = parseDiacriticEntry(translations[TranslationKeys.joiner])
            return DiacriticSupport(enable, range, leftDiacritics, rightDiacritics, joinerDiacritics)
        }
    }

    /** Per-language classification of characters; anything not contained is treated as [CharClass.None] */
    private val charClassMap = mutableMapOf<Char, CharClass>()

    /** Holds all information to process a single translation line and replace diacritic combinations with fake alphabet codepoints */
    private inner class LineData(capacity: Int) {
        /** The finished part of the line */
        val output = StringBuilder(capacity)
        /** The pending combination which may still grow */
        val accumulator = StringBuilder(9) // touhidurrr said there can be nine
        /** A high surrogate waiting for its low surrogate, or [Char.MIN_VALUE] for none */
        var waitingHighSurrogate = Char.MIN_VALUE

        /** Whether the last pending character wants to join with whatever comes next */
        fun expectsJoin() = accumulator.isNotEmpty() && getCharClass(accumulator.last()).expectsRightJoin

        /** Moves the pending combination to [output] - as is for at most one Char, as fake alphabet codepoint otherwise */
        fun flush() {
            if (accumulator.length <= 1) output.append(accumulator)
            else output.append(getReplacementChar(accumulator.toString()))
            accumulator.clear()
        }

        /** @throws IllegalArgumentException if a high surrogate is still waiting for its partner */
        fun forbidWaitingHighSurrogate() {
            require(waitingHighSurrogate == Char.MIN_VALUE) { "Invalid Unicode: High surrogate without low surrogate" }
        }

        /** Adds [char] to the pending combination unconditionally */
        fun accumulate(char: Char) {
            forbidWaitingHighSurrogate()
            accumulator.append(char)
        }

        /** Starts a new pending combination with [char], unless the pending one expects a join to its right */
        fun flushAccumulate(char: Char) {
            forbidWaitingHighSurrogate()
            if (!expectsJoin()) flush()
            accumulator.append(char)
        }

        /** Ends the pending combination and passes [char] through to the output unchanged */
        fun flushAppend(char: Char) {
            forbidWaitingHighSurrogate()
            flush()
            output.append(char)
        }

        /** Remembers a high surrogate, or treats a completed surrogate pair like a base character */
        fun surrogate(char: Char) {
            if (char.isHighSurrogate()) {
                forbidWaitingHighSurrogate()
                waitingHighSurrogate = char
            } else {
                require(waitingHighSurrogate != Char.MIN_VALUE) { "Invalid Unicode: Low surrogate without high surrogate" }
                if (!expectsJoin()) flush()
                accumulator.append(waitingHighSurrogate)
                accumulator.append(char)
                waitingHighSurrogate = Char.MIN_VALUE
            }
        }

        /** Finishes the line and returns the remapped text
         *  @throws IllegalArgumentException if the line ends with an unpaired high surrogate
         */
        fun result(): String {
            // A trailing high surrogate is only parked in waitingHighSurrogate, never in the accumulator:
            // without this check it would vanish from the output without any complaint
            forbidWaitingHighSurrogate()
            flush()
            return output.toString()
        }
    }

    /** Represents a class of input character and its processing method when processing a translation line
     *  @property expectsRightJoin Whether a character of this class joins with the character following it
     */
    private enum class CharClass(val expectsRightJoin: Boolean = false) {
        None {
            override fun process(data: LineData, char: Char) = data.flushAppend(char)
        },
        Base {
            override fun process(data: LineData, char: Char) = data.flushAccumulate(char)
        },
        LeftJoiner {
            override fun process(data: LineData, char: Char) = data.accumulate(char)
        },
        RightJoiner(true) {
            override fun process(data: LineData, char: Char) = data.flushAccumulate(char)
        },
        LeftRightJoiner(true) {
            override fun process(data: LineData, char: Char) = data.accumulate(char)
        },
        Surrogate {
            override fun process(data: LineData, char: Char) = data.surrogate(char)
        };

        /** Feeds [char] into the line state [data] according to this class */
        abstract fun process(data: LineData, char: Char)
    }

    /** All combinations mapped so far - a live view that empties on [freeTranslationData] */
    @VisibleForTesting
    fun getKnownCombinations(): Set<String> = inverseMap.keys

    /** Set at instantiation, if false the translation loader need not bother passing stuff through [remapDiacritics]. */
    fun isEnabled(): Boolean = enabled

    private fun getCharClass(char: Char) = charClassMap[char] ?: CharClass.None

    private fun getReplacementChar(joined: String) = inverseMap[joined] ?: createReplacementChar(joined)

    /** Assigns the next free fake alphabet codepoint to the combination [joined]
     *  @throws IllegalStateException when no codepoint is left
     */
    private fun createReplacementChar(joined: String): Char {
        // Check before touching any shared state, so a failure leaves counter and maps as they were
        check(nextFreeDiacriticReplacementCodepoint > FontRulesetIcons.UNUSED_CHARACTER_CODES_START.toUInt()) {
            "DiacriticsSupport has exhausted the Unicode private use area"
        }
        val char = getCurrentFreeCode()
        nextFreeDiacriticReplacementCodepoint--
        fakeAlphabet[char] = joined
        inverseMap[joined] = char
        return char
    }

    init {
        if (enabled) {
            val rangeStart = if (range.isEmpty()) defaultRangeStart else range.first
            val rangeEnd = if (range.isEmpty()) defaultRangeEnd else range.last
            // Categories missing from charCategoryToClass stay unmapped and thus count as CharClass.None
            for (char in rangeStart..rangeEnd)
                charClassMap[char] = charCategoryToClass[char.category] ?: continue
            // Explicit definitions override the category-based class, later lists override earlier ones
            for (char in leftDiacritics) charClassMap[char] = CharClass.LeftJoiner
            for (char in rightDiacritics) charClassMap[char] = CharClass.RightJoiner
            for (char in joinerDiacritics) charClassMap[char] = CharClass.LeftRightJoiner
        }
    }

    /** Replaces the combos of diacritics/joiners with their affected characters with a "fake" alphabet
     *  @param value One translation text
     *  @return [value] with every multi-Char combination replaced by one fake alphabet codepoint
     *  @throws IllegalStateException if this instance is not enabled, or the fake alphabet has no room left
     *  @throws IllegalArgumentException if [value] contains unpaired surrogates
     */
    fun remapDiacritics(value: String): String {
        check(enabled) { "DiacriticSupport not set up properly for translation processing" }
        val data = LineData(value.length)
        for (char in value) {
            getCharClass(char).process(data, char)
        }
        return data.result()
    }
}

View File

@ -6,7 +6,18 @@ import com.badlogic.gdx.graphics.g2d.BitmapFont
interface FontImplementation { interface FontImplementation {
fun setFontFamily(fontFamilyData: FontFamilyData, size: Int) fun setFontFamily(fontFamilyData: FontFamilyData, size: Int)
fun getFontSize(): Int fun getFontSize(): Int
fun getCharPixmap(char: Char): Pixmap
/** Why are we having two [getCharPixmap] overloads:
* - The Char one was used alone for a long time. We added the String one for Diacritic support - it still is meant to give one Glyph per input,
* but supports both single characters and short combos of diacritics with their target characters.
* - The desktop implementation currently uses (java.awt) metric.charWidth for the Char overload and metric.stringWidth for the String overload.
* - If there were a guarantee that these were always identical for a char and its toString(), then the Char overload would be redundant.
* - The author just wanted to make 100% sure **nothing** changes for non-Diacritic languages.
* - This could be tested with FasterUIDevelopment, as there the special Char overload is ignored.
* */
fun getCharPixmap(char: Char) = getCharPixmap(char.toString())
fun getCharPixmap(symbolString: String): Pixmap
fun getSystemFonts(): Sequence<FontFamilyData> fun getSystemFonts(): Sequence<FontFamilyData>
fun getBitmapFont(): BitmapFont { fun getBitmapFont(): BitmapFont {

View File

@ -81,7 +81,8 @@ class NativeBitmapFontData(
// Check alpha to guess whether this is a round icon // Check alpha to guess whether this is a round icon
// Needs to be done before disposing charPixmap, and we want to do that soon // Needs to be done before disposing charPixmap, and we want to do that soon
val assumeRoundIcon = charPixmap.guessIsRoundSurroundedByTransparency() val isFontRulesetIcon = ch.code >= FontRulesetIcons.UNUSED_CHARACTER_CODES_START && ch <= DiacriticSupport.getCurrentFreeCode()
val assumeRoundIcon = isFontRulesetIcon && charPixmap.guessIsRoundSurroundedByTransparency()
val rect = packer.pack(charPixmap) val rect = packer.pack(charPixmap)
charPixmap.dispose() charPixmap.dispose()
@ -89,7 +90,7 @@ class NativeBitmapFontData(
glyph.srcX = rect.x.toInt() glyph.srcX = rect.x.toInt()
glyph.srcY = rect.y.toInt() glyph.srcY = rect.y.toInt()
if (ch.code >= FontRulesetIcons.UNUSED_CHARACTER_CODES_START) if (isFontRulesetIcon)
glyph.setRulesetIconGeometry(assumeRoundIcon) glyph.setRulesetIconGeometry(assumeRoundIcon)
// If a page was added, create a new texture region for the incrementally added glyph. // If a page was added, create a new texture region for the incrementally added glyph.
@ -148,7 +149,9 @@ class NativeBitmapFontData(
} catch (_: Exception) { } catch (_: Exception) {
Pixmap(0, 0, Pixmap.Format.RGBA8888) // Empty space Pixmap(0, 0, Pixmap.Format.RGBA8888) // Empty space
} }
return fontImplementation.getCharPixmap(ch) if (DiacriticSupport.isEmpty())
return fontImplementation.getCharPixmap(ch)
return fontImplementation.getCharPixmap(DiacriticSupport.getStringFor(ch))
} }
override fun getGlyphs(run: GlyphLayout.GlyphRun, str: CharSequence, start: Int, end: Int, lastGlyph: BitmapFont.Glyph?) { override fun getGlyphs(run: GlyphLayout.GlyphRun, str: CharSequence, start: Int, end: Int, lastGlyph: BitmapFont.Glyph?) {

View File

@ -61,8 +61,12 @@ class DesktopFont : FontImplementation {
return font.size return font.size
} }
override fun getCharPixmap(char: Char): Pixmap { override fun getCharPixmap(char: Char) = getCharPixmapCommon(char.toString(), metric.charWidth(char))
var width = metric.charWidth(char)
override fun getCharPixmap(symbolString: String) = getCharPixmapCommon(symbolString, metric.stringWidth(symbolString))
private fun getCharPixmapCommon(symbolString: String, measuredWidth: Int): Pixmap {
var width = measuredWidth
var height = metric.height var height = metric.height
if (width == 0) { if (width == 0) {
// This happens e.g. for the Tab character // This happens e.g. for the Tab character
@ -75,7 +79,7 @@ class DesktopFont : FontImplementation {
g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON) g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON)
g.font = font g.font = font
g.color = Color.WHITE g.color = Color.WHITE
g.drawString(char.toString(), 0, metric.leading + metric.ascent) g.drawString(symbolString, 0, metric.leading + metric.ascent)
val pixmap = Pixmap(bi.width, bi.height, Pixmap.Format.RGBA8888) val pixmap = Pixmap(bi.width, bi.height, Pixmap.Format.RGBA8888)
val data = bi.getRGB(0, 0, bi.width, bi.height, null, 0, bi.width) val data = bi.getRGB(0, 0, bi.width, bi.height, null, 0, bi.width)

View File

@ -59,6 +59,46 @@ If any of the following steps are beyond your skillset, ask for help. All but th
- The first function of this entry is alphabetical sorting. Unfortunately, it is not easy to tell whether a specific combination is supported by Java. The simplest way to deal with this is trial and error - once your language is established and playable, see if Civilopedia entries seem properly sorted, if not, open an issue and tell us what _other_, more common language may have better sorting rules. - The first function of this entry is alphabetical sorting. Unfortunately, it is not easy to tell whether a specific combination is supported by Java. The simplest way to deal with this is trial and error - once your language is established and playable, see if Civilopedia entries seem properly sorted, if not, open an issue and tell us what _other_, more common language may have better sorting rules.
- This entry is also required to enable fastlane description upload to a correct folder - however, whether F-Droid supports your language is not guaranteed ([this page](https://f-droid.org/docs/Translation_and_Localization/) should help - but doesn't). - This entry is also required to enable fastlane description upload to a correct folder - however, whether F-Droid supports your language is not guaranteed ([this page](https://f-droid.org/docs/Translation_and_Localization/) should help - but doesn't).
## Diacritics support
When displaying text, the underlying libraries (libGdx and possibly lwjgl3/GWT) that Unciv uses assume one codepoint in the [UTF-16](https://en.wikipedia.org/wiki/UTF-16) representation corresponds to one rendered glyph,
which causes incorrect display of languages making heavy use of diacritics or of characters outside the [basic multilingual plane](https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane) like most emoji.
A language file can activate a "trick", where combinations of codepoints that should render as one single glyph are mapped into a "fake alphabet",
which is created on the fly in the [Private Use Area](https://en.wikipedia.org/wiki/Private_Use_Areas) defined by Unicode.
To activate this feature, set `diacritics_support = true` in your translation. There are a few additional "settings" - translation lines where the "translation" is some control instruction instead.
All of these are optional, though your language may show glitches unless you define some. For example, Bangla _needs_ a definition for U+09CD, where the Unicode category does not fully define the required behaviour.
Each of the following definitions represents zero or more characters, and can simply list them as one string.
For readability, they can also be quoted (" surrounding the entire definition), characters can be separated by spaces, or you can use standard "U+xxxx" representations (these need space separators).
These entries, unlike the rest of a translation file, also support entry-specific comments: After the code(s), from a '#' to the end of the line.
Search for the information about the Unicode support in your language, e.g. on https://www.unicode.org/charts/ for information on which codes you might need to specify.
If your language does not need these, feel free to ignore, or use "" to avoid the "requires translation" mark.
### Limitations
- Consider this feature as being in an experimental stage.
- Can only work if the language's script still consists of individual glyphs rendered left to right.
- The underlying libraries (Java AWT or Android) must be able to render the combinations you need - sometimes you will need to select and possibly install a specific font to see the intended results.
- Using diacritics support incurs a performance penalty, though mostly while languages are loaded when Unciv starts.
- The feature also has a "quantity" limitation from the size of the Unicode "Private Use Area" (only the one in the BMP can be used), and this must be shared by the feature whereby Unciv automatically displays ruleset object icons:
The total number of distinct diacritic "combinations" (or glyphs) your translation actually uses plus the number of objects in the loaded mods (or vanilla ruleset) must not exceed 6400.
- When enabled and the range is the default (or spans the unicode range for surrogates), then the engine will treat Unicode surrogate pairs correctly, assigning a fake alphabet codepoint for them and allowing diacritics to include them in a combo.
However, the parser is strict and throws an Exception on violations of the UTF-16 standard. If your translation crashes Unciv, check your editor for incorrect Unicode handling
(translation files are UTF-8 not UTF-16, but unfortunately most encoding converters allow transferring mismatched surrogate pairs).
Also, this possibility could so far not be successfully tested for emoji - no supporting font found, see "experimental".
### Settings (as translation entries in the language file)
- `diacritics_support`: This entry must be set to "true" for the diacritics support to work at all. Any other value will cause text to be passed through unchanged.
- `unicode_block_start_character` and `unicode_block_end_character`: These define the range of characters that should be considered. One character or code each. If either one is left undefined, the range defaults to U+0021 through U+FFEE, which is almost the entire BMP.
All characters in this range will be categorized, those undefined by Unicode, controls or punctuation, or those outside the range will pass through and reset the diacritics engine for the rest of the line, that is, pending potential combinations will be flushed.
Limiting this range - e.g. to the Unicode page dedicated to your language - is a performance optimization, but ultimately not required.
- `left_joining_diacritics`: Optionally define additional codes meant to join with the character to the left of them. By default, the Unicode categories "Mn", "Mc" and "Me" within the range described above are used.
- `right_joining_diacritics`: Optionally define additional codes meant to join with the character to the right of them, by default none.
- `left_and_right_joiners`: Optionally define additional codes meant to join with the character to the left AND with the character to the right, by default none
These are processed in listed order and can override previous categorizations per character codepoint.
Thus a code specified in `left_and_right_joiners` can be in the "Mn" unicode category, which would put it into the `left_joining_diacritics`, but will still work, because the later definition overrides the earlier one.
## Why not use a crowdsourcing translation website like <...>? ## Why not use a crowdsourcing translation website like <...>?
1. Testing. Currently, translations undergo a number of tests for verification. This allows some language changes to be accepted and others not, and it's all in the same platform with the same tests. External translation tools don't allow for this. 1. Testing. Currently, translations undergo a number of tests for verification. This allows some language changes to be accepted and others not, and it's all in the same platform with the same tests. External translation tools don't allow for this.

View File

@ -180,8 +180,8 @@ class FontDesktop : FontImplementation {
override fun getFontSize() = Fonts.ORIGINAL_FONT_SIZE.toInt() override fun getFontSize() = Fonts.ORIGINAL_FONT_SIZE.toInt()
override fun getCharPixmap(char: Char): Pixmap { override fun getCharPixmap(symbolString: String): Pixmap {
var width = metric.charWidth(char) var width = metric.stringWidth(symbolString)
var height = metric.height var height = metric.height
if (width == 0) { if (width == 0) {
height = Fonts.ORIGINAL_FONT_SIZE.toInt() height = Fonts.ORIGINAL_FONT_SIZE.toInt()
@ -193,7 +193,7 @@ class FontDesktop : FontImplementation {
g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON) g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON)
g.font = font g.font = font
g.color = java.awt.Color.WHITE g.color = java.awt.Color.WHITE
g.drawString(char.toString(), 0, metric.leading + metric.ascent) g.drawString(symbolString, 0, metric.leading + metric.ascent)
val pixmap = Pixmap(bi.width, bi.height, Pixmap.Format.RGBA8888) val pixmap = Pixmap(bi.width, bi.height, Pixmap.Format.RGBA8888)
val data = bi.getRGB(0, 0, bi.width, bi.height, null, 0, bi.width) val data = bi.getRGB(0, 0, bi.width, bi.height, null, 0, bi.width)

View File

@ -16,13 +16,14 @@ import com.unciv.models.translations.getPlaceholderText
import com.unciv.models.translations.squareBraceRegex import com.unciv.models.translations.squareBraceRegex
import com.unciv.models.translations.tr import com.unciv.models.translations.tr
import com.unciv.testing.GdxTestRunner import com.unciv.testing.GdxTestRunner
import com.unciv.testing.RedirectOutput
import com.unciv.testing.RedirectPolicy
import com.unciv.ui.components.fonts.DiacriticSupport
import com.unciv.utils.Log import com.unciv.utils.Log
import org.junit.Assert import org.junit.Assert
import org.junit.Before import org.junit.Before
import org.junit.Test import org.junit.Test
import org.junit.runner.RunWith import org.junit.runner.RunWith
import java.io.OutputStream
import java.io.PrintStream
@RunWith(GdxTestRunner::class) @RunWith(GdxTestRunner::class)
class TranslationTests { class TranslationTests {
@ -30,27 +31,23 @@ class TranslationTests {
private var ruleset = Ruleset() private var ruleset = Ruleset()
@Before @Before
// Since the ruleset and translation loader have their own output,
// We 'disable' the output stream for their outputs, and only enable it for the test itself.
@RedirectOutput(RedirectPolicy.Discard)
fun loadTranslations() { fun loadTranslations() {
// Since the ruleset and translation loader have their own output,
// We 'disable' the output stream for their outputs, and only enable it for the test itself.
val outputChannel = System.out
System.setOut(PrintStream(object : OutputStream() {
override fun write(b: Int) {}
}))
translations.readAllLanguagesTranslation() translations.readAllLanguagesTranslation()
RulesetCache.loadRulesets(noMods = true) RulesetCache.loadRulesets(noMods = true)
ruleset = RulesetCache.getVanillaRuleset() ruleset = RulesetCache.getVanillaRuleset()
System.setOut(outputChannel)
} }
@Test @Test
fun translationsLoad() { fun translationsLoad() {
Assert.assertTrue("This test will only pass there are translations", Assert.assertTrue("This test will only pass if there are translations",
translations.size > 0) translations.size > 0)
} }
// This test is incorrectly defined: it should read from the template.properties file and not fro the final translation files. // This test is incorrectly defined: it should read from the template.properties file and not from the final translation files.
// @Test // @Test
// fun allUnitActionsHaveTranslation() { // fun allUnitActionsHaveTranslation() {
// val actions: MutableSet<String> = HashSet() // val actions: MutableSet<String> = HashSet()
@ -325,6 +322,83 @@ class TranslationTests {
Assert.assertTrue(Stats.isStats(Stats(1f,2f,3f).toStringForNotifications())) Assert.assertTrue(Stats.isStats(Stats(1f,2f,3f).toStringForNotifications()))
} }
/** Builds a [DiacriticSupport] from the English texts of all loaded translation entries and asserts that it is not enabled. */
@Test
fun diacriticsSilentForEnglish() {
    DiacriticSupport.reset()
    // Collect "entry key -> English text" for every entry that actually carries an English text
    val englishByKey = HashMap<String, String>()
    for (translationEntry in translations.values) {
        val englishText = translationEntry["English"] ?: continue
        englishByKey[translationEntry.entry] = englishText
    }
    Assert.assertFalse(DiacriticSupport(englishByKey).isEnabled())
}
/**
 * Runs a fixed list of Bangla sample strings through a [DiacriticSupport] that is built from test-local
 * definitions (not from the loaded Bangla translation), then compares the combinations it reports via
 * `getKnownCombinations()` with a hand-written expected set.
 */
@Test
fun diacriticsWorkForBangla() {
//todo This test was designed before the Bangla language was merged, and uses its own data.
// With the actual Bangla, the @Before will already have loaded that, so there could be an additional, or a different test on the live one...
// Here's a helper to _generate_ the expected from the listOf:
// https://play.kotlinlang.org/#eyJ2ZXJzaW9uIjoiMi4wLjAiLCJwbGF0Zm9ybSI6ImphdmEiLCJhcmdzIjoiIiwibm9uZU1hcmtlcnMiOnRydWUsInRoZW1lIjoiaWRlYSIsImNvZGUiOiJwYWNrYWdlIHRvdWhpZHVycnJcblxuY29uc3QgdmFsIEJBTkdMQV9DSEFSU0VUX1NUQVJUID0gMHgwOTgwXG5jb25zdCB2YWwgQkFOR0xBX0NIQVJTRVRfRU5EID0gMHgwOWZmXG5cbnZhbCBCQU5HTEFfRElBQ1JJVElDUyA9IGxpc3RPZihcbiAgICAn4KaBJywgJ+CmgicsICfgpoMnLCAn4Ka8JyxcbiAgICAn4Ka+JywgJ+CmvycsICfgp4AnLCAn4KeBJyxcbiAgICAn4KeCJywgJ+CngycsICfgp4QnLCAn4KeHJyxcbiAgICAn4KeIJywgJ+CniycsICfgp4wnLCAn4KeNJyxcbiAgICAn4KeXJywgJ+CnoicsICfgp6MnLCAn4Ke+JyxcbilcblxuY29uc3QgdmFsIEJBTkdMQV9KT0lORVIgPSAn4KeNJ1xuXG5mdW4gaXNCYW5nbGFDaGFyKGNoOiBDaGFyKTogQm9vbGVhbiB7XG4gICAgcmV0dXJuIGNoLmNvZGUgPj0gQkFOR0xBX0NIQVJTRVRfU1RBUlQgJiYgY2guY29kZSA8PSBCQU5HTEFfQ0hBUlNFVF9FTkRcbn1cblxudmFsIGFsbFNlcXVlbmNlcyA9IG11dGFibGVTZXRPZjxTdHJpbmc+KClcblxuZnVuIG1haW4oKSB7XG4gICAgdmFsIGxpbmVzID0gbGlzdE9mKFxuICAgICAgICBcIuCmruCmvuCmqOCmmuCmv+CmpOCnjeCmsCDgprjgpq7gp43gpqrgpr7gpqbgppVcIiwgXCLgpqbgp4fgppbgp4HgpqhcIiwgXCLgpongp47gpqrgpqjgp43gpqgg4KaV4Kaw4KeB4KaoXCIsIFwi4KaG4KaC4Ka24Ka/4KaVXCIsIFwi4KaW4KeN4Kaw4Ka/4Ka34KeN4Kaf4Kaq4KeC4Kaw4KeN4KasXCIsIFwi4Ka44KaC4KaV4KeN4Ka34Ka/4Kaq4KeN4KakXCIsIFwi4Ka24KaV4KeN4Kak4Ka/XCIsIFwi4Ka34KeN4Kag4KeN4Kav4KeHXCJcbiAgICApXG5cbiAgICBsaW5lcy5mb3JFYWNoIHsgbGluZSAtPlxuICAgICAgICB2YWwgbGFzdFNlcXVlbmNlID0gU3RyaW5nQnVpbGRlcigpXG4gICAgICAgIGxpbmUuZm9yRWFjaCB7IGNoIC0+XG4gICAgICAgICAgICBpZiAoIWlzQmFuZ2xhQ2hhcihjaCkpIHtcbiAgICAgICAgICAgICAgICBpZiAobGFzdFNlcXVlbmNlLmlzTm90RW1wdHkoKSkge1xuICAgICAgICAgICAgICAgICAgICBhbGxTZXF1ZW5jZXMuYWRkKGxhc3RTZXF1ZW5jZS50b1N0cmluZygpKVxuICAgICAgICAgICAgICAgICAgICBsYXN0U2VxdWVuY2UuY2xlYXIoKVxuICAgICAgICAgICAgICAgIH1cbiAgICAgICAgICAgICAgICByZXR1cm5AZm9yRWFjaFxuICAgICAgICAgICAgfVxuXG4gICAgICAgICAgICBpZiAoQkFOR0xBX0RJQUNSSVRJQ1MuY29udGFpbnMoY2gpKSB7XG4gICAgICAgICAgICAgICAgbGFzdFNlcXVlbmNlLmFwcGVuZChjaClcbiAgICAgICAgICAgICAgICByZXR1cm5AZm9yRWFjaFxuICAgICAgICAgICAgfVxuXG4gICAgICAgICAgICBpZiAobGFzdFNlcXVlbmNlLmlzTm90RW1wdHkoKSAmJiBsYXN0U2VxdWVuY2UubGFzdCgpICE9
IEJBTkdMQV9KT0lORVIpIHtcbiAgICAgICAgICAgICAgICBhbGxTZXF1ZW5jZXMuYWRkKGxhc3RTZXF1ZW5jZS50b1N0cmluZygpKVxuICAgICAgICAgICAgICAgIGxhc3RTZXF1ZW5jZS5jbGVhcigpXG4gICAgICAgICAgICB9XG4gICAgICAgICAgICBsYXN0U2VxdWVuY2UuYXBwZW5kKGNoKVxuICAgICAgICB9XG5cbiAgICAgICAgaWYgKGxhc3RTZXF1ZW5jZS5pc05vdEVtcHR5KCkpIHtcbiAgICAgICAgICAgIGFsbFNlcXVlbmNlcy5hZGQobGFzdFNlcXVlbmNlLnRvU3RyaW5nKCkpXG4gICAgICAgIH1cbiAgICB9XG5cbiAgICBwcmludGxuKGFsbFNlcXVlbmNlcy5maWx0ZXIgeyBpdC5sZW5ndGggPiAxIH0uam9pblRvU3RyaW5nKFwiXFxcIiwgXFxcIlwiLCBcInZhbCBleHBlY3RlZCA9IHNldE9mKFxcXCJcIiwgXCJcXFwiKVwiKSB7IGl0LmFzU2VxdWVuY2UoKS5qb2luVG9TdHJpbmcoKSB9KVxufSJ9
DiacriticSupport.reset()
// The spaces only separate the marks for readability inside this literal; they are stripped before use.
val leftJoiningDiacritics = "ঁ ং ঃ ় া ি ী ু ূ ৃ ৄ ে ৈ ো ৌ ্ ৗ ৢ ৣ ৾".replace(" ", "")
// NOTE(review): empty here, although the expected set below contains base + '্' + base combinations - confirm this literal is intended to be empty.
val leftAndRightJoiners = ""
// NOTE(review): the range ends at U+09FD while the last mark listed above appears to be U+09FE - confirm the upper bound is intended.
val diacriticSupport = DiacriticSupport(true, Char(0x0980U)..Char(0x09FDU), leftJoiningDiacritics, "", leftAndRightJoiners)
// Only the side effect of remapDiacritics is used: its return value is discarded and the result is read back via getKnownCombinations().
listOf(
"মানচিত্র সম্পাদক", "দেখুন", "উৎপন্ন করুন", "আংশিক", "খ্রিষ্টপূর্ব", "সংক্ষিপ্ত", "শক্তি", "ষ্ঠ্যে"
).forEach { diacriticSupport.remapDiacritics(it) }
val actual = diacriticSupport.getKnownCombinations()
// Each expected combination is spelled out as ", "-separated characters so the individual marks stay visible;
// the map step below joins them back into plain strings before comparing.
val expected = setOf(
"ম, া", "চ, ি", "ত, ্, র", "ম, ্, প, া", "দ, ে", "খ, ু", "ন, ্, ন", "র, ু", "আ, ং", "শ, ি",
"খ, ্, র, ি", "ষ, ্, ট", "প, ূ", "র, ্, ব", "স, ং", "ক, ্, ষ, ি", "প, ্, ত", "ক, ্, ত, ি",
"ষ, ্, ঠ, ্, য, ে"
)
.map {
it.split(", ").joinToString("")
}.toSet()
Assert.assertEquals(expected, actual)
}
/**
 * Round-trips the Bangla translation of "Overview" through the diacritics fake alphabet.
 *
 * This needs an actual "Overview" translation in Bangla.properties and passes silently without one.
 * The expected text mirrors that translation at the time of writing, so the test will fail if a translator changes it.
 */
@Test
// @RedirectOutput(RedirectPolicy.Show)  // has good visualization - comment in and run to see how the fake alphabet actually works
fun diacriticsTestBanglaRoundtrip() {
    testRoundtrip(language = "Bangla", term = "Overview", input = "সংক্ষিপ্ত বিবরণী")
}
/**
 * Registers a throwaway language "Test" with diacritics support switched on and a single term ending in an emoji
 * written as a surrogate pair, then checks that the translated text is "Test" plus exactly one further Char
 * that compares greater than [DiacriticSupport.getCurrentFreeCode].
 */
@Test
fun testNonBasePlaneUnicode() {
    val emojiText = "Test\uD83D\uDC4D"
    translations.createTranslations("Test", hashMapOf("Test" to emojiText, "diacritics_support" to "true"))
    testRoundtrip("Test", "Test", emojiText) { translated ->
        val keepsPlainPrefix = translated.startsWith("Test")
        // `&&` short-circuits left to right, so last() is only reached when the length check passed
        val isOK = keepsPlainPrefix &&
            translated.length == 5 &&
            translated.last() > DiacriticSupport.getCurrentFreeCode()
        Assert.assertTrue("Translation with one emoji should have exactly one fake alphabet codepoint", isOK)
    }
}
/**
 * Translates [term] into [language] and checks that mapping every Char of the result through
 * [DiacriticSupport.getStringFor] reproduces [input].
 *
 * Installs a fresh [UncivGame] as `UncivGame.Current` with new settings set to [language], and copies all loaded
 * translations into it. Returns without asserting when `tr()` hands back [term] unchanged.
 *
 * @param language language to select in the game settings
 * @param term the key to translate
 * @param input the text expected after mapping the translated string back
 * @param additionalTest optional extra check, invoked with the translated string after the main assertion
 */
private fun testRoundtrip(language: String, term: String, input: String, additionalTest: ((String)->Unit)? = null) {
    UncivGame.Current = UncivGame()
    val game = UncivGame.Current
    game.settings = GameSettings()
    game.settings.language = language
    translations.forEach { (key, value) -> game.translations[key] = value }

    val translated = term.tr()
    if (translated == term) return  // No translation present, can't test

    // Map every Char of the translated string back to the text it stands for
    val output = buildString {
        for (char in translated) append(DiacriticSupport.getStringFor(char))
    }

    // Helpers for the diagnostic printout only
    fun Char.hex() = "U+" + code.toString(radix = 16).padStart(length = 4, padChar = '0')
    fun String.hex() = map { it.hex() }.joinToString(" ")
    fun String.literalAndHex() = "\"$this\" = ${hex()}"

    val translatedHex = translated
        .map { it.hex() + " -> " + DiacriticSupport.getStringFor(it).literalAndHex() }
        .joinToString("; ")
    println("Mapping '$term' in $language to fake alphabet and back:\n\tinput: ${input.literalAndHex()}\n\ttranslated: $translatedHex\n\toutput: ${output.literalAndHex()}")

    Assert.assertEquals(input, output)
    additionalTest?.invoke(translated)
}
// @Test // @Test
// fun allConditionalsAreContainedInConditionalOrderTranslation() { // fun allConditionalsAreContainedInConditionalOrderTranslation() {