From 1f96e9b2e6c865c05e60520823446d5a5bf97fc9 Mon Sep 17 00:00:00 2001 From: Wadim Wawrzenczak Date: Wed, 20 May 2026 15:23:08 +0200 Subject: [PATCH 1/3] Implement consistent character replacement during pseudolocalization --- .../mojito/cli/command/PseudoLocCommand.java | 12 ++- .../l10n/mojito/rest/client/AssetClient.java | 11 +++ .../rest/entity/LocalizedAssetBody.java | 10 ++ .../l10n/mojito/okapi/PseudoLocalizeStep.java | 10 +- .../mojito/pseudoloc/PseudoLocalization.java | 95 ++++++++++++++----- .../box/l10n/mojito/rest/asset/AssetWS.java | 11 ++- .../mojito/rest/asset/LocalizedAssetBody.java | 11 +++ .../box/l10n/mojito/service/tm/TMService.java | 14 ++- .../pseudoloc/PseudoLocalizationTest.java | 47 +++++++++ 9 files changed, 193 insertions(+), 28 deletions(-) diff --git a/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java b/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java index 6a554c5948..2490f410e1 100644 --- a/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java +++ b/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java @@ -99,6 +99,15 @@ public class PseudoLocCommand extends Command { description = Param.DIR_PATH_EXCLUDE_PATTERNS_DESCRIPTION) List directoriesExcludePatterns = null; + @Parameter( + names = {"--substitute"}, + arity = 1, + required = false, + description = + "Character substitution mode: RANDOM picks a random diacritical replacement each time," + + " CONSISTENT always maps to the same replacement") + String substituteType; + @Autowired AssetClient assetClient; @Autowired CommandHelper commandHelper; @@ -192,7 +201,8 @@ LocalizedAssetBody getPseudoLocalizedAsset( assetByPathAndRepositoryId.getId(), assetContent, sourceFileMatch.getFileType().getFilterConfigIdOverride(), - filterOptions); + filterOptions, + substituteType); logger.trace("PseudoLocalizedAsset content = {}", pseudoLocalizedAsset.getContent()); return pseudoLocalizedAsset; diff --git 
a/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java b/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java index 8ca90a7086..dee06b9138 100644 --- a/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java +++ b/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java @@ -216,6 +216,16 @@ public LocalizedAssetBody getPseudoLocalizedAssetForContent( String content, FilterConfigIdOverride filterConfigIdOverride, List filterOptions) { + return getPseudoLocalizedAssetForContent( + assetId, content, filterConfigIdOverride, filterOptions, null); + } + + public LocalizedAssetBody getPseudoLocalizedAssetForContent( + Long assetId, + String content, + FilterConfigIdOverride filterConfigIdOverride, + List filterOptions, + String substituteType) { UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath(getBasePathForResource(assetId, "pseudo")); @@ -225,6 +235,7 @@ public LocalizedAssetBody getPseudoLocalizedAssetForContent( localizedAssetBody.setOutputBcp47tag(OUTPUT_BCP47_TAG); localizedAssetBody.setFilterConfigIdOverride(filterConfigIdOverride); localizedAssetBody.setFilterOptions(filterOptions); + localizedAssetBody.setSubstituteType(substituteType); return authenticatedRestTemplate.postForObject( uriBuilder.toUriString(), localizedAssetBody, LocalizedAssetBody.class); diff --git a/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java b/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java index d6983feed6..a2664885ac 100644 --- a/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java +++ b/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java @@ -66,6 +66,8 @@ public enum Status { Status status = Status.ALL; + String substituteType; + public Long getAssetId() { return assetId; } @@ -145,4 +147,12 @@ public String getPullRunName() { public void setPullRunName(String pullRunName) { 
this.pullRunName = pullRunName; } + + public String getSubstituteType() { + return substituteType; + } + + public void setSubstituteType(String substituteType) { + this.substituteType = substituteType; + } } diff --git a/webapp/src/main/java/com/box/l10n/mojito/okapi/PseudoLocalizeStep.java b/webapp/src/main/java/com/box/l10n/mojito/okapi/PseudoLocalizeStep.java index b6faed2acd..d17a6eced6 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/okapi/PseudoLocalizeStep.java +++ b/webapp/src/main/java/com/box/l10n/mojito/okapi/PseudoLocalizeStep.java @@ -2,6 +2,7 @@ import com.box.l10n.mojito.entity.Asset; import com.box.l10n.mojito.pseudoloc.PseudoLocalization; +import com.box.l10n.mojito.pseudoloc.PseudoLocalization.SubstituteType; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker; import com.box.l10n.mojito.service.tm.TMTextUnitRepository; @@ -29,11 +30,17 @@ public class PseudoLocalizeStep extends BasePipelineStep { static Logger logger = LoggerFactory.getLogger(PseudoLocalizeStep.class); private Asset asset; + private SubstituteType substituteType; private LocaleId targetLocale; private Set textUnitIntegrityCheckers = new HashSet<>(); public PseudoLocalizeStep(Asset asset) { + this(asset, SubstituteType.RANDOM); + } + + public PseudoLocalizeStep(Asset asset, SubstituteType substituteType) { this.asset = asset; + this.substituteType = substituteType; } @Autowired IntegrityCheckerFactory integrityCheckerFactory; @@ -81,7 +88,8 @@ protected Event handleTextUnit(Event event) { if (textUnit.isTranslatable()) { String source = textUnitUtils.getSourceAsString(textUnit); String pseudoTranslation = - pseudoLocalization.convertStringToPseudoLoc(source, textUnitIntegrityCheckers); + pseudoLocalization.convertStringToPseudoLoc( + source, textUnitIntegrityCheckers, substituteType); textUnit.setTarget(targetLocale, new 
TextContainer(pseudoTranslation)); } diff --git a/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java b/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java index 2cc9335835..b38e2fe1dc 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java +++ b/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java @@ -17,6 +17,11 @@ @Component public class PseudoLocalization { + public enum SubstituteType { + RANDOM, + CONSISTENT + } + /** Logger */ static Logger logger = LoggerFactory.getLogger(PseudoLocalization.class); @@ -87,15 +92,21 @@ public class PseudoLocalization { * @return pseudo localized string */ public String convertStringToPseudoLoc(String string, Set checkers) { + return convertStringToPseudoLoc(string, checkers, SubstituteType.RANDOM); + } + + public String convertStringToPseudoLoc( + String string, Set checkers, SubstituteType substituteType) { TextUnitIntegrityChecker checker = getIntegrityCheckerForPlaceholderProcessing(checkers); if (checker == null) { logger.debug("There is no checker for pseudolocalization placeholder processing."); - return convertStringToPseudoLoc(string); + return convertStringToPseudoLoc(string, substituteType); } else { logger.debug("Found checker for pseudolocalization placeholder processing."); LocalizableString localizableString = checker.extractNonLocalizableParts(string); - String pseudolocalized = convertStringToPseudoLoc(localizableString.getLocalizableString()); + String pseudolocalized = + convertStringToPseudoLoc(localizableString.getLocalizableString(), substituteType); localizableString.setLocalizableString(pseudolocalized); return checker.restoreNonLocalizableParts(localizableString); } @@ -108,10 +119,14 @@ public String convertStringToPseudoLoc(String string, Set convertAsciiToDiacriticsRandom(string); + case CONSISTENT -> convertAsciiToDiacriticsConsistent(string); + }; } /** - * Get a non ASCII character mapping to 
provided character or the character itself if there is no - * mapping - * - * @param character ASCII character to be mapped - * @return Non ASCII character or character itself + * Converts ASCII letters in the whole string into equivalent characters with accent/diacritics, + * selecting mapped characters consistently. This will always return the same mapped string for a + * given input. */ - private char getMappingCharFromMap(char character) { - char mappedChar = character; + private String convertAsciiToDiacriticsConsistent(String string) { + StringBuilder builder = new StringBuilder(); + + // keeps track of which mapped char we used last time + Map lastMappedIdx = new HashMap<>(); - String mappingCharsForChar = pseudoLocMap.get(mappedChar); + for (char character : string.toCharArray()) { + String mappingsForChar = pseudoLocMap.get(character); - if (mappingCharsForChar != null) { - int maxIndex = mappingCharsForChar.length() - 1; - int randomIndex = (int) (Math.random() * maxIndex); - mappedChar = mappingCharsForChar.charAt(randomIndex); + if (mappingsForChar == null) { + // don't replace if no mapping available + builder.append(character); + continue; + } + + // pick next mapped char (or go back to the beginning if used all of them) + int mappedIdx = (1 + lastMappedIdx.getOrDefault(character, -1)) % mappingsForChar.length(); + lastMappedIdx.put(character, mappedIdx); + char mappedCharacter = mappingsForChar.charAt(mappedIdx); + + builder.append(mappedCharacter); } + return builder.toString(); + } + + /** + * Converts ASCII letters in the whole string into equivalent characters with accent/diacritics, + * selecting mapped characters at random. This may return a different string on each call, even if + * the input does not change. 
+ */ + private String convertAsciiToDiacriticsRandom(String string) { + int stringLength = string.length(); + StringBuilder sb = new StringBuilder(); + + for (int i = 0; i < stringLength; i++) { + char character = string.charAt(i); + String mappingCharsForChar = pseudoLocMap.get(character); - return mappedChar; + if (mappingCharsForChar != null) { + int maxIndex = mappingCharsForChar.length() - 1; + int randomIndex = (int) (Math.random() * maxIndex); + character = mappingCharsForChar.charAt(randomIndex); + } + + sb.append(character); + } + return sb.toString(); } /** diff --git a/webapp/src/main/java/com/box/l10n/mojito/rest/asset/AssetWS.java b/webapp/src/main/java/com/box/l10n/mojito/rest/asset/AssetWS.java index c9d1c5fc9d..8b3e857d1a 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/rest/asset/AssetWS.java +++ b/webapp/src/main/java/com/box/l10n/mojito/rest/asset/AssetWS.java @@ -11,6 +11,7 @@ import com.box.l10n.mojito.entity.TMXliff; import com.box.l10n.mojito.json.ObjectMapper; import com.box.l10n.mojito.okapi.asset.UnsupportedAssetFilterTypeException; +import com.box.l10n.mojito.pseudoloc.PseudoLocalization; import com.box.l10n.mojito.quartz.QuartzJobInfo; import com.box.l10n.mojito.quartz.QuartzPollableTaskScheduler; import com.box.l10n.mojito.rest.View; @@ -306,9 +307,17 @@ public LocalizedAssetBody getPseudoLocalizedAssetForContent( Asset asset = assetRepository.getOne(assetId); String normalizedContent = NormalizationUtils.normalize(localizedAssetBody.getContent()); + PseudoLocalization.SubstituteType substituteType = + localizedAssetBody.getSubstituteType() != null + ? 
localizedAssetBody.getSubstituteType() + : PseudoLocalization.SubstituteType.RANDOM; + String generateLocalized = tmService.generatePseudoLocalized( - asset, normalizedContent, localizedAssetBody.getFilterConfigIdOverride()); + asset, + normalizedContent, + localizedAssetBody.getFilterConfigIdOverride(), + substituteType); localizedAssetBody.setContent(generateLocalized); diff --git a/webapp/src/main/java/com/box/l10n/mojito/rest/asset/LocalizedAssetBody.java b/webapp/src/main/java/com/box/l10n/mojito/rest/asset/LocalizedAssetBody.java index 7e79958b14..7eb0b97a22 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/rest/asset/LocalizedAssetBody.java +++ b/webapp/src/main/java/com/box/l10n/mojito/rest/asset/LocalizedAssetBody.java @@ -3,6 +3,7 @@ import com.box.l10n.mojito.okapi.FilterConfigIdOverride; import com.box.l10n.mojito.okapi.InheritanceMode; import com.box.l10n.mojito.okapi.Status; +import com.box.l10n.mojito.pseudoloc.PseudoLocalization; import java.util.List; /** @@ -50,6 +51,8 @@ public class LocalizedAssetBody { Status status = Status.ALL; + PseudoLocalization.SubstituteType substituteType; + public LocalizedAssetBody() {} public LocalizedAssetBody(String bcp47Tag, String content) { @@ -136,4 +139,12 @@ public String getPullRunName() { public void setPullRunName(String pullRunName) { this.pullRunName = pullRunName; } + + public PseudoLocalization.SubstituteType getSubstituteType() { + return substituteType; + } + + public void setSubstituteType(PseudoLocalization.SubstituteType substituteType) { + this.substituteType = substituteType; + } } diff --git a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java index a5b830075a..1243eb844a 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java +++ b/webapp/src/main/java/com/box/l10n/mojito/service/tm/TMService.java @@ -41,6 +41,7 @@ import com.box.l10n.mojito.okapi.qualitycheck.QualityCheckStep; 
import com.box.l10n.mojito.okapi.steps.CheckForDoNotTranslateStep; import com.box.l10n.mojito.okapi.steps.FilterEventsToInMemoryRawDocumentStep; +import com.box.l10n.mojito.pseudoloc.PseudoLocalization; import com.box.l10n.mojito.quartz.QuartzJobInfo; import com.box.l10n.mojito.quartz.QuartzPollableTaskScheduler; import com.box.l10n.mojito.retry.DataIntegrityViolationExceptionRetryTemplate; @@ -1098,10 +1099,21 @@ void replaceUsedTmTextUnitVariantIds( public String generatePseudoLocalized( Asset asset, String content, FilterConfigIdOverride filterConfigIdOverride) throws UnsupportedAssetFilterTypeException { + return generatePseudoLocalized( + asset, content, filterConfigIdOverride, PseudoLocalization.SubstituteType.RANDOM); + } + + public String generatePseudoLocalized( + Asset asset, + String content, + FilterConfigIdOverride filterConfigIdOverride, + PseudoLocalization.SubstituteType substituteType) + throws UnsupportedAssetFilterTypeException { String bcp47tag = "en-x-psaccent"; - BasePipelineStep pseudoLocalizedStep = (BasePipelineStep) new PseudoLocalizeStep(asset); + BasePipelineStep pseudoLocalizedStep = + (BasePipelineStep) new PseudoLocalizeStep(asset, substituteType); return generateLocalizedBase( asset, content, filterConfigIdOverride, null, pseudoLocalizedStep, bcp47tag); } diff --git a/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java b/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java index 70b370b5b8..c583d5e3de 100644 --- a/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java +++ b/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java @@ -2,6 +2,7 @@ import static org.junit.Assert.*; +import com.box.l10n.mojito.pseudoloc.PseudoLocalization.SubstituteType; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.MessageFormatIntegrityChecker; import 
com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker; import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.WhitespaceIntegrityChecker; @@ -149,4 +150,50 @@ public void testConvertPluralMessageFormatStringToPseudoLoc3() { "The plural text variation should be pseudolocalized while the placeholder should not", pseudoLocalized.contains("{# Comments or Tasks}")); } + + @Test + public void testConsistentSubstitutionIsDeterministic() { + PseudoLocalization ps = new PseudoLocalization(); + String first = ps.convertAsciiToDiacritics("Hello World", SubstituteType.CONSISTENT); + String second = ps.convertAsciiToDiacritics("Hello World", SubstituteType.CONSISTENT); + assertEquals("Consistent substitution should produce identical results", first, second); + } + + @Test + public void testConsistentSubstitutionProducesDiacritics() { + PseudoLocalization ps = new PseudoLocalization(); + String result = ps.convertAsciiToDiacritics("Hello", SubstituteType.CONSISTENT); + assertNotEquals("Consistent substitution should still transform the string", "Hello", result); + } + + @Test + public void testConsistentConvertStringToPseudoLoc() { + PseudoLocalization ps = new PseudoLocalization(); + String first = ps.convertStringToPseudoLoc("English Sentence", SubstituteType.CONSISTENT); + String second = ps.convertStringToPseudoLoc("English Sentence", SubstituteType.CONSISTENT); + assertEquals( + "Consistent pseudolocalization should be deterministic across calls", first, second); + } + + @Test + public void testConsistentConvertStringToPseudoLocWithCheckers() { + PseudoLocalization ps = new PseudoLocalization(); + Set checkers = new HashSet<>(); + checkers.add(new MessageFormatIntegrityChecker()); + String first = + ps.convertStringToPseudoLoc("Hello {name}, welcome!", checkers, SubstituteType.CONSISTENT); + String second = + ps.convertStringToPseudoLoc("Hello {name}, welcome!", checkers, SubstituteType.CONSISTENT); + 
assertEquals("Consistent substitution with checkers should be deterministic", first, second); + assertTrue( + "Placeholders should be preserved with consistent substitution", first.contains("{name}")); + } + + @Test + public void testConsistentDoesNotConvertUnmappedChars() { + PseudoLocalization ps = new PseudoLocalization(); + String result = ps.convertAsciiToDiacritics("qQV", SubstituteType.CONSISTENT); + assertEquals( + "Unmapped chars should remain unchanged with consistent substitution", "qQV", result); + } } From 231a370c480d706bcf0b6672be14948a2eb461e5 Mon Sep 17 00:00:00 2001 From: Wadim Wawrzenczak Date: Thu, 21 May 2026 16:59:45 +0200 Subject: [PATCH 2/3] Refactor for consistency with other enum params and improve comments --- .../mojito/cli/command/PseudoLocCommand.java | 7 ++++--- .../cli/command/SubstituteTypeConverter.java | 11 +++++++++++ .../l10n/mojito/rest/client/AssetClient.java | 12 ++---------- .../mojito/rest/entity/LocalizedAssetBody.java | 17 ++++++++++++++--- .../mojito/pseudoloc/PseudoLocalization.java | 3 +++ 5 files changed, 34 insertions(+), 16 deletions(-) create mode 100644 cli/src/main/java/com/box/l10n/mojito/cli/command/SubstituteTypeConverter.java diff --git a/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java b/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java index 2490f410e1..1627841c93 100644 --- a/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java +++ b/cli/src/main/java/com/box/l10n/mojito/cli/command/PseudoLocCommand.java @@ -104,9 +104,10 @@ public class PseudoLocCommand extends Command { arity = 1, required = false, description = - "Character substitution mode: RANDOM picks a random diacritical replacement each time," - + " CONSISTENT always maps to the same replacement") - String substituteType; + "Character substitution mode: RANDOM (default) picks a random diacritical replacement each time," + + " CONSISTENT always maps to the same replacement within a 
given string.", + converter = SubstituteTypeConverter.class) + LocalizedAssetBody.SubstituteType substituteType = LocalizedAssetBody.SubstituteType.RANDOM; @Autowired AssetClient assetClient; diff --git a/cli/src/main/java/com/box/l10n/mojito/cli/command/SubstituteTypeConverter.java b/cli/src/main/java/com/box/l10n/mojito/cli/command/SubstituteTypeConverter.java new file mode 100644 index 0000000000..7d89b8edc6 --- /dev/null +++ b/cli/src/main/java/com/box/l10n/mojito/cli/command/SubstituteTypeConverter.java @@ -0,0 +1,11 @@ +package com.box.l10n.mojito.cli.command; + +import com.box.l10n.mojito.rest.entity.LocalizedAssetBody; + +public class SubstituteTypeConverter extends EnumConverter { + + @Override + protected Class getGenericClass() { + return LocalizedAssetBody.SubstituteType.class; + } +} diff --git a/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java b/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java index dee06b9138..eafbd249f3 100644 --- a/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java +++ b/restclient/src/main/java/com/box/l10n/mojito/rest/client/AssetClient.java @@ -209,23 +209,15 @@ public PollableTask getLocalizedAssetForContentAsync( * @param filterConfigIdOverride Optional, can be null. 
Allows to specify a specific Okapi filter * to use to process the asset * @param filterOptions + * @param substituteType How replacement characters are chosen: RANDOM or CONSISTENT * @return the pseudoloocalized asset content */ - public LocalizedAssetBody getPseudoLocalizedAssetForContent( - Long assetId, - String content, - FilterConfigIdOverride filterConfigIdOverride, - List filterOptions) { - return getPseudoLocalizedAssetForContent( - assetId, content, filterConfigIdOverride, filterOptions, null); - } - public LocalizedAssetBody getPseudoLocalizedAssetForContent( Long assetId, String content, FilterConfigIdOverride filterConfigIdOverride, List filterOptions, - String substituteType) { + LocalizedAssetBody.SubstituteType substituteType) { UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath(getBasePathForResource(assetId, "pseudo")); diff --git a/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java b/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java index a2664885ac..6fea782f9b 100644 --- a/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java +++ b/restclient/src/main/java/com/box/l10n/mojito/rest/entity/LocalizedAssetBody.java @@ -26,6 +26,17 @@ public enum Status { ACCEPTED } + /** + * During pseudolocalization, specifies how accent/diacritics characters that replace ASCII + * letters are chosen. + */ + public enum SubstituteType { + /** Replacement characters are picked at random. */ + RANDOM, + /** Replacement characters are picked consistently for a given string. 
*/ + CONSISTENT + } + /** Asset id */ Long assetId; @@ -66,7 +77,7 @@ public enum Status { Status status = Status.ALL; - String substituteType; + SubstituteType substituteType; public Long getAssetId() { return assetId; @@ -148,11 +159,11 @@ public void setPullRunName(String pullRunName) { this.pullRunName = pullRunName; } - public String getSubstituteType() { + public SubstituteType getSubstituteType() { return substituteType; } - public void setSubstituteType(String substituteType) { + public void setSubstituteType(SubstituteType substituteType) { this.substituteType = substituteType; } } diff --git a/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java b/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java index b38e2fe1dc..6828f14d26 100644 --- a/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java +++ b/webapp/src/main/java/com/box/l10n/mojito/pseudoloc/PseudoLocalization.java @@ -17,8 +17,11 @@ @Component public class PseudoLocalization { + /** Specifies how accent/diacritics characters that replace ASCII letters are chosen */ public enum SubstituteType { + /** Replacement characters are picked at random. */ RANDOM, + /** Replacement characters are picked consistently for a given string. 
*/ CONSISTENT } From ff4834ba7cc2cb899442c685d2bc3b9353100a87 Mon Sep 17 00:00:00 2001 From: Wadim Wawrzenczak Date: Mon, 25 May 2026 15:36:35 +0200 Subject: [PATCH 3/3] Add tests for random pseudolocalization --- .../pseudoloc/PseudoLocalizationTest.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java b/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java index c583d5e3de..beeee5be94 100644 --- a/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java +++ b/webapp/src/test/java/com/box/l10n/mojito/pseudoloc/PseudoLocalizationTest.java @@ -28,6 +28,15 @@ public void testStringIsConvertedToDiacritics() { "The string should be converted to diacritics", "English Sentence", diacriticsString); } + @Test + public void testStringIsConvertedToDiacriticsRandom() { + PseudoLocalization ps = new PseudoLocalization(); + String first = ps.convertAsciiToDiacritics("English Sentence"); + String second = ps.convertAsciiToDiacritics("English Sentence"); + assertNotEquals( + "Default substitution should randomly select diacritics across calls", first, second); + } + @Test public void testStringIsNotConvertedToDiacritics() { // The chars q, Q, and V are not converted so they should not be converted @@ -44,6 +53,15 @@ public void testconvertStringToPseudoLoc() { assertNotEquals("The string should be pseudolocalized", "English Sentence", pseudoLocalized); } + @Test + public void testconvertStringToPseudoLocRandom() { + PseudoLocalization ps = new PseudoLocalization(); + String first = ps.convertStringToPseudoLoc("English Sentence"); + String second = ps.convertStringToPseudoLoc("English Sentence"); + assertNotEquals( + "Default pseudolocalization should substitute diacritics randomly", first, second); + } + @Test public void testconvertStringToPseudoLocDoesNot() { PseudoLocalization ps = new PseudoLocalization();