Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ public class PseudoLocCommand extends Command {
description = Param.DIR_PATH_EXCLUDE_PATTERNS_DESCRIPTION)
List<String> directoriesExcludePatterns = null;

/**
 * Character substitution mode requested on the command line via {@code --substitute}. Defaults to
 * {@code RANDOM} when the option is omitted; the value is forwarded with the pseudo-localization
 * request.
 */
@Parameter(
    names = {"--substitute"},
    arity = 1,
    required = false,
    description =
        "Character substitution mode: RANDOM (default) picks a random diacritical replacement each time,"
            + " CONSISTENT always produces the same output for a given input string.",
    converter = SubstituteTypeConverter.class)
LocalizedAssetBody.SubstituteType substituteType = LocalizedAssetBody.SubstituteType.RANDOM;

@Autowired AssetClient assetClient;

@Autowired CommandHelper commandHelper;
Expand Down Expand Up @@ -192,7 +202,8 @@ LocalizedAssetBody getPseudoLocalizedAsset(
assetByPathAndRepositoryId.getId(),
assetContent,
sourceFileMatch.getFileType().getFilterConfigIdOverride(),
filterOptions);
filterOptions,
substituteType);

logger.trace("PseudoLocalizedAsset content = {}", pseudoLocalizedAsset.getContent());
return pseudoLocalizedAsset;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.box.l10n.mojito.cli.command;

import com.box.l10n.mojito.rest.entity.LocalizedAssetBody;

/**
 * Command-line converter for {@link LocalizedAssetBody.SubstituteType} values, registered as the
 * converter of the {@code --substitute} option.
 *
 * <p>NOTE(review): the actual string-to-enum parsing presumably lives in {@code EnumConverter};
 * this class only supplies the concrete enum type - confirm against the base class.
 */
public class SubstituteTypeConverter extends EnumConverter<LocalizedAssetBody.SubstituteType> {

/** Returns the enum class this converter targets. */
@Override
protected Class<LocalizedAssetBody.SubstituteType> getGenericClass() {
return LocalizedAssetBody.SubstituteType.class;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,15 @@ public PollableTask getLocalizedAssetForContentAsync(
* @param filterConfigIdOverride Optional, can be null. Allows to specify a specific Okapi filter
* to use to process the asset
* @param filterOptions
* @param substituteType Specifies how the accent/diacritic characters that replace ASCII letters
*     are chosen (at random or consistently)
* @return the pseudolocalized asset content
*/
public LocalizedAssetBody getPseudoLocalizedAssetForContent(
Long assetId,
String content,
FilterConfigIdOverride filterConfigIdOverride,
List<String> filterOptions) {
List<String> filterOptions,
LocalizedAssetBody.SubstituteType substituteType) {

UriComponentsBuilder uriBuilder =
UriComponentsBuilder.fromPath(getBasePathForResource(assetId, "pseudo"));
Expand All @@ -225,6 +227,7 @@ public LocalizedAssetBody getPseudoLocalizedAssetForContent(
localizedAssetBody.setOutputBcp47tag(OUTPUT_BCP47_TAG);
localizedAssetBody.setFilterConfigIdOverride(filterConfigIdOverride);
localizedAssetBody.setFilterOptions(filterOptions);
localizedAssetBody.setSubstituteType(substituteType);

return authenticatedRestTemplate.postForObject(
uriBuilder.toUriString(), localizedAssetBody, LocalizedAssetBody.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ public enum Status {
ACCEPTED
}

/**
 * During pseudolocalization, specifies how the accented/diacritic characters that replace ASCII
 * letters are chosen.
 */
public enum SubstituteType {
/** Replacement characters are picked at random, so the output can differ between runs. */
RANDOM,
/** Replacement characters are picked consistently: a given string always yields the same output. */
CONSISTENT
}

/** Asset id */
Long assetId;

Expand Down Expand Up @@ -66,6 +77,8 @@ public enum Status {

Status status = Status.ALL;

SubstituteType substituteType;

public Long getAssetId() {
return assetId;
}
Expand Down Expand Up @@ -145,4 +158,12 @@ public String getPullRunName() {
public void setPullRunName(String pullRunName) {
this.pullRunName = pullRunName;
}

/**
 * Returns the character substitution mode for pseudolocalization.
 *
 * @return the substitution mode, or {@code null} if none was set
 */
public SubstituteType getSubstituteType() {
return substituteType;
}

/**
 * Sets the character substitution mode for pseudolocalization.
 *
 * @param substituteType the substitution mode to store on this body
 */
public void setSubstituteType(SubstituteType substituteType) {
this.substituteType = substituteType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.box.l10n.mojito.entity.Asset;
import com.box.l10n.mojito.pseudoloc.PseudoLocalization;
import com.box.l10n.mojito.pseudoloc.PseudoLocalization.SubstituteType;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.IntegrityCheckerFactory;
import com.box.l10n.mojito.service.assetintegritychecker.integritychecker.TextUnitIntegrityChecker;
import com.box.l10n.mojito.service.tm.TMTextUnitRepository;
Expand Down Expand Up @@ -29,11 +30,17 @@ public class PseudoLocalizeStep extends BasePipelineStep {
static Logger logger = LoggerFactory.getLogger(PseudoLocalizeStep.class);

private Asset asset;
private SubstituteType substituteType;
private LocaleId targetLocale;
private Set<TextUnitIntegrityChecker> textUnitIntegrityCheckers = new HashSet<>();

/**
 * Creates a step for the given asset using {@link SubstituteType#RANDOM} character substitution.
 *
 * @param asset the asset this step is created for
 */
public PseudoLocalizeStep(Asset asset) {
this(asset, SubstituteType.RANDOM);
}

/**
 * Creates a step for the given asset with an explicit character substitution mode.
 *
 * @param asset the asset this step is created for
 * @param substituteType the substitution mode handed to the pseudolocalization conversion when
 *     translatable text units are processed
 */
public PseudoLocalizeStep(Asset asset, SubstituteType substituteType) {
this.asset = asset;
this.substituteType = substituteType;
}

@Autowired IntegrityCheckerFactory integrityCheckerFactory;
Expand Down Expand Up @@ -81,7 +88,8 @@ protected Event handleTextUnit(Event event) {
if (textUnit.isTranslatable()) {
String source = textUnitUtils.getSourceAsString(textUnit);
String pseudoTranslation =
pseudoLocalization.convertStringToPseudoLoc(source, textUnitIntegrityCheckers);
pseudoLocalization.convertStringToPseudoLoc(
source, textUnitIntegrityCheckers, substituteType);
textUnit.setTarget(targetLocale, new TextContainer(pseudoTranslation));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
@Component
public class PseudoLocalization {

/** Specifies how the accented/diacritic characters that replace ASCII letters are chosen. */
public enum SubstituteType {
/** Replacement characters are picked at random, so the output can differ between calls. */
RANDOM,
/**
 * Replacement characters are picked consistently: a given input string always yields the same
 * output.
 */
CONSISTENT
}

/** Logger */
static Logger logger = LoggerFactory.getLogger(PseudoLocalization.class);

Expand Down Expand Up @@ -87,15 +95,21 @@ public class PseudoLocalization {
* @return pseudo localized string
*/
public String convertStringToPseudoLoc(String string, Set<TextUnitIntegrityChecker> checkers) {
return convertStringToPseudoLoc(string, checkers, SubstituteType.RANDOM);
}

public String convertStringToPseudoLoc(
String string, Set<TextUnitIntegrityChecker> checkers, SubstituteType substituteType) {
TextUnitIntegrityChecker checker = getIntegrityCheckerForPlaceholderProcessing(checkers);

if (checker == null) {
logger.debug("There is no checker for pseudolocalization placeholder processing.");
return convertStringToPseudoLoc(string);
return convertStringToPseudoLoc(string, substituteType);
} else {
logger.debug("Found checker for pseudolocalization placeholder processing.");
LocalizableString localizableString = checker.extractNonLocalizableParts(string);
String pseudolocalized = convertStringToPseudoLoc(localizableString.getLocalizableString());
String pseudolocalized =
convertStringToPseudoLoc(localizableString.getLocalizableString(), substituteType);
localizableString.setLocalizableString(pseudolocalized);
return checker.restoreNonLocalizableParts(localizableString);
}
Expand All @@ -108,10 +122,14 @@ public String convertStringToPseudoLoc(String string, Set<TextUnitIntegrityCheck
* @return pseudo localized string
*/
public String convertStringToPseudoLoc(String string) {
return convertStringToPseudoLoc(string, SubstituteType.RANDOM);
}

public String convertStringToPseudoLoc(String string, SubstituteType substituteType) {
StringBuilder sb = new StringBuilder();

if (!Strings.isNullOrEmpty(string)) {
String str = convertAsciiToDiacritics(string);
String str = convertAsciiToDiacritics(string, substituteType);
sb.append(expand(str));
sb.insert(0, '⟦');
sb.append('⟧');
Expand Down Expand Up @@ -159,36 +177,68 @@ public String expand(String string) {
* @return
*/
public String convertAsciiToDiacritics(String string) {
int stringLength = string.length();

StringBuilder sb = new StringBuilder();
for (int i = 0; i < stringLength; i++) {
char character = string.charAt(i);
sb.append(getMappingCharFromMap(character));
}
return convertAsciiToDiacritics(string, SubstituteType.RANDOM);
}

return sb.toString();
/**
 * Converts ASCII letters in the string into equivalent characters with accents/diacritics, using
 * the requested substitution strategy.
 *
 * @param string the text to convert
 * @param substituteType how replacement characters are chosen; {@code null} is treated as {@link
 *     SubstituteType#RANDOM}, the same default the single-argument overload uses
 * @return the converted string
 */
public String convertAsciiToDiacritics(String string, SubstituteType substituteType) {
  // Switching over a null enum reference throws NullPointerException, so resolve the default
  // before dispatching.
  SubstituteType effectiveType =
      substituteType != null ? substituteType : SubstituteType.RANDOM;

  return switch (effectiveType) {
    case RANDOM -> convertAsciiToDiacriticsRandom(string);
    case CONSISTENT -> convertAsciiToDiacriticsConsistent(string);
  };
}

/**
* Get a non ASCII character mapping to provided character or the character itself if there is no
* mapping
*
* @param character ASCII character to be mapped
* @return Non ASCII character or character itself
* Converts ASCII letters in the whole string into equivalent characters with accent/diacritics,
* selecting mapped characters consistently. This will always return the same mapped string for a
* given input.
*/
private char getMappingCharFromMap(char character) {
char mappedChar = character;
private String convertAsciiToDiacriticsConsistent(String string) {
StringBuilder builder = new StringBuilder();

// keeps track of which mapped char we used last time
Map<Character, Integer> lastMappedIdx = new HashMap<>();

String mappingCharsForChar = pseudoLocMap.get(mappedChar);
for (char character : string.toCharArray()) {
String mappingsForChar = pseudoLocMap.get(character);

if (mappingCharsForChar != null) {
int maxIndex = mappingCharsForChar.length() - 1;
int randomIndex = (int) (Math.random() * maxIndex);
mappedChar = mappingCharsForChar.charAt(randomIndex);
if (mappingsForChar == null) {
// don't replace if no mapping available
builder.append(character);
continue;
}

// pick next mapped char (or go back to the beginning if used all of them)
int mappedIdx = (1 + lastMappedIdx.getOrDefault(character, -1)) % mappingsForChar.length();
lastMappedIdx.put(character, mappedIdx);
char mappedCharacter = mappingsForChar.charAt(mappedIdx);

builder.append(mappedCharacter);
}
return builder.toString();
}

/**
* Converts ASCII letters in the whole string into equivalent characters with accent/diacritics,
* selecting mapped characters at random. This will return different string every time, even if
* input does not change.
*/
private String convertAsciiToDiacriticsRandom(String string) {
  int stringLength = string.length();
  StringBuilder sb = new StringBuilder(stringLength);

  for (int i = 0; i < stringLength; i++) {
    char character = string.charAt(i);
    String mappingCharsForChar = pseudoLocMap.get(character);

    // Characters without any replacement candidates are copied through unchanged.
    if (mappingCharsForChar != null && !mappingCharsForChar.isEmpty()) {
      // Math.random() is in [0, 1), so scaling by the full length gives an index in
      // [0, length - 1]. Scaling by (length - 1) would never select the last candidate.
      int randomIndex = (int) (Math.random() * mappingCharsForChar.length());
      character = mappingCharsForChar.charAt(randomIndex);
    }

    sb.append(character);
  }
  return sb.toString();
}

/**
Expand Down
11 changes: 10 additions & 1 deletion webapp/src/main/java/com/box/l10n/mojito/rest/asset/AssetWS.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.box.l10n.mojito.entity.TMXliff;
import com.box.l10n.mojito.json.ObjectMapper;
import com.box.l10n.mojito.okapi.asset.UnsupportedAssetFilterTypeException;
import com.box.l10n.mojito.pseudoloc.PseudoLocalization;
import com.box.l10n.mojito.quartz.QuartzJobInfo;
import com.box.l10n.mojito.quartz.QuartzPollableTaskScheduler;
import com.box.l10n.mojito.rest.View;
Expand Down Expand Up @@ -306,9 +307,17 @@ public LocalizedAssetBody getPseudoLocalizedAssetForContent(
Asset asset = assetRepository.getOne(assetId);
String normalizedContent = NormalizationUtils.normalize(localizedAssetBody.getContent());

PseudoLocalization.SubstituteType substituteType =
localizedAssetBody.getSubstituteType() != null
? localizedAssetBody.getSubstituteType()
: PseudoLocalization.SubstituteType.RANDOM;

String generateLocalized =
tmService.generatePseudoLocalized(
asset, normalizedContent, localizedAssetBody.getFilterConfigIdOverride());
asset,
normalizedContent,
localizedAssetBody.getFilterConfigIdOverride(),
substituteType);

localizedAssetBody.setContent(generateLocalized);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.box.l10n.mojito.okapi.FilterConfigIdOverride;
import com.box.l10n.mojito.okapi.InheritanceMode;
import com.box.l10n.mojito.okapi.Status;
import com.box.l10n.mojito.pseudoloc.PseudoLocalization;
import java.util.List;

/**
Expand Down Expand Up @@ -50,6 +51,8 @@ public class LocalizedAssetBody {

Status status = Status.ALL;

PseudoLocalization.SubstituteType substituteType;

public LocalizedAssetBody() {}

public LocalizedAssetBody(String bcp47Tag, String content) {
Expand Down Expand Up @@ -136,4 +139,12 @@ public String getPullRunName() {
public void setPullRunName(String pullRunName) {
this.pullRunName = pullRunName;
}

/**
 * Returns the character substitution mode requested for pseudolocalization.
 *
 * @return the substitution mode, or {@code null} if the request did not specify one
 */
public PseudoLocalization.SubstituteType getSubstituteType() {
return substituteType;
}

/**
 * Sets the character substitution mode requested for pseudolocalization.
 *
 * @param substituteType the substitution mode to store on this body
 */
public void setSubstituteType(PseudoLocalization.SubstituteType substituteType) {
this.substituteType = substituteType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import com.box.l10n.mojito.okapi.qualitycheck.QualityCheckStep;
import com.box.l10n.mojito.okapi.steps.CheckForDoNotTranslateStep;
import com.box.l10n.mojito.okapi.steps.FilterEventsToInMemoryRawDocumentStep;
import com.box.l10n.mojito.pseudoloc.PseudoLocalization;
import com.box.l10n.mojito.quartz.QuartzJobInfo;
import com.box.l10n.mojito.quartz.QuartzPollableTaskScheduler;
import com.box.l10n.mojito.retry.DataIntegrityViolationExceptionRetryTemplate;
Expand Down Expand Up @@ -1098,10 +1099,21 @@ void replaceUsedTmTextUnitVariantIds(
public String generatePseudoLocalized(
Asset asset, String content, FilterConfigIdOverride filterConfigIdOverride)
throws UnsupportedAssetFilterTypeException {
return generatePseudoLocalized(
asset, content, filterConfigIdOverride, PseudoLocalization.SubstituteType.RANDOM);
}

public String generatePseudoLocalized(
Asset asset,
String content,
FilterConfigIdOverride filterConfigIdOverride,
PseudoLocalization.SubstituteType substituteType)
throws UnsupportedAssetFilterTypeException {

String bcp47tag = "en-x-psaccent";

BasePipelineStep pseudoLocalizedStep = (BasePipelineStep) new PseudoLocalizeStep(asset);
BasePipelineStep pseudoLocalizedStep =
(BasePipelineStep) new PseudoLocalizeStep(asset, substituteType);
return generateLocalizedBase(
asset, content, filterConfigIdOverride, null, pseudoLocalizedStep, bcp47tag);
}
Expand Down
Loading
Loading