diff --git a/src/main/java/io/github/spannm/jackcess/impl/ColumnImpl.java b/src/main/java/io/github/spannm/jackcess/impl/ColumnImpl.java index 4fe080a..a2cb24b 100644 --- a/src/main/java/io/github/spannm/jackcess/impl/ColumnImpl.java +++ b/src/main/java/io/github/spannm/jackcess/impl/ColumnImpl.java @@ -147,6 +147,17 @@ public class ColumnImpl implements Column, Comparable, DateTimeConte */ public static final SortOrder GENERAL_SORT_ORDER = new SortOrder(GENERAL_SORT_ORDER_VALUE, 1); + /** + * Sort order used by MS Access databases configured with the Russian/Cyrillic collation (LCID 1049, version 0). + *

+ * Index entries for this sort order are encoded by {@code IndexData.RussianTextColumnDescriptor}, which + * currently delegates to {@link GeneralLegacyIndexCodes} as a structurally compatible interim solution + * until the proprietary Russian byte tables are reverse-engineered. + * + * @see IndexData + */ + public static final SortOrder RUSSIAN_SORT_ORDER = new SortOrder((short) 1049, 0); + /** * Sort order used by MS Access databases configured with the Turkish collation (LCID 1055, version 0). *

@@ -2364,8 +2375,13 @@ public DataType getType() { * {@link #GENERAL_97_SORT_ORDER}1033−1Access 97 * {@link #GENERAL_LEGACY_SORT_ORDER}10330Access 2000–2007 * {@link #GENERAL_SORT_ORDER}10331Access 2010+ + * {@link #TURKISH_SORT_ORDER}10550Turkish (interim) + * {@link #RUSSIAN_SORT_ORDER}10490Russian (interim) * - * Any other {@code SortOrder} (e.g. Turkish, LCID 1055) causes the backing {@link IndexData} to become + * For the Turkish and Russian sort orders, the backing {@link IndexData} encodes text using a structurally + * compatible but semantically approximate format (via {@code GeneralLegacyIndexCodes}) until the proprietary + * byte tables are reverse-engineered; see the Javadoc of the respective text column descriptor in + * {@link IndexData}. Any other (unrecognized) {@code SortOrder} causes the backing {@link IndexData} to become * read-only for write operations; see {@link IndexData#setUnsupportedReason}. *

* Sort orders are read via {@link ColumnImpl#readSortOrder} and written via diff --git a/src/main/java/io/github/spannm/jackcess/impl/IndexData.java b/src/main/java/io/github/spannm/jackcess/impl/IndexData.java index dc8dfad..aa60ebf 100644 --- a/src/main/java/io/github/spannm/jackcess/impl/IndexData.java +++ b/src/main/java/io/github/spannm/jackcess/impl/IndexData.java @@ -49,11 +49,13 @@ *

  • {@link ColumnImpl#GENERAL_SORT_ORDER} – "General" (Access 2010+, LCID 1033, version 1)
  • *
  • {@link ColumnImpl#GENERAL_LEGACY_SORT_ORDER} – "General Legacy" (Access 2000–2007, LCID 1033, version 0)
  • *
  • {@link ColumnImpl#GENERAL_97_SORT_ORDER} – "General" (Access 97, LCID 1033, version −1)
  • + *
  • {@link ColumnImpl#RUSSIAN_SORT_ORDER} – Russian/Cyrillic (LCID 1049, version 0)
  • + *
  • {@link ColumnImpl#TURKISH_SORT_ORDER} – Turkish (LCID 1055, version 0)
  • * * Any other sort order causes the index to be marked read-only via {@link #setUnsupportedReason}; write operations * will throw {@link UnsupportedOperationException}. This is the root cause of - * UCanAccess issue #35 for Turkish databases - * (SortOrder 1055). + * UCanAccess issue #35, which affects databases whose collation is not in the list above + * (Turkish LCID 1055 and Russian LCID 1049 were affected until interim support was added). * * * @@ -1385,6 +1387,8 @@ private static Entry createSpecialEntry(RowIdImpl rowId) { * {@link GenLegTextColumnDescriptor}Access 2000–2007 * {@link ColumnImpl#GENERAL_97_SORT_ORDER}1033-1 * {@link Gen97TextColumnDescriptor}Access 97 + * {@link ColumnImpl#RUSSIAN_SORT_ORDER}10490 + * {@link RussianTextColumnDescriptor}any (Russian/Cyrillic collation) * {@link ColumnImpl#TURKISH_SORT_ORDER}10550 * {@link TurkishTextColumnDescriptor}any (Turkish collation) * any other (e.g. Arabic 1025, Greek 1032)–– @@ -1418,6 +1422,8 @@ private ColumnDescriptor newColumnDescriptor(ColumnImpl col, byte flags) { return new GenLegTextColumnDescriptor(col, flags); } else if (ColumnImpl.GENERAL_97_SORT_ORDER.equals(sortOrder)) { return new Gen97TextColumnDescriptor(col, flags); + } else if (ColumnImpl.RUSSIAN_SORT_ORDER.equals(sortOrder)) { + return new RussianTextColumnDescriptor(col, flags); } else if (ColumnImpl.TURKISH_SORT_ORDER.equals(sortOrder)) { return new TurkishTextColumnDescriptor(col, flags); } @@ -1858,6 +1864,49 @@ protected void writeNonNullValue(Object value, ByteStream bout) throws IOExcepti } } + /** + * {@link ColumnDescriptor} for text columns using the Russian/Cyrillic sort order (LCID 1049, version 0). + *

    + * Implementation note – structural compatibility over semantic accuracy:
    + * MS Access stores index entries in a proprietary, order-preserving byte format that is specific to each + * collation. The exact byte tables for the Russian collation have not yet been reverse-engineered. + * Using a standard JVM {@link java.text.Collator} key ({@code CollationKey.toByteArray()}) is + * not an option: the ICU/CLDR sort-key format is fundamentally different from the MS Access + * format, and index pages written with JVM keys would be unreadable by MS Access (corrupted index). + *

    + * As a pragmatic interim solution this descriptor delegates to + * {@link GeneralLegacyIndexCodes#GEN_LEG_INSTANCE}, which produces structurally valid MS Access index + * bytes. The trade-off: + *

    + *

    + * Once the Russian byte tables are available, this class should be replaced by a dedicated + * {@code RussianIndexCodes} implementation that encodes entries with full Russian collation semantics, + * analogous to {@link GeneralLegacyIndexCodes} for the General sort order. + * + * @see ColumnImpl#RUSSIAN_SORT_ORDER + * @see GeneralLegacyIndexCodes + */ + private static final class RussianTextColumnDescriptor extends ColumnDescriptor { + private RussianTextColumnDescriptor(ColumnImpl column, byte flags) { + super(column, flags); + } + + @Override + protected void writeNonNullValue(Object value, ByteStream bout) throws IOException { + // Delegate to General-Legacy encoding to produce structurally valid MS Access index bytes. + // Russian/Cyrillic-specific collation weights are not yet supported. + // See class-level Javadoc for details and the path to a full implementation. + GeneralLegacyIndexCodes.GEN_LEG_INSTANCE.writeNonNullIndexTextValue(value, bout, isAscending()); + } + } + /** * ColumnDescriptor for guid columns. */