How to map SQL collation setting to a Java comparator? -


is there way translate database's collation setting (e.g. sql_latin1_general_cp1_ci_as) java comparator implementation can apply same ordering database does, using in java code?

is there existing library provides mapping?

i ended doing following:

  1. query current database's collation setting.
  2. next, parse description of collator sub-components such "case-insensitive" or "accent-sensitive".
  3. next, construct comparator corresponding these rules

enjoy!

/**  * returns comparator associated database's default collation.  * <p>  * beware! <a href="http://stackoverflow.com/a/361059/14731">some databases</a> sort unicode strings differently  * non-unicode strings, same collation setting.  * <p>  * @param unicode true if string being sorted unicode, false otherwise  * @return comparator associated database's default collation  * @throws databaseexception if unexpected database error occurs  */ public comparator<string> getcomparator(boolean unicode)     throws databaseexception {     // @see http://stackoverflow.com/a/5072926/14731, http://stackoverflow.com/a/27052010/14731 ,     // http://stackoverflow.com/q/32209137/14731     try (connection connection = server.getdatasource().getconnection())     {         try (preparedstatement statement = connection.preparestatement(             "select description sys.fn_helpcollations()\n" +             "where name = serverproperty('collation')"))         {             try (resultset rs = statement.executequery())             {                 if (!rs.next())                     throw new objectnotfoundexception(this);                 string description = rs.getstring(1);                 list<string> tokens = arrays.aslist(description.split(",\\s*"));                 // description format: language,property1,property2,...,propertyn,sorting,...                 comparatorbuilder comparatorbuilder = new comparatorbuilder();                  // skip language                 tokens = tokens.sublist(1, tokens.size());                 // see https://technet.microsoft.com/en-us/library/ms143515(v=sql.90).aspx list of possible tokens                 (string token: tokens)                 {                     if (token.tolowercase().contains("sort"))                     {                         // stop hit information related sorting order                         break;                     }                     switch (token)                     {                         case "case-insensitive":                         {                             comparatorbuilder.caseinsensitive(true);                             break;                         }                         case "accent-insensitive":                         {                             comparatorbuilder.accentinsensitive(true);                             break;                         }                         case "kanatype-insensitive":                         {                             comparatorbuilder.kanainsensitive(true);                             break;                         }                         case "width-insensitive":                         case "width-insensitive unicode data":                         {                             comparatorbuilder.widthinsensitive(true);                             break;                         }                         case "case-sensitive":                         case "accent-sensitive":                         case "kanatype-sensitive":                         case "width-sensitive":                         {                             // nothing, default setting.                             break;                         }                         default:                             throw new assertionerror(string.format("unexpected token: '%s'. description: '%s'", token, description));                     }                 }                 assert (!rs.next()): "database returned more rows expected";                 if (unicode)                     comparatorbuilder.discardhyphens(true);                 return comparatorbuilder.build();             }         }     }     catch (sqlexception e)     {         throw new databaseexception(e);     } }  import com.ibm.icu.text.transliterator; import java.text.normalizer; import java.util.comparator;  /**  * converts database collation java comparator.  * <p>  * @see https://msdn.microsoft.com/en-us/library/hh230914.aspx?f=255&mspperror=-2147217396  * @see http://zarez.net/?p=1893  * @author gili tzabari  */ class comparatorbuilder {     // sql server: https://technet.microsoft.com/en-us/library/ms143515(v=sql.90).aspx     private boolean caseinsensitive = false;     private boolean accentinsensitive = false;     private boolean kanainsensitive = false;     private boolean widthinsensitive = false;     /**      * indicates if hyphens should discarded prior sorting (default = false).      */     private boolean discardhyphens = false;      /**      * @return true if comparator ignores difference between uppercase , lowercase letters (default = false)      */     public boolean caseinsensitive()     {         return caseinsensitive;     }      /**      * @param value true if comparator ignores difference between uppercase , lowercase letters      * @return      */     public comparatorbuilder caseinsensitive(boolean value)     {         this.caseinsensitive = value;         return this;     }      /**      * @return true if comparator ignores difference between accented , unaccented characters (default = false)      */     public boolean accentinsensitive()     {         return accentinsensitive;     }      /**      * @param value true if comparator ignores difference between accented , unaccented characters      * @return      */     public comparatorbuilder accentinsensitive(boolean value)     {         this.accentinsensitive = value;         return this;     }      /**      * @return true if comparator ignores difference between 2 types of japanese kana characters: hiragana      *         , katakana (default = false)      */     public boolean kanainsensitive()     {         return kanainsensitive;     }      /**      * @param value true if comparator ignores difference between 2 types of japanese kana characters:      *              hiragana , katakana      * @return      */     public comparatorbuilder kanainsensitive(boolean value)     {         this.kanainsensitive = value;         return this;     }      /**      * @return true if comparator ignores difference between single-byte character , same character when      *         represented double-byte character (default = false)      */     public boolean widthinsensitive()     {         return widthinsensitive;     }      /**      * @param value true if comparator ignores difference between single-byte character , same character      *              when represented double-byte character      * @return      */     public comparatorbuilder widthinsensitive(boolean value)     {         this.widthinsensitive = value;         return this;     }      /**      * @return true if comparator discards hyphens prior sorting (default = false)      */     public boolean discardhyphens()     {         return discardhyphens;     }      /**      * @param value true if comparator discards hyphens prior sorting      * @return      */     public comparatorbuilder discardhyphens(boolean value)     {         this.discardhyphens = value;         return this;     }      /**      * @return comparator instance      */     public comparator<string> build()     {         return (java.lang.string first, java.lang.string second) ->         {             string firstnormalized = first;             string secondnormalized = second;             if (discardhyphens)             {                 firstnormalized = firstnormalized.replaceall("-", "");                 secondnormalized = secondnormalized.replaceall("-", "");             }             if (accentinsensitive)             {                 // @see http://stackoverflow.com/a/3322174/14731                 firstnormalized = normalizer.normalize(first, normalizer.form.nfd).replaceall("[^\\p{ascii}]", "");                 secondnormalized = normalizer.normalize(second, normalizer.form.nfd).replaceall("[^\\p{ascii}]", "");             }             if (kanainsensitive)             {                 // @see http://stackoverflow.com/a/6577778/14731                 transliterator transliterator = transliterator.getinstance("hiragana-katakana");                 firstnormalized = transliterator.transliterate(firstnormalized);                 secondnormalized = transliterator.transliterate(secondnormalized);             }             if (widthinsensitive)             {                 transliterator transliterator = transliterator.getinstance("halfwidth-fullwidth");                 firstnormalized = transliterator.transliterate(firstnormalized);                 secondnormalized = transliterator.transliterate(secondnormalized);             }             // case-normalization not easy seems. see             // http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug , implementation of             // string.comparetoignorecase(). better delegate trusted implementation.             if (caseinsensitive)                 return firstnormalized.comparetoignorecase(secondnormalized);             else                 return firstnormalized.compareto(secondnormalized);         };     } } 

Comments

Popular posts from this blog

html - Firefox flex bug applied to buttons? -

html - Missing border-right in select on Firefox -

python - build a suggestions list using fuzzywuzzy -