RegularExpressionConverter.java

  1. /*******************************************************************************
  2.  * Copyright 2012 André Rouél
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *   http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  ******************************************************************************/
  16. package net.sf.uadetector.internal.util;

  17. import java.util.Collection;
  18. import java.util.Collections;
  19. import java.util.HashSet;
  20. import java.util.Set;
  21. import java.util.TreeSet;
  22. import java.util.regex.Matcher;
  23. import java.util.regex.Pattern;

  24. import javax.annotation.Nonnegative;
  25. import javax.annotation.Nonnull;

  26. import net.sf.qualitycheck.Check;

  27. public final class RegularExpressionConverter {

  28.     public enum Flag {

  29.         /**
  30.          * Enables canonical equivalence.
  31.          */
  32.         CANON_EQ(Pattern.CANON_EQ, 'c'),

  33.         /**
  34.          * Enables case-insensitive matching.
  35.          */
  36.         CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE, 'i'),

  37.         /**
  38.          * Permits whitespace and comments in pattern.
  39.          */
  40.         COMMENTS(Pattern.COMMENTS, 'x'),

  41.         /**
  42.          * Enables dotall mode.
  43.          */
  44.         DOTALL(Pattern.DOTALL, 's'),

  45.         /**
  46.          * Enables literal parsing of the pattern.
  47.          */
  48.         LITERAL(Pattern.LITERAL, 'l'),

  49.         /**
  50.          * Enables multiline mode.
  51.          */
  52.         MULTILINE(Pattern.MULTILINE, 'm'),

  53.         /**
  54.          * Enables Unicode-aware case folding.
  55.          */
  56.         UNICODE_CASE(Pattern.UNICODE_CASE, 'u'),

  57.         /**
  58.          * Enables Unix lines mode.
  59.          */
  60.         UNIX_LINES(Pattern.UNIX_LINES, 'e');

  61.         private static class FlagByCharacterComparator extends CompareNullSafe<Flag> {
  62.             private static final long serialVersionUID = 1L;

  63.             @Override
  64.             public int compareType(@Nonnull final Flag f1, @Nonnull final Flag f2) {
  65.                 final Character c1 = Character.valueOf(f1.getCharacter());
  66.                 final Character c2 = Character.valueOf(f2.getCharacter());
  67.                 return c1.compareTo(c2);
  68.             }
  69.         }

  70.         private static final FlagByCharacterComparator FLAG_COMPARATOR = new FlagByCharacterComparator();

  71.         /**
  72.          * Converts a set of flags as to a bitmask (sum of numerical values).
  73.          *
  74.          * @param flags
  75.          *            a set of flags
  76.          * @return sum of numerical values of passed flags or 0
  77.          */
  78.         public static int convertToBitmask(@Nonnull final Collection<Flag> flags) {
  79.             Check.notNull(flags, "flags");

  80.             int bitmask = 0;
  81.             for (final Flag flag : flags) {
  82.                 bitmask = bitmask | flag.getNumber();
  83.             }
  84.             return bitmask;
  85.         }

  86.         /**
  87.          * Converts a set of flags as to a string representation. The flags {@link Flag#CASE_INSENSITIVE},
  88.          * {@link Flag#DOTALL}, {@link Flag#MULTILINE} and {@link Flag#COMMENTS} are identical to the PERL regular
  89.          * expression modifiers.
  90.          *
  91.          * @param flags
  92.          *            a set of flags
  93.          * @return sum of numerical values of passed flags or 0
  94.          */
  95.         public static String convertToModifiers(@Nonnull final Collection<Flag> flags) {
  96.             Check.notNull(flags, "flags");

  97.             final StringBuilder modifiers = new StringBuilder(8);
  98.             final Set<Flag> sortedFlags = new TreeSet<Flag>(Collections.reverseOrder(FLAG_COMPARATOR));
  99.             sortedFlags.addAll(flags);
  100.             for (final Flag flag : sortedFlags) {
  101.                 modifiers.append(flag.getCharacter());
  102.             }
  103.             return modifiers.toString();
  104.         }

  105.         /**
  106.          * This method try to find a matching enum value by the given character representation. The character will be
  107.          * evaluated against the stored character of a flag.
  108.          *
  109.          * @param flag
  110.          *            representation of a flag as a character
  111.          * @return the matching enum value or {@code null}
  112.          * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
  113.          *             if the given number is smaller than zero
  114.          */
  115.         public static Flag evaluateByCharacter(final char flag) {
  116.             Check.notNegative(flag, "flag");
  117.             Flag result = null;
  118.             for (final Flag value : values()) {
  119.                 if (value.getCharacter() == flag) {
  120.                     result = value;
  121.                     break;
  122.                 }
  123.             }
  124.             return result;
  125.         }

  126.         /**
  127.          * This method try to find a matching enum value by the given numerical representation. The number will be
  128.          * evaluated against the stored number of a flag.
  129.          *
  130.          * @param flag
  131.          *            representation of a flag as a character
  132.          * @return the matching enum value or {@code null}
  133.          * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
  134.          *             if the given number is smaller than zero
  135.          */
  136.         public static Flag evaluateByNumber(final int flag) {
  137.             Check.notNegative(flag, "flag");
  138.             Flag result = null;
  139.             for (final Flag value : values()) {
  140.                 if (value.getNumber() == flag) {
  141.                     result = value;
  142.                     break;
  143.                 }
  144.             }
  145.             return result;
  146.         }

  147.         /**
  148.          * Parses a sum of flags as numerical values (bitmask) and translates it to set of enum values.
  149.          *
  150.          * @param bitmask
  151.          *            Sum of numerical values of flags
  152.          * @return a set of flags
  153.          * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
  154.          *             if the given number is smaller than zero
  155.          */
  156.         @Nonnull
  157.         public static Set<Flag> parse(@Nonnegative final int bitmask) {
  158.             Check.notNegative(bitmask, "bitmask");

  159.             final Set<Flag> flags = new HashSet<Flag>();
  160.             for (final Flag flag : values()) {
  161.                 if ((bitmask & flag.getNumber()) != 0) {
  162.                     flags.add(flag);
  163.                 }
  164.             }
  165.             return flags;
  166.         }

  167.         /**
  168.          * Translates PERL style modifiers to a set of {@code Pattern} compatible ones.
  169.          *
  170.          * @param modifiers
  171.          *            modifiers as string of a PERL style regular expression
  172.          * @return a set of modifier flags that may include CASE_INSENSITIVE, MULTILINE, DOTALL and COMMENTS
  173.          */
  174.         public static Set<Flag> parse(@Nonnull final String modifiers) {
  175.             Check.notNull(modifiers, "modifiers");

  176.             final Set<Flag> flags = new HashSet<Flag>();
  177.             for (int i = 0; i < modifiers.length(); i++) {
  178.                 final Flag flag = Flag.evaluateByCharacter(modifiers.charAt(i));
  179.                 if (flag != null) {
  180.                     flags.add(flag);
  181.                 }
  182.             }
  183.             return flags;
  184.         }

  185.         /**
  186.          * Representation of a flag as a character
  187.          */
  188.         private final char character;

  189.         /**
  190.          * Representation of a flag as a number
  191.          */
  192.         private final int number;

  193.         private Flag(final int value, final char character) {
  194.             number = value;
  195.             this.character = character;
  196.         }

  197.         /**
  198.          * Returns this flag as character representation.
  199.          *
  200.          * @return representation as a character
  201.          */
  202.         public char getCharacter() {
  203.             return character;
  204.         }

  205.         /**
  206.          * Returns this flag as numerical representation.
  207.          *
  208.          * @return representation as a number
  209.          */
  210.         public int getNumber() {
  211.             return number;
  212.         }

  213.     }

  214.     /**
  215.      * Template to support the conversion into a PERL style regular expression
  216.      */
  217.     private static final String PATTERN_TO_REGEX_TEMPLATE = "/%s/%s";

  218.     /**
  219.      * Pattern for PERL style regular expression strings
  220.      */
  221.     private static final Pattern PERL_STYLE = Pattern.compile("^/.*/((i|m|s|x)*)?$");

  222.     /**
  223.      * Pattern for PERL style regular expression strings with more fault-tolerance to the modifiers
  224.      */
  225.     private static final Pattern PERL_STYLE_TOLERANT = Pattern.compile("^/.*/(([A-z])*)?$");

  226.     /**
  227.      * Converts a given {@code Pattern} into a PERL style regular expression.
  228.      *
  229.      * @param pattern
  230.      *            regular expression pattern
  231.      * @return PERL style regular expression as string
  232.      */
  233.     public static String convertPatternToPerlRegex(@Nonnull final Pattern pattern) {
  234.         Check.notNull(pattern, "pattern");
  235.         final String modifiers = Flag.convertToModifiers(Flag.parse(pattern.flags()));
  236.         return String.format(PATTERN_TO_REGEX_TEMPLATE, pattern.pattern(), modifiers);
  237.     }

  238.     /**
  239.      * Converts a PERL style regular expression into Java style.<br>
  240.      * <br>
  241.      * The leading and ending slash and the modifiers will be removed. The modifiers will be translated into equivalents
  242.      * flags of <code>java.util.Pattern</code>. If there are modifiers that are not valid an exception will be thrown.
  243.      *
  244.      * @param regex
  245.      *            A PERL style regular expression
  246.      * @return Pattern
  247.      */
  248.     public static Pattern convertPerlRegexToPattern(@Nonnull final String regex) {
  249.         return convertPerlRegexToPattern(regex, false);
  250.     }

  251.     /**
  252.      * Converts a PERL style regular expression into Java style.<br>
  253.      * <br>
  254.      * The leading and ending slash and the modifiers will be removed.
  255.      *
  256.      * @param regex
  257.      *            A PERL style regular expression
  258.      * @param faultTolerant
  259.      *            Fault-tolerant translating the flags
  260.      * @return Pattern
  261.      */
  262.     public static Pattern convertPerlRegexToPattern(@Nonnull final String regex, @Nonnull final boolean faultTolerant) {
  263.         Check.notNull(regex, "regex");

  264.         String pattern = regex.trim();
  265.         final Matcher matcher = faultTolerant ? PERL_STYLE_TOLERANT.matcher(pattern) : PERL_STYLE.matcher(pattern);
  266.         if (!matcher.matches()) {
  267.             throw new IllegalArgumentException("The given regular expression '" + pattern
  268.                     + "' seems to be not in PERL style or has unsupported modifiers.");
  269.         }

  270.         pattern = pattern.substring(1);
  271.         final int lastIndex = pattern.lastIndexOf('/');
  272.         pattern = pattern.substring(0, lastIndex);

  273.         final int flags = Flag.convertToBitmask(Flag.parse(matcher.group(1)));
  274.         return Pattern.compile(pattern, flags);
  275.     }

  276.     /**
  277.      * <strong>Attention:</strong> This class is not intended to create objects from it.
  278.      */
  279.     private RegularExpressionConverter() {
  280.         // This class is not intended to create objects from it.
  281.     }

  282. }