RegularExpressionConverter.java
- /*******************************************************************************
- * Copyright 2012 André Rouél
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
- package net.sf.uadetector.internal.util;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.HashSet;
- import java.util.Set;
- import java.util.TreeSet;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import javax.annotation.Nonnegative;
- import javax.annotation.Nonnull;
- import net.sf.qualitycheck.Check;
- public final class RegularExpressionConverter {
- public enum Flag {
- /**
- * Enables canonical equivalence.
- */
- CANON_EQ(Pattern.CANON_EQ, 'c'),
- /**
- * Enables case-insensitive matching.
- */
- CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE, 'i'),
- /**
- * Permits whitespace and comments in pattern.
- */
- COMMENTS(Pattern.COMMENTS, 'x'),
- /**
- * Enables dotall mode.
- */
- DOTALL(Pattern.DOTALL, 's'),
- /**
- * Enables literal parsing of the pattern.
- */
- LITERAL(Pattern.LITERAL, 'l'),
- /**
- * Enables multiline mode.
- */
- MULTILINE(Pattern.MULTILINE, 'm'),
- /**
- * Enables Unicode-aware case folding.
- */
- UNICODE_CASE(Pattern.UNICODE_CASE, 'u'),
- /**
- * Enables Unix lines mode.
- */
- UNIX_LINES(Pattern.UNIX_LINES, 'e');
- private static class FlagByCharacterComparator extends CompareNullSafe<Flag> {
- private static final long serialVersionUID = 1L;
- @Override
- public int compareType(@Nonnull final Flag f1, @Nonnull final Flag f2) {
- final Character c1 = Character.valueOf(f1.getCharacter());
- final Character c2 = Character.valueOf(f2.getCharacter());
- return c1.compareTo(c2);
- }
- }
- private static final FlagByCharacterComparator FLAG_COMPARATOR = new FlagByCharacterComparator();
- /**
- * Converts a set of flags as to a bitmask (sum of numerical values).
- *
- * @param flags
- * a set of flags
- * @return sum of numerical values of passed flags or 0
- */
- public static int convertToBitmask(@Nonnull final Collection<Flag> flags) {
- Check.notNull(flags, "flags");
- int bitmask = 0;
- for (final Flag flag : flags) {
- bitmask = bitmask | flag.getNumber();
- }
- return bitmask;
- }
- /**
- * Converts a set of flags as to a string representation. The flags {@link Flag#CASE_INSENSITIVE},
- * {@link Flag#DOTALL}, {@link Flag#MULTILINE} and {@link Flag#COMMENTS} are identical to the PERL regular
- * expression modifiers.
- *
- * @param flags
- * a set of flags
- * @return sum of numerical values of passed flags or 0
- */
- public static String convertToModifiers(@Nonnull final Collection<Flag> flags) {
- Check.notNull(flags, "flags");
- final StringBuilder modifiers = new StringBuilder(8);
- final Set<Flag> sortedFlags = new TreeSet<Flag>(Collections.reverseOrder(FLAG_COMPARATOR));
- sortedFlags.addAll(flags);
- for (final Flag flag : sortedFlags) {
- modifiers.append(flag.getCharacter());
- }
- return modifiers.toString();
- }
- /**
- * This method try to find a matching enum value by the given character representation. The character will be
- * evaluated against the stored character of a flag.
- *
- * @param flag
- * representation of a flag as a character
- * @return the matching enum value or {@code null}
- * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
- * if the given number is smaller than zero
- */
- public static Flag evaluateByCharacter(final char flag) {
- Check.notNegative(flag, "flag");
- Flag result = null;
- for (final Flag value : values()) {
- if (value.getCharacter() == flag) {
- result = value;
- break;
- }
- }
- return result;
- }
- /**
- * This method try to find a matching enum value by the given numerical representation. The number will be
- * evaluated against the stored number of a flag.
- *
- * @param flag
- * representation of a flag as a character
- * @return the matching enum value or {@code null}
- * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
- * if the given number is smaller than zero
- */
- public static Flag evaluateByNumber(final int flag) {
- Check.notNegative(flag, "flag");
- Flag result = null;
- for (final Flag value : values()) {
- if (value.getNumber() == flag) {
- result = value;
- break;
- }
- }
- return result;
- }
- /**
- * Parses a sum of flags as numerical values (bitmask) and translates it to set of enum values.
- *
- * @param bitmask
- * Sum of numerical values of flags
- * @return a set of flags
- * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
- * if the given number is smaller than zero
- */
- @Nonnull
- public static Set<Flag> parse(@Nonnegative final int bitmask) {
- Check.notNegative(bitmask, "bitmask");
- final Set<Flag> flags = new HashSet<Flag>();
- for (final Flag flag : values()) {
- if ((bitmask & flag.getNumber()) != 0) {
- flags.add(flag);
- }
- }
- return flags;
- }
- /**
- * Translates PERL style modifiers to a set of {@code Pattern} compatible ones.
- *
- * @param modifiers
- * modifiers as string of a PERL style regular expression
- * @return a set of modifier flags that may include CASE_INSENSITIVE, MULTILINE, DOTALL and COMMENTS
- */
- public static Set<Flag> parse(@Nonnull final String modifiers) {
- Check.notNull(modifiers, "modifiers");
- final Set<Flag> flags = new HashSet<Flag>();
- for (int i = 0; i < modifiers.length(); i++) {
- final Flag flag = Flag.evaluateByCharacter(modifiers.charAt(i));
- if (flag != null) {
- flags.add(flag);
- }
- }
- return flags;
- }
- /**
- * Representation of a flag as a character
- */
- private final char character;
- /**
- * Representation of a flag as a number
- */
- private final int number;
- private Flag(final int value, final char character) {
- number = value;
- this.character = character;
- }
- /**
- * Returns this flag as character representation.
- *
- * @return representation as a character
- */
- public char getCharacter() {
- return character;
- }
- /**
- * Returns this flag as numerical representation.
- *
- * @return representation as a number
- */
- public int getNumber() {
- return number;
- }
- }
- /**
- * Template to support the conversion into a PERL style regular expression
- */
- private static final String PATTERN_TO_REGEX_TEMPLATE = "/%s/%s";
- /**
- * Pattern for PERL style regular expression strings
- */
- private static final Pattern PERL_STYLE = Pattern.compile("^/.*/((i|m|s|x)*)?$");
- /**
- * Pattern for PERL style regular expression strings with more fault-tolerance to the modifiers
- */
- private static final Pattern PERL_STYLE_TOLERANT = Pattern.compile("^/.*/(([A-z])*)?$");
- /**
- * Converts a given {@code Pattern} into a PERL style regular expression.
- *
- * @param pattern
- * regular expression pattern
- * @return PERL style regular expression as string
- */
- public static String convertPatternToPerlRegex(@Nonnull final Pattern pattern) {
- Check.notNull(pattern, "pattern");
- final String modifiers = Flag.convertToModifiers(Flag.parse(pattern.flags()));
- return String.format(PATTERN_TO_REGEX_TEMPLATE, pattern.pattern(), modifiers);
- }
- /**
- * Converts a PERL style regular expression into Java style.<br>
- * <br>
- * The leading and ending slash and the modifiers will be removed. The modifiers will be translated into equivalents
- * flags of <code>java.util.Pattern</code>. If there are modifiers that are not valid an exception will be thrown.
- *
- * @param regex
- * A PERL style regular expression
- * @return Pattern
- */
- public static Pattern convertPerlRegexToPattern(@Nonnull final String regex) {
- return convertPerlRegexToPattern(regex, false);
- }
- /**
- * Converts a PERL style regular expression into Java style.<br>
- * <br>
- * The leading and ending slash and the modifiers will be removed.
- *
- * @param regex
- * A PERL style regular expression
- * @param faultTolerant
- * Fault-tolerant translating the flags
- * @return Pattern
- */
- public static Pattern convertPerlRegexToPattern(@Nonnull final String regex, @Nonnull final boolean faultTolerant) {
- Check.notNull(regex, "regex");
- String pattern = regex.trim();
- final Matcher matcher = faultTolerant ? PERL_STYLE_TOLERANT.matcher(pattern) : PERL_STYLE.matcher(pattern);
- if (!matcher.matches()) {
- throw new IllegalArgumentException("The given regular expression '" + pattern
- + "' seems to be not in PERL style or has unsupported modifiers.");
- }
- pattern = pattern.substring(1);
- final int lastIndex = pattern.lastIndexOf('/');
- pattern = pattern.substring(0, lastIndex);
- final int flags = Flag.convertToBitmask(Flag.parse(matcher.group(1)));
- return Pattern.compile(pattern, flags);
- }
- /**
- * <strong>Attention:</strong> This class is not intended to create objects from it.
- */
- private RegularExpressionConverter() {
- // This class is not intended to create objects from it.
- }
- }