RegularExpressionConverter.java
/*******************************************************************************
* Copyright 2012 André Rouél
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.uadetector.internal.util;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import net.sf.qualitycheck.Check;
/**
 * Utility to convert between PERL style regular expressions (for example {@code /abc/si}) and
 * {@link Pattern} instances.
 */
public final class RegularExpressionConverter {

    /**
     * Match flags of {@link Pattern}, each paired with its numerical {@code Pattern} constant and a single
     * character used as modifier in the PERL style notation.
     */
    public enum Flag {

        /**
         * Enables canonical equivalence.
         */
        CANON_EQ(Pattern.CANON_EQ, 'c'),

        /**
         * Enables case-insensitive matching.
         */
        CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE, 'i'),

        /**
         * Permits whitespace and comments in pattern.
         */
        COMMENTS(Pattern.COMMENTS, 'x'),

        /**
         * Enables dotall mode.
         */
        DOTALL(Pattern.DOTALL, 's'),

        /**
         * Enables literal parsing of the pattern.
         */
        LITERAL(Pattern.LITERAL, 'l'),

        /**
         * Enables multiline mode.
         */
        MULTILINE(Pattern.MULTILINE, 'm'),

        /**
         * Enables Unicode-aware case folding.
         */
        UNICODE_CASE(Pattern.UNICODE_CASE, 'u'),

        /**
         * Enables Unix lines mode.
         */
        UNIX_LINES(Pattern.UNIX_LINES, 'e');

        /**
         * Orders two flags by their character representation.
         */
        private static class FlagByCharacterComparator extends CompareNullSafe<Flag> {

            private static final long serialVersionUID = 1L;

            @Override
            public int compareType(@Nonnull final Flag f1, @Nonnull final Flag f2) {
                final Character c1 = Character.valueOf(f1.getCharacter());
                final Character c2 = Character.valueOf(f2.getCharacter());
                return c1.compareTo(c2);
            }

        }

        /**
         * Shared comparator instance used to sort flags by their character
         */
        private static final FlagByCharacterComparator FLAG_COMPARATOR = new FlagByCharacterComparator();

        /**
         * Converts a collection of flags to a bitmask by OR-ing the numerical values of all passed flags.
         *
         * @param flags
         *            a collection of flags
         * @return combined numerical values of the passed flags or 0 if the collection is empty
         */
        public static int convertToBitmask(@Nonnull final Collection<Flag> flags) {
            Check.notNull(flags, "flags");
            int bitmask = 0;
            for (final Flag flag : flags) {
                bitmask |= flag.getNumber();
            }
            return bitmask;
        }

        /**
         * Converts a collection of flags to a string of modifier characters. The flags
         * {@link Flag#CASE_INSENSITIVE}, {@link Flag#DOTALL}, {@link Flag#MULTILINE} and {@link Flag#COMMENTS} are
         * identical to the PERL regular expression modifiers.
         * <p>
         * Duplicates are removed and the characters are emitted in the order defined by the reversed
         * character comparator.
         *
         * @param flags
         *            a collection of flags
         * @return the modifier characters of the passed flags or an empty string if the collection is empty
         */
        public static String convertToModifiers(@Nonnull final Collection<Flag> flags) {
            Check.notNull(flags, "flags");
            final Set<Flag> sortedFlags = new TreeSet<Flag>(Collections.reverseOrder(FLAG_COMPARATOR));
            sortedFlags.addAll(flags);
            final StringBuilder modifiers = new StringBuilder(8);
            for (final Flag flag : sortedFlags) {
                modifiers.append(flag.getCharacter());
            }
            return modifiers.toString();
        }

        /**
         * Tries to find a matching enum value by the given character representation. The character will be
         * evaluated against the stored character of each flag.
         *
         * @param flag
         *            representation of a flag as a character
         * @return the matching enum value or {@code null} if no flag uses the given character
         */
        public static Flag evaluateByCharacter(final char flag) {
            // A char is an unsigned value, so a check for negative input would never fail and is omitted.
            for (final Flag value : values()) {
                if (value.getCharacter() == flag) {
                    return value;
                }
            }
            return null;
        }

        /**
         * Tries to find a matching enum value by the given numerical representation. The number will be
         * evaluated against the stored number of each flag.
         *
         * @param flag
         *            representation of a flag as a number
         * @return the matching enum value or {@code null} if no flag has exactly the given number
         * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
         *             if the given number is smaller than zero
         */
        public static Flag evaluateByNumber(final int flag) {
            Check.notNegative(flag, "flag");
            for (final Flag value : values()) {
                if (value.getNumber() == flag) {
                    return value;
                }
            }
            return null;
        }

        /**
         * Parses a combination of flags given as numerical value (bitmask) and translates it to a set of enum
         * values. Bits that do not belong to any known flag are ignored.
         *
         * @param bitmask
         *            combined numerical values of flags
         * @return a set of flags
         * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
         *             if the given number is smaller than zero
         */
        @Nonnull
        public static Set<Flag> parse(@Nonnegative final int bitmask) {
            Check.notNegative(bitmask, "bitmask");
            final Set<Flag> flags = EnumSet.noneOf(Flag.class);
            for (final Flag flag : values()) {
                if ((bitmask & flag.getNumber()) != 0) {
                    flags.add(flag);
                }
            }
            return flags;
        }

        /**
         * Translates a string of modifier characters to a set of flags. Every character is looked up with
         * {@link #evaluateByCharacter(char)}; characters that do not correspond to a flag are skipped.
         *
         * @param modifiers
         *            modifiers as string of a PERL style regular expression
         * @return a set with the flags whose characters occur in the given string
         */
        public static Set<Flag> parse(@Nonnull final String modifiers) {
            Check.notNull(modifiers, "modifiers");
            final Set<Flag> flags = EnumSet.noneOf(Flag.class);
            for (int i = 0; i < modifiers.length(); i++) {
                final Flag flag = Flag.evaluateByCharacter(modifiers.charAt(i));
                if (flag != null) {
                    flags.add(flag);
                }
            }
            return flags;
        }

        /**
         * Representation of a flag as a character
         */
        private final char character;

        /**
         * Representation of a flag as a number
         */
        private final int number;

        private Flag(final int value, final char character) {
            number = value;
            this.character = character;
        }

        /**
         * Returns this flag as character representation.
         *
         * @return representation as a character
         */
        public char getCharacter() {
            return character;
        }

        /**
         * Returns this flag as numerical representation.
         *
         * @return representation as a number
         */
        public int getNumber() {
            return number;
        }

    }

    /**
     * Template to support the conversion into a PERL style regular expression
     */
    private static final String PATTERN_TO_REGEX_TEMPLATE = "/%s/%s";

    /**
     * Pattern for PERL style regular expression strings; only the modifiers i, m, s and x are accepted
     */
    private static final Pattern PERL_STYLE = Pattern.compile("^/.*/((i|m|s|x)*)?$");

    /**
     * Pattern for PERL style regular expression strings with more fault-tolerance to the modifiers: any ASCII
     * letter is accepted. The range is spelled out as {@code A-Za-z} because {@code A-z} would also cover the
     * characters between 'Z' and 'a' ({@code [ \ ] ^ _ `}).
     */
    private static final Pattern PERL_STYLE_TOLERANT = Pattern.compile("^/.*/(([A-Za-z])*)?$");

    /**
     * Converts a given {@code Pattern} into a PERL style regular expression of the form
     * {@code /<pattern>/<modifiers>}.
     *
     * @param pattern
     *            regular expression pattern
     * @return PERL style regular expression as string
     */
    public static String convertPatternToPerlRegex(@Nonnull final Pattern pattern) {
        Check.notNull(pattern, "pattern");
        final String modifiers = Flag.convertToModifiers(Flag.parse(pattern.flags()));
        return String.format(PATTERN_TO_REGEX_TEMPLATE, pattern.pattern(), modifiers);
    }

    /**
     * Converts a PERL style regular expression into Java style.<br>
     * <br>
     * The leading and ending slash and the modifiers will be removed. The modifiers will be translated into equivalents
     * flags of <code>java.util.Pattern</code>. If there are modifiers that are not valid an exception will be thrown.
     *
     * @param regex
     *            A PERL style regular expression
     * @return Pattern
     * @throws IllegalArgumentException
     *             if the given string is not in PERL style or contains modifiers other than i, m, s and x
     */
    public static Pattern convertPerlRegexToPattern(@Nonnull final String regex) {
        return convertPerlRegexToPattern(regex, false);
    }

    /**
     * Converts a PERL style regular expression into Java style.<br>
     * <br>
     * The input is trimmed, then the leading and ending slash and the modifiers will be removed. In
     * fault-tolerant mode any ASCII letter is accepted as modifier; letters that do not correspond to a
     * {@link Flag} are ignored.
     *
     * @param regex
     *            A PERL style regular expression
     * @param faultTolerant
     *            Fault-tolerant translating the flags
     * @return Pattern
     * @throws IllegalArgumentException
     *             if the given string is not in PERL style or has modifiers that are not accepted in the selected
     *             mode
     */
    public static Pattern convertPerlRegexToPattern(@Nonnull final String regex, final boolean faultTolerant) {
        Check.notNull(regex, "regex");
        final String trimmed = regex.trim();
        final Pattern style = faultTolerant ? PERL_STYLE_TOLERANT : PERL_STYLE;
        final Matcher matcher = style.matcher(trimmed);
        if (!matcher.matches()) {
            throw new IllegalArgumentException("The given regular expression '" + trimmed
                    + "' seems to be not in PERL style or has unsupported modifiers.");
        }

        // A successful match guarantees a leading slash and a later closing slash, so the expression
        // itself is everything between index 1 and the last slash.
        final String expression = trimmed.substring(1, trimmed.lastIndexOf('/'));
        final int flags = Flag.convertToBitmask(Flag.parse(matcher.group(1)));
        return Pattern.compile(expression, flags);
    }

    /**
     * <strong>Attention:</strong> This class is not intended to create objects from it.
     */
    private RegularExpressionConverter() {
        // This class is not intended to create objects from it.
    }

}