// RegularExpressionConverter.java

/*******************************************************************************
 * Copyright 2012 André Rouél
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package net.sf.uadetector.internal.util;

import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;

import net.sf.qualitycheck.Check;

/**
 * Utility to convert between PERL style regular expressions (<code>/pattern/modifiers</code>) and
 * {@link Pattern} instances.
 */
public final class RegularExpressionConverter {

	/**
	 * The match flags of {@link Pattern}, each paired with its numerical value and a one-character modifier.
	 */
	public enum Flag {

		/**
		 * Enables canonical equivalence.
		 */
		CANON_EQ(Pattern.CANON_EQ, 'c'),

		/**
		 * Enables case-insensitive matching.
		 */
		CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE, 'i'),

		/**
		 * Permits whitespace and comments in pattern.
		 */
		COMMENTS(Pattern.COMMENTS, 'x'),

		/**
		 * Enables dotall mode.
		 */
		DOTALL(Pattern.DOTALL, 's'),

		/**
		 * Enables literal parsing of the pattern.
		 */
		LITERAL(Pattern.LITERAL, 'l'),

		/**
		 * Enables multiline mode.
		 */
		MULTILINE(Pattern.MULTILINE, 'm'),

		/**
		 * Enables Unicode-aware case folding.
		 */
		UNICODE_CASE(Pattern.UNICODE_CASE, 'u'),

		/**
		 * Enables Unix lines mode.
		 */
		UNIX_LINES(Pattern.UNIX_LINES, 'e');

		/**
		 * Orders flags by their modifier character.
		 */
		private static class FlagByCharacterComparator extends CompareNullSafe<Flag> {
			private static final long serialVersionUID = 1L;

			@Override
			public int compareType(@Nonnull final Flag f1, @Nonnull final Flag f2) {
				final Character c1 = Character.valueOf(f1.getCharacter());
				final Character c2 = Character.valueOf(f2.getCharacter());
				return c1.compareTo(c2);
			}
		}

		private static final FlagByCharacterComparator FLAG_COMPARATOR = new FlagByCharacterComparator();

		/**
		 * Converts a collection of flags to a bitmask by combining their numerical values with a bitwise OR.
		 * 
		 * @param flags
		 *            a collection of flags
		 * @return combined numerical values of the passed flags or 0 if the collection is empty
		 */
		public static int convertToBitmask(@Nonnull final Collection<Flag> flags) {
			Check.notNull(flags, "flags");

			int bitmask = 0;
			for (final Flag flag : flags) {
				bitmask = bitmask | flag.getNumber();
			}
			return bitmask;
		}

		/**
		 * Converts a collection of flags to a string of modifier characters. The flags {@link Flag#CASE_INSENSITIVE},
		 * {@link Flag#DOTALL}, {@link Flag#MULTILINE} and {@link Flag#COMMENTS} are identical to the PERL regular
		 * expression modifiers.<br>
		 * <br>
		 * The flags are collected in a sorted set that uses the reversed character comparator, so each flag
		 * contributes at most one character.
		 * 
		 * @param flags
		 *            a collection of flags
		 * @return the modifier characters of the passed flags or an empty string if there are none
		 */
		public static String convertToModifiers(@Nonnull final Collection<Flag> flags) {
			Check.notNull(flags, "flags");

			final StringBuilder modifiers = new StringBuilder(8);
			final Set<Flag> sortedFlags = new TreeSet<Flag>(Collections.reverseOrder(FLAG_COMPARATOR));
			sortedFlags.addAll(flags);
			for (final Flag flag : sortedFlags) {
				modifiers.append(flag.getCharacter());
			}
			return modifiers.toString();
		}

		/**
		 * This method tries to find a matching enum value by the given character representation. The character will
		 * be evaluated against the stored character of a flag.
		 * 
		 * @param flag
		 *            representation of a flag as a character
		 * @return the matching enum value or {@code null}
		 */
		public static Flag evaluateByCharacter(final char flag) {
			// A char is unsigned and therefore never negative, so no range check is necessary here.
			for (final Flag value : values()) {
				if (value.getCharacter() == flag) {
					return value;
				}
			}
			return null;
		}

		/**
		 * This method tries to find a matching enum value by the given numerical representation. The number will be
		 * evaluated against the stored number of a flag.
		 * 
		 * @param flag
		 *            representation of a flag as a number
		 * @return the matching enum value or {@code null}
		 * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
		 *             if the given number is smaller than zero
		 */
		public static Flag evaluateByNumber(final int flag) {
			Check.notNegative(flag, "flag");
			for (final Flag value : values()) {
				if (value.getNumber() == flag) {
					return value;
				}
			}
			return null;
		}

		/**
		 * Parses a sum of flags as numerical values (bitmask) and translates it to set of enum values. Bits that do
		 * not belong to any flag of this enum are ignored.
		 * 
		 * @param bitmask
		 *            Sum of numerical values of flags
		 * @return a set of flags
		 * @throws net.sf.qualitycheck.exception.IllegalNegativeArgumentException
		 *             if the given number is smaller than zero
		 */
		@Nonnull
		public static Set<Flag> parse(@Nonnegative final int bitmask) {
			Check.notNegative(bitmask, "bitmask");

			final Set<Flag> flags = EnumSet.noneOf(Flag.class);
			for (final Flag flag : values()) {
				if ((bitmask & flag.getNumber()) != 0) {
					flags.add(flag);
				}
			}
			return flags;
		}

		/**
		 * Translates a string of modifier characters to a set of flags. Every character is looked up with
		 * {@link #evaluateByCharacter(char)}; characters without a matching flag are skipped.
		 * 
		 * @param modifiers
		 *            modifiers as string of a PERL style regular expression
		 * @return a set with the flags whose characters occur in the passed modifiers
		 */
		public static Set<Flag> parse(@Nonnull final String modifiers) {
			Check.notNull(modifiers, "modifiers");

			final Set<Flag> flags = EnumSet.noneOf(Flag.class);
			for (int i = 0; i < modifiers.length(); i++) {
				final Flag flag = Flag.evaluateByCharacter(modifiers.charAt(i));
				if (flag != null) {
					flags.add(flag);
				}
			}
			return flags;
		}

		/**
		 * Representation of a flag as a character
		 */
		private final char character;

		/**
		 * Representation of a flag as a number
		 */
		private final int number;

		private Flag(final int value, final char character) {
			number = value;
			this.character = character;
		}

		/**
		 * Returns this flag as character representation.
		 * 
		 * @return representation as a character
		 */
		public char getCharacter() {
			return character;
		}

		/**
		 * Returns this flag as numerical representation.
		 * 
		 * @return representation as a number
		 */
		public int getNumber() {
			return number;
		}

	}

	/**
	 * Template to support the conversion into a PERL style regular expression
	 */
	private static final String PATTERN_TO_REGEX_TEMPLATE = "/%s/%s";

	/**
	 * Pattern for PERL style regular expression strings
	 */
	private static final Pattern PERL_STYLE = Pattern.compile("^/.*/((i|m|s|x)*)?$");

	/**
	 * Pattern for PERL style regular expression strings with more fault-tolerance to the modifiers. Any sequence of
	 * ASCII letters is accepted as modifiers. The ranges are spelled out explicitly because a single range from
	 * {@code A} to {@code z} would also admit the punctuation characters located between {@code Z} and {@code a}.
	 */
	private static final Pattern PERL_STYLE_TOLERANT = Pattern.compile("^/.*/(([A-Za-z])*)?$");

	/**
	 * Converts a given {@code Pattern} into a PERL style regular expression.
	 * 
	 * @param pattern
	 *            regular expression pattern
	 * @return PERL style regular expression as string
	 */
	public static String convertPatternToPerlRegex(@Nonnull final Pattern pattern) {
		Check.notNull(pattern, "pattern");
		final String modifiers = Flag.convertToModifiers(Flag.parse(pattern.flags()));
		return String.format(PATTERN_TO_REGEX_TEMPLATE, pattern.pattern(), modifiers);
	}

	/**
	 * Converts a PERL style regular expression into Java style.<br>
	 * <br>
	 * The leading and ending slash and the modifiers will be removed. The modifiers will be translated into equivalents
	 * flags of <code>java.util.Pattern</code>. If there are modifiers that are not valid an exception will be thrown.
	 * 
	 * @param regex
	 *            A PERL style regular expression
	 * @return Pattern
	 * @throws IllegalArgumentException
	 *             if the given expression is not in PERL style or has modifiers other than {@code i}, {@code m},
	 *             {@code s} and {@code x}
	 */
	public static Pattern convertPerlRegexToPattern(@Nonnull final String regex) {
		return convertPerlRegexToPattern(regex, false);
	}

	/**
	 * Converts a PERL style regular expression into Java style.<br>
	 * <br>
	 * The passed string is trimmed first. Then the leading and ending slash and the modifiers will be removed and the
	 * modifiers will be translated into flags of <code>java.util.Pattern</code>. In fault-tolerant mode any ASCII
	 * letter is accepted as modifier and letters without a matching {@link Flag} are ignored.
	 * 
	 * @param regex
	 *            A PERL style regular expression
	 * @param faultTolerant
	 *            Fault-tolerant translating the flags
	 * @return Pattern
	 * @throws IllegalArgumentException
	 *             if the given expression is not in PERL style or has unsupported modifiers; this includes the
	 *             {@link java.util.regex.PatternSyntaxException} raised by {@link Pattern#compile(String, int)} when
	 *             the expression between the slashes is not a valid Java regular expression
	 */
	public static Pattern convertPerlRegexToPattern(@Nonnull final String regex, final boolean faultTolerant) {
		Check.notNull(regex, "regex");

		final String trimmed = regex.trim();
		final Matcher matcher = faultTolerant ? PERL_STYLE_TOLERANT.matcher(trimmed) : PERL_STYLE.matcher(trimmed);
		if (!matcher.matches()) {
			throw new IllegalArgumentException("The given regular expression '" + trimmed
					+ "' seems to be not in PERL style or has unsupported modifiers.");
		}

		// A successful match guarantees a leading slash and a later closing slash; the expression is everything
		// between the first and the last slash.
		final String expression = trimmed.substring(1, trimmed.lastIndexOf('/'));

		// The modifier group is optional in the pattern, so guard against an unset group.
		final String modifiers = matcher.group(1);
		final int flags = Flag.convertToBitmask(Flag.parse(modifiers == null ? "" : modifiers));
		return Pattern.compile(expression, flags);
	}

	/**
	 * <strong>Attention:</strong> This class is not intended to create objects from it.
	 */
	private RegularExpressionConverter() {
		// This class is not intended to create objects from it.
	}

}