XmlDataHandler.java
- /*******************************************************************************
- * Copyright 2012 André Rouél
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
- package net.sf.uadetector.internal.data;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import javax.annotation.Nonnull;
- import net.sf.qualitycheck.Check;
- import net.sf.uadetector.internal.data.domain.Browser;
- import net.sf.uadetector.internal.data.domain.BrowserOperatingSystemMapping;
- import net.sf.uadetector.internal.data.domain.BrowserPattern;
- import net.sf.uadetector.internal.data.domain.BrowserType;
- import net.sf.uadetector.internal.data.domain.Device;
- import net.sf.uadetector.internal.data.domain.DevicePattern;
- import net.sf.uadetector.internal.data.domain.OperatingSystem;
- import net.sf.uadetector.internal.data.domain.OperatingSystemPattern;
- import net.sf.uadetector.internal.data.domain.Robot;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.SAXParseException;
- import org.xml.sax.helpers.DefaultHandler;
- public final class XmlDataHandler extends DefaultHandler {
- public enum Tag {
- /**
- * Tag name of a browser entry
- */
- BROWSER("browser"),
- /**
- * Tag name of the ID of an browser pattern
- */
- BROWSER_ID("browser_id"),
- /**
- * Tag name of the informational URL of a browser entry
- */
- BROWSER_INFO_URL("browser_info_url"),
- /**
- * Tag name of a mapping entry between a browser and an operating system
- */
- BROWSER_OS_MAPPING("browser_os"),
- /**
- * Tag name of a browser pattern
- */
- BROWSER_PATTERN("browser_reg"),
- /**
- * Tag name of a browser type entry
- */
- BROWSER_TYPE("browser_type"),
- /**
- * Tag name of the type ID of a browser
- */
- BROWSER_TYPE_ID("type"),
- /**
- * Tag name of a producer company of an user agent
- */
- COMPANY("company"),
- /**
- * Tag name of the URL of a producer company from an user agent
- */
- COMPANY_URL("url_company"),
- /**
- * Tag name of a device
- */
- DEVICE("device"),
- /**
- * Tag name of a device pattern
- */
- DEVICE_ID("device_id"),
- /**
- * Tag name of the informational URL of an device entry
- */
- DEVICE_INFO_URL("device_info_url"),
- /**
- * Tag name of a device pattern
- */
- DEVICE_PATTERN("device_reg"),
- /**
- * Tag name of all devices
- */
- DEVICES("devices"),
- /**
- * Tag name of all device patterns
- */
- DEVICES_PATTERN("devices_reg"),
- /**
- * Tag name of an family of an user agent
- */
- FAMILY("family"),
- /**
- * Tag name of the icon of an entry
- */
- ICON("icon"),
- /**
- * Tag name of an ID of an user agent
- */
- ID("id"),
- /**
- * Tag name of the product name of an user agent
- */
- NAME("name"),
- /**
- * Tag name of an operating system entry
- */
- OPERATING_SYSTEM("os"),
- /**
- * Tag name of the ID of an operating system pattern
- */
- OPERATING_SYSTEM_ID("os_id"),
- /**
- * Tag name of the informational URL of an operating system entry
- */
- OPERATING_SYSTEM_INFO_URL("os_info_url"),
- /**
- * Tag name of an operating system pattern
- */
- OPERATING_SYSTEM_PATTERN("operating_system_reg"),
- /**
- * Tag name of the order of an user agent pattern
- */
- PATTERN_ORDER("order"),
- /**
- * Tag name of the regular expression of an user agent pattern
- */
- PATTERN_REGEX("regstring"),
- /**
- * Tag name of a robot entry
- */
- ROBOT("robot"),
- /**
- * Tag name of the informational URL of a robot entry
- */
- ROBOT_INFO_URL("bot_info_url"),
- /**
- * Tag name of the product URL of an user agent
- */
- URL("url"),
- /**
- * Tag name of an user agent string of a robot entry
- */
- USERAGENT("useragent"),
- /**
- * Tag name of the data version
- */
- VERSION("version");
- public static Tag evaluate(@Nonnull final String tagName) {
- Check.notNull(tagName, "tagName");
- Tag result = null;
- for (final Tag tag : values()) {
- if (tag.getTagName().equalsIgnoreCase(tagName)) {
- result = tag;
- break;
- }
- }
- return result;
- }
- public static boolean isBrowserOsMappingTag(final String tagName) {
- return BROWSER_OS_MAPPING.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isBrowserPatternTag(final String tagName) {
- return BROWSER_PATTERN.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isBrowserTag(final String tagName) {
- return BROWSER.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isBrowserTypeTag(final String tagName) {
- return BROWSER_TYPE.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isDevicePatternTag(final String tagName) {
- return DEVICE_PATTERN.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isDeviceTag(final String tagName) {
- return DEVICE.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isIdTag(final String tagName) {
- return ID.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isOperatingSystemPatternTag(final String tagName) {
- return OPERATING_SYSTEM_PATTERN.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isOperatingSystemTag(final String tagName) {
- return OPERATING_SYSTEM.getTagName().equalsIgnoreCase(tagName);
- }
- public static boolean isRobotTag(final String tagName) {
- return ROBOT.getTagName().equalsIgnoreCase(tagName);
- }
- @Nonnull
- private String tagName;
- private Tag(@Nonnull final String tagName) {
- this.tagName = tagName;
- }
- @Nonnull
- public String getTagName() {
- return tagName;
- }
- }
- /**
- * Character set to read the internal Document Type Definition (DTD) of UAS data
- */
- private static final String CHARSET = "UTF-8";
- /**
- * Corresponding logger for this class
- */
- private static final Logger LOG = LoggerFactory.getLogger(XmlDataHandler.class);
- /**
- * Path to the internal Document Type Definition (DTD) of UAS data files to be able to work completely offline
- */
- protected static final String UASDATA_DEF = "uadetector/uasxmldata.dtd";
- /**
- * URL to the Document Type Definition (DTD) of UAS data files
- */
- protected static final String UASDATA_DEF_URL = "http://user-agent-string.info/rpc/uasxmldata.dtd";
- /**
- * Logs an issue while parsing XML.
- *
- * @param prefix
- * log level as string to add at the beginning of the message
- * @param e
- * exception to log
- */
- protected static void logParsingIssue(final String prefix, final SAXParseException e) {
- final StringBuilder buffer = new StringBuilder();
- buffer.append(prefix);
- buffer.append(" while reading UAS data: ");
- buffer.append(e.getMessage());
- buffer.append(" (line: ");
- buffer.append(e.getLineNumber());
- if (e.getSystemId() != null) {
- buffer.append(" uri: ");
- buffer.append(e.getSystemId());
- }
- buffer.append(")");
- LOG.warn(buffer.toString());
- }
- private Browser.Builder browserBuilder = new Browser.Builder();
- private Device.Builder deviceBuilder = new Device.Builder();
- private BrowserOperatingSystemMapping.Builder browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder();
- private BrowserPattern.Builder browserPatternBuilder = new BrowserPattern.Builder();
- private DevicePattern.Builder devicePatternBuilder = new DevicePattern.Builder();
- private BrowserType.Builder browserTypeBuilder = new BrowserType.Builder();
- private StringBuilder buffer = new StringBuilder();
- private Tag currentTag = null;
- @Nonnull
- private final DataBuilder dataBuilder;
- /**
- * Flag to note that a fatal error occurred while parsing the document
- */
- private boolean error = false;
- private boolean isBrowser = false;
- private boolean isBrowserOsMapping = false;
- private boolean isBrowserPattern = false;
- private boolean isBrowserType = false;
- private boolean isDevice = false;
- private boolean isDevicePattern = false;
- private boolean isOperatingSystem = false;
- private boolean isOperatingSystemPattern = false;
- private boolean isRobot = false;
- private OperatingSystem.Builder operatingSystemBuilder = new OperatingSystem.Builder();
- private OperatingSystemPattern.Builder operatingSystemPatternBuilder = new OperatingSystemPattern.Builder();
- private Robot.Builder robotBuilder = new Robot.Builder();
- /**
- * Flag to note that a warning occurred while parsing the document
- */
- private boolean warning = false;
- public XmlDataHandler(@Nonnull final DataBuilder builder) {
- Check.notNull(builder, "builder");
- dataBuilder = builder;
- }
- private void addToBrowserBuilder() {
- if (isBrowser) {
- if (currentTag == Tag.ID) {
- browserBuilder.setId(buffer.toString());
- } else if (currentTag == Tag.BROWSER_TYPE_ID) {
- browserBuilder.setTypeId(buffer.toString());
- } else if (currentTag == Tag.NAME) {
- browserBuilder.setFamilyName(buffer.toString());
- } else if (currentTag == Tag.URL) {
- browserBuilder.setUrl(buffer.toString());
- } else if (currentTag == Tag.COMPANY) {
- browserBuilder.setProducer(buffer.toString());
- } else if (currentTag == Tag.COMPANY_URL) {
- browserBuilder.setProducerUrl(buffer.toString());
- } else if (currentTag == Tag.ICON) {
- browserBuilder.setIcon(buffer.toString());
- } else if (currentTag == Tag.BROWSER_INFO_URL) {
- browserBuilder.setInfoUrl(buffer.toString());
- }
- }
- }
- private void addToBrowserOsMappingBuilder() {
- if (isBrowserOsMapping && currentTag == Tag.BROWSER_ID) {
- browserOsMappingBuilder.setBrowserId(buffer.toString());
- } else if (isBrowserOsMapping && currentTag == Tag.OPERATING_SYSTEM_ID) {
- browserOsMappingBuilder.setOperatingSystemId(buffer.toString());
- }
- }
- private void addToBrowserPatternBuilder() {
- if (isBrowserPattern && currentTag == Tag.PATTERN_ORDER) {
- browserPatternBuilder.setPosition(buffer.toString());
- } else if (isBrowserPattern && currentTag == Tag.BROWSER_ID) {
- browserPatternBuilder.setId(buffer.toString());
- } else if (isBrowserPattern && currentTag == Tag.PATTERN_REGEX) {
- browserPatternBuilder.setPerlRegularExpression(buffer.toString());
- }
- }
- private void addToBrowserTypeBuilder() {
- if (isBrowserType && currentTag == Tag.ID) {
- browserTypeBuilder.setId(buffer.toString());
- } else if (isBrowserType && currentTag == Tag.BROWSER_TYPE_ID) {
- browserTypeBuilder.setName(buffer.toString());
- }
- }
- private void addToDeviceBuilder() {
- if (isDevice) {
- if (currentTag == Tag.ID) {
- deviceBuilder.setId(buffer.toString());
- } else if (currentTag == Tag.NAME) {
- deviceBuilder.setName(buffer.toString());
- } else if (currentTag == Tag.ICON) {
- deviceBuilder.setIcon(buffer.toString());
- } else if (currentTag == Tag.DEVICE_INFO_URL) {
- deviceBuilder.setInfoUrl(buffer.toString());
- }
- }
- }
- private void addToDevicePatternBuilder() {
- if (isDevicePattern && currentTag == Tag.PATTERN_ORDER) {
- devicePatternBuilder.setPosition(buffer.toString());
- } else if (isDevicePattern && currentTag == Tag.DEVICE_ID) {
- devicePatternBuilder.setId(buffer.toString());
- } else if (isDevicePattern && currentTag == Tag.PATTERN_REGEX) {
- devicePatternBuilder.setPerlRegularExpression(buffer.toString());
- }
- }
- private void addToOperatingSystemBuilder() {
- if (isOperatingSystem) {
- if (currentTag == Tag.ID) {
- operatingSystemBuilder.setId(buffer.toString());
- } else if (currentTag == Tag.FAMILY) {
- operatingSystemBuilder.setFamily(buffer.toString());
- } else if (currentTag == Tag.NAME) {
- operatingSystemBuilder.setName(buffer.toString());
- } else if (currentTag == Tag.URL) {
- operatingSystemBuilder.setUrl(buffer.toString());
- } else if (currentTag == Tag.COMPANY) {
- operatingSystemBuilder.setProducer(buffer.toString());
- } else if (currentTag == Tag.COMPANY_URL) {
- operatingSystemBuilder.setProducerUrl(buffer.toString());
- } else if (currentTag == Tag.ICON) {
- operatingSystemBuilder.setIcon(buffer.toString());
- } else if (currentTag == Tag.OPERATING_SYSTEM_INFO_URL) {
- operatingSystemBuilder.setInfoUrl(buffer.toString());
- }
- }
- }
- private void addToOperatingSystemPatternBuilder() {
- if (isOperatingSystemPattern) {
- if (currentTag == Tag.PATTERN_ORDER) {
- operatingSystemPatternBuilder.setPosition(buffer.toString());
- } else if (currentTag == Tag.OPERATING_SYSTEM_ID) {
- operatingSystemPatternBuilder.setId(buffer.toString());
- } else if (currentTag == Tag.PATTERN_REGEX) {
- operatingSystemPatternBuilder.setPerlRegularExpression(buffer.toString());
- }
- }
- }
- private void addToRobotBuilder() {
- if (isRobot) {
- if (currentTag == Tag.ID) {
- robotBuilder.setId(buffer.toString());
- } else if (currentTag == Tag.USERAGENT) {
- robotBuilder.setUserAgentString(buffer.toString());
- } else if (currentTag == Tag.FAMILY) {
- robotBuilder.setFamilyName(buffer.toString());
- } else if (currentTag == Tag.NAME) {
- robotBuilder.setName(buffer.toString());
- } else if (currentTag == Tag.COMPANY) {
- robotBuilder.setProducer(buffer.toString());
- } else if (currentTag == Tag.COMPANY_URL) {
- robotBuilder.setProducerUrl(buffer.toString());
- } else if (currentTag == Tag.ICON) {
- robotBuilder.setIcon(buffer.toString());
- } else if (currentTag == Tag.ROBOT_INFO_URL) {
- robotBuilder.setInfoUrl(buffer.toString());
- }
- }
- }
- @Override
- public void characters(final char ch[], final int start, final int length) throws SAXException {
- buffer.append(new String(ch, start, length));
- }
- @Override
- public void endElement(final String uri, final String localName, final String tagName) throws SAXException {
- transferToSpecificBuilderAndReset();
- if (Tag.isRobotTag(tagName)) {
- saveAndResetRobotBuilder();
- isRobot = false;
- } else if (Tag.isBrowserTag(tagName)) {
- saveAndResetBrowserBuilder();
- isBrowser = false;
- } else if (Tag.isOperatingSystemTag(tagName)) {
- saveAndResetOperatingSystemBuilder();
- isOperatingSystem = false;
- } else if (Tag.isBrowserTypeTag(tagName)) {
- saveAndResetBrowserTypeBuilder();
- isBrowserType = false;
- } else if (Tag.isBrowserPatternTag(tagName)) {
- saveAndResetBrowserPatternBuilder();
- isBrowserPattern = false;
- } else if (Tag.isBrowserOsMappingTag(tagName)) {
- saveAndResetBrowserOperatingSystemMapping();
- isBrowserOsMapping = false;
- } else if (Tag.isOperatingSystemPatternTag(tagName)) {
- saveAndResetOperatingSystemPatternBuilder();
- isOperatingSystemPattern = false;
- } else if (Tag.isDeviceTag(tagName)) {
- saveAndResetDeviceBuilder();
- isDevice = false;
- } else if (Tag.isDevicePatternTag(tagName)) {
- saveAndResetDevicePatternBuilder();
- isDevicePattern = false;
- }
- currentTag = null;
- }
- @Override
- public void error(final SAXParseException e) throws SAXException {
- error = true;
- logParsingIssue("Error", e);
- super.fatalError(e);
- }
- @Override
- public void fatalError(final SAXParseException e) throws SAXException {
- logParsingIssue("Fatal error", e);
- // this call throws a SAXException
- super.fatalError(e);
- }
- /**
- * Gets the flag whether an error occurred while parsing the document.
- *
- * @return {@code true} if an error occurred otherwise {@code false}
- */
- public boolean hasError() {
- return error;
- }
- /**
- * Gets the flag whether an warning occurred while parsing the document.
- *
- * @return {@code true} if an warning occurred otherwise {@code false}
- */
- public boolean hasWarning() {
- return warning;
- }
- @Override
- public InputSource resolveEntity(final String publicId, final String systemId) throws IOException, SAXException {
- if (UASDATA_DEF_URL.equals(systemId)) {
- final InputStream stream = this.getClass().getClassLoader().getResourceAsStream(UASDATA_DEF);
- return new InputSource(new InputStreamReader(stream, CHARSET));
- }
- throw new SAXException("unable to resolve remote entity, systemId = " + systemId);
- }
- private void saveAndResetBrowserBuilder() {
- dataBuilder.appendBrowserBuilder(browserBuilder);
- browserBuilder = new Browser.Builder();
- }
- private void saveAndResetBrowserOperatingSystemMapping() {
- dataBuilder.appendBrowserOperatingSystemMapping(browserOsMappingBuilder.build());
- browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder();
- }
- private void saveAndResetBrowserPatternBuilder() {
- try {
- dataBuilder.appendBrowserPattern(browserPatternBuilder.build());
- } catch (final IllegalArgumentException e) {
- LOG.warn("Can not append browser pattern: " + e.getLocalizedMessage());
- }
- browserPatternBuilder = new BrowserPattern.Builder();
- }
- private void saveAndResetBrowserTypeBuilder() {
- dataBuilder.appendBrowserType(browserTypeBuilder.build());
- browserTypeBuilder = new BrowserType.Builder();
- }
- private void saveAndResetDeviceBuilder() {
- dataBuilder.appendDeviceBuilder(deviceBuilder);
- deviceBuilder = new Device.Builder();
- }
- private void saveAndResetDevicePatternBuilder() {
- try {
- dataBuilder.appendDevicePattern(devicePatternBuilder.build());
- } catch (final IllegalArgumentException e) {
- LOG.warn("Can not append device pattern: " + e.getLocalizedMessage());
- }
- devicePatternBuilder = new DevicePattern.Builder();
- }
- private void saveAndResetOperatingSystemBuilder() {
- dataBuilder.appendOperatingSystemBuilder(operatingSystemBuilder);
- operatingSystemBuilder = new OperatingSystem.Builder();
- }
- private void saveAndResetOperatingSystemPatternBuilder() {
- try {
- dataBuilder.appendOperatingSystemPattern(operatingSystemPatternBuilder.build());
- } catch (final IllegalArgumentException e) {
- LOG.warn("Can not append OS pattern: " + e.getLocalizedMessage());
- }
- operatingSystemPatternBuilder = new OperatingSystemPattern.Builder();
- }
- private void saveAndResetRobotBuilder() {
- dataBuilder.appendRobot(robotBuilder.build());
- robotBuilder = new Robot.Builder();
- }
- @Override
- public void startElement(final String uri, final String localName, final String tagName, final Attributes attributes)
- throws SAXException {
- if (Tag.isRobotTag(tagName)) {
- isRobot = true;
- } else if (Tag.isBrowserTag(tagName)) {
- isBrowser = true;
- } else if (Tag.isOperatingSystemTag(tagName)) {
- isOperatingSystem = true;
- } else if (Tag.isBrowserTypeTag(tagName)) {
- isBrowserType = true;
- } else if (Tag.isBrowserPatternTag(tagName)) {
- isBrowserPattern = true;
- } else if (Tag.isBrowserOsMappingTag(tagName)) {
- isBrowserOsMapping = true;
- } else if (Tag.isOperatingSystemPatternTag(tagName)) {
- isOperatingSystemPattern = true;
- } else if (Tag.isDeviceTag(tagName)) {
- isDevice = true;
- } else if (Tag.isDevicePatternTag(tagName)) {
- isDevicePattern = true;
- }
- currentTag = Tag.evaluate(tagName);
- }
- /**
- * Transfers all characters of a specific tag to the corresponding builder and resets the string buffer.
- */
- private void transferToSpecificBuilderAndReset() {
- // version
- if (currentTag == Tag.VERSION) {
- dataBuilder.setVersion(buffer.toString());
- }
- // robot browser
- addToRobotBuilder();
- // build browser
- addToBrowserBuilder();
- // build operating system
- addToOperatingSystemBuilder();
- // build browser pattern
- addToBrowserPatternBuilder();
- // build browser type
- addToBrowserTypeBuilder();
- // build browser to operating system mapping
- addToBrowserOsMappingBuilder();
- // build operating system pattern
- addToOperatingSystemPatternBuilder();
- // build browser
- addToDeviceBuilder();
- // build browser pattern
- addToDevicePatternBuilder();
- buffer = new StringBuilder();
- }
- @Override
- public void warning(final SAXParseException e) throws SAXException {
- warning = true;
- logParsingIssue("Warning", e);
- super.warning(e);
- }
- }