XmlDataHandler.java
/*******************************************************************************
* Copyright 2012 André Rouél
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package net.sf.uadetector.internal.data;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import javax.annotation.Nonnull;
import net.sf.qualitycheck.Check;
import net.sf.uadetector.internal.data.domain.Browser;
import net.sf.uadetector.internal.data.domain.BrowserOperatingSystemMapping;
import net.sf.uadetector.internal.data.domain.BrowserPattern;
import net.sf.uadetector.internal.data.domain.BrowserType;
import net.sf.uadetector.internal.data.domain.Device;
import net.sf.uadetector.internal.data.domain.DevicePattern;
import net.sf.uadetector.internal.data.domain.OperatingSystem;
import net.sf.uadetector.internal.data.domain.OperatingSystemPattern;
import net.sf.uadetector.internal.data.domain.Robot;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
public final class XmlDataHandler extends DefaultHandler {
/**
 * Element names of an UAS data document that this handler knows, together with case-insensitive lookup helpers.
 */
public enum Tag {

    /**
     * Tag name of a browser entry
     */
    BROWSER("browser"),

    /**
     * Tag name of the ID of a browser pattern
     */
    BROWSER_ID("browser_id"),

    /**
     * Tag name of the informational URL of a browser entry
     */
    BROWSER_INFO_URL("browser_info_url"),

    /**
     * Tag name of a mapping entry between a browser and an operating system
     */
    BROWSER_OS_MAPPING("browser_os"),

    /**
     * Tag name of a browser pattern
     */
    BROWSER_PATTERN("browser_reg"),

    /**
     * Tag name of a browser type entry
     */
    BROWSER_TYPE("browser_type"),

    /**
     * Tag name of the type ID of a browser
     */
    BROWSER_TYPE_ID("type"),

    /**
     * Tag name of a producer company of a user agent
     */
    COMPANY("company"),

    /**
     * Tag name of the URL of a producer company of a user agent
     */
    COMPANY_URL("url_company"),

    /**
     * Tag name of a device
     */
    DEVICE("device"),

    /**
     * Tag name of the ID of a device pattern
     */
    DEVICE_ID("device_id"),

    /**
     * Tag name of the informational URL of a device entry
     */
    DEVICE_INFO_URL("device_info_url"),

    /**
     * Tag name of a device pattern
     */
    DEVICE_PATTERN("device_reg"),

    /**
     * Tag name of all devices
     */
    DEVICES("devices"),

    /**
     * Tag name of all device patterns
     */
    DEVICES_PATTERN("devices_reg"),

    /**
     * Tag name of a family of a user agent
     */
    FAMILY("family"),

    /**
     * Tag name of the icon of an entry
     */
    ICON("icon"),

    /**
     * Tag name of an ID of a user agent
     */
    ID("id"),

    /**
     * Tag name of the product name of a user agent
     */
    NAME("name"),

    /**
     * Tag name of an operating system entry
     */
    OPERATING_SYSTEM("os"),

    /**
     * Tag name of the ID of an operating system pattern
     */
    OPERATING_SYSTEM_ID("os_id"),

    /**
     * Tag name of the informational URL of an operating system entry
     */
    OPERATING_SYSTEM_INFO_URL("os_info_url"),

    /**
     * Tag name of an operating system pattern
     */
    OPERATING_SYSTEM_PATTERN("operating_system_reg"),

    /**
     * Tag name of the order of a user agent pattern
     */
    PATTERN_ORDER("order"),

    /**
     * Tag name of the regular expression of a user agent pattern
     */
    PATTERN_REGEX("regstring"),

    /**
     * Tag name of a robot entry
     */
    ROBOT("robot"),

    /**
     * Tag name of the informational URL of a robot entry
     */
    ROBOT_INFO_URL("bot_info_url"),

    /**
     * Tag name of the product URL of a user agent
     */
    URL("url"),

    /**
     * Tag name of a user agent string of a robot entry
     */
    USERAGENT("useragent"),

    /**
     * Tag name of the data version
     */
    VERSION("version");

    /**
     * Looks up the tag whose name equals the given element name, ignoring case.
     *
     * @param tagName
     *            element name to look up; it is passed through {@code Check.notNull} first
     * @return the matching tag or {@code null} if no tag carries that name
     */
    public static Tag evaluate(@Nonnull final String tagName) {
        Check.notNull(tagName, "tagName");
        for (final Tag tag : values()) {
            if (tag.getTagName().equalsIgnoreCase(tagName)) {
                return tag;
            }
        }
        return null;
    }

    /**
     * @return {@code true} if the given name denotes {@link #BROWSER_OS_MAPPING} (case-insensitive), {@code false}
     *         otherwise, also for {@code null}
     */
    public static boolean isBrowserOsMappingTag(final String tagName) {
        return BROWSER_OS_MAPPING.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #BROWSER_PATTERN} (case-insensitive), {@code false}
     *         otherwise, also for {@code null}
     */
    public static boolean isBrowserPatternTag(final String tagName) {
        return BROWSER_PATTERN.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #BROWSER} (case-insensitive), {@code false} otherwise,
     *         also for {@code null}
     */
    public static boolean isBrowserTag(final String tagName) {
        return BROWSER.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #BROWSER_TYPE} (case-insensitive), {@code false}
     *         otherwise, also for {@code null}
     */
    public static boolean isBrowserTypeTag(final String tagName) {
        return BROWSER_TYPE.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #DEVICE_PATTERN} (case-insensitive), {@code false}
     *         otherwise, also for {@code null}
     */
    public static boolean isDevicePatternTag(final String tagName) {
        return DEVICE_PATTERN.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #DEVICE} (case-insensitive), {@code false} otherwise,
     *         also for {@code null}
     */
    public static boolean isDeviceTag(final String tagName) {
        return DEVICE.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #ID} (case-insensitive), {@code false} otherwise, also
     *         for {@code null}
     */
    public static boolean isIdTag(final String tagName) {
        return ID.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #OPERATING_SYSTEM_PATTERN} (case-insensitive),
     *         {@code false} otherwise, also for {@code null}
     */
    public static boolean isOperatingSystemPatternTag(final String tagName) {
        return OPERATING_SYSTEM_PATTERN.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #OPERATING_SYSTEM} (case-insensitive), {@code false}
     *         otherwise, also for {@code null}
     */
    public static boolean isOperatingSystemTag(final String tagName) {
        return OPERATING_SYSTEM.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * @return {@code true} if the given name denotes {@link #ROBOT} (case-insensitive), {@code false} otherwise,
     *         also for {@code null}
     */
    public static boolean isRobotTag(final String tagName) {
        return ROBOT.getTagName().equalsIgnoreCase(tagName);
    }

    /**
     * Element name as it appears in the document; enum constants are shared, so this must never change
     */
    @Nonnull
    private final String tagName;

    private Tag(@Nonnull final String tagName) {
        this.tagName = tagName;
    }

    /**
     * Gets the element name this tag stands for.
     *
     * @return name of the XML element
     */
    @Nonnull
    public String getTagName() {
        return tagName;
    }

}
/**
 * Character set to read the internal Document Type Definition (DTD) of UAS data; passed to the
 * {@code InputStreamReader} created in {@code resolveEntity}
 */
private static final String CHARSET = "UTF-8";
/**
 * Corresponding logger for this class
 */
private static final Logger LOG = LoggerFactory.getLogger(XmlDataHandler.class);
/**
 * Path to the internal Document Type Definition (DTD) of UAS data files to be able to work completely offline;
 * {@code resolveEntity} loads it as a class loader resource
 */
protected static final String UASDATA_DEF = "uadetector/uasxmldata.dtd";
/**
 * URL to the Document Type Definition (DTD) of UAS data files; {@code resolveEntity} answers a request for exactly
 * this system ID with the internal DTD
 */
protected static final String UASDATA_DEF_URL = "http://user-agent-string.info/rpc/uasxmldata.dtd";
/**
 * Writes a warning-level log entry that describes a problem reported by the XML parser.
 * <p>
 * The entry consists of the given prefix, the exception message, the line number and, if the exception carries
 * one, the system ID of the affected document.
 *
 * @param prefix
 *            severity label to put at the beginning of the message
 * @param e
 *            parser exception to describe
 */
protected static void logParsingIssue(final String prefix, final SAXParseException e) {
    final String systemId = e.getSystemId();
    final String location = systemId == null ? "" : " uri: " + systemId;
    LOG.warn(prefix + " while reading UAS data: " + e.getMessage() + " (line: " + e.getLineNumber() + location + ")");
}
/**
 * Collects the values of the {@code browser} entry being read; replaced by a fresh builder once the entry is saved
 */
private Browser.Builder browserBuilder = new Browser.Builder();
/**
 * Collects the values of the {@code device} entry being read; replaced by a fresh builder once the entry is saved
 */
private Device.Builder deviceBuilder = new Device.Builder();
/**
 * Collects the values of the {@code browser_os} mapping being read; replaced once the mapping is saved
 */
private BrowserOperatingSystemMapping.Builder browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder();
/**
 * Collects the values of the {@code browser_reg} pattern being read; replaced once the pattern is saved
 */
private BrowserPattern.Builder browserPatternBuilder = new BrowserPattern.Builder();
/**
 * Collects the values of the {@code device_reg} pattern being read; replaced once the pattern is saved
 */
private DevicePattern.Builder devicePatternBuilder = new DevicePattern.Builder();
/**
 * Collects the values of the {@code browser_type} entry being read; replaced once the entry is saved
 */
private BrowserType.Builder browserTypeBuilder = new BrowserType.Builder();
/**
 * Accumulates the character data reported via {@code characters}; replaced by an empty buffer at every closing tag
 */
private StringBuilder buffer = new StringBuilder();
/**
 * Tag of the element opened most recently; {@code null} for unknown element names and after every closing tag
 */
private Tag currentTag = null;
/**
 * Receives the data version and every entry read from the document
 */
@Nonnull
private final DataBuilder dataBuilder;
/**
 * Flag to note that a fatal error occurred while parsing the document
 */
private boolean error = false;
/**
 * {@code true} between the opening and the closing tag of a {@code browser} element
 */
private boolean isBrowser = false;
/**
 * {@code true} between the opening and the closing tag of a {@code browser_os} element
 */
private boolean isBrowserOsMapping = false;
/**
 * {@code true} between the opening and the closing tag of a {@code browser_reg} element
 */
private boolean isBrowserPattern = false;
/**
 * {@code true} between the opening and the closing tag of a {@code browser_type} element
 */
private boolean isBrowserType = false;
/**
 * {@code true} between the opening and the closing tag of a {@code device} element
 */
private boolean isDevice = false;
/**
 * {@code true} between the opening and the closing tag of a {@code device_reg} element
 */
private boolean isDevicePattern = false;
/**
 * {@code true} between the opening and the closing tag of an {@code os} element
 */
private boolean isOperatingSystem = false;
/**
 * {@code true} between the opening and the closing tag of an {@code operating_system_reg} element
 */
private boolean isOperatingSystemPattern = false;
/**
 * {@code true} between the opening and the closing tag of a {@code robot} element
 */
private boolean isRobot = false;
/**
 * Collects the values of the {@code os} entry being read; replaced by a fresh builder once the entry is saved
 */
private OperatingSystem.Builder operatingSystemBuilder = new OperatingSystem.Builder();
/**
 * Collects the values of the {@code operating_system_reg} pattern being read; replaced once the pattern is saved
 */
private OperatingSystemPattern.Builder operatingSystemPatternBuilder = new OperatingSystemPattern.Builder();
/**
 * Collects the values of the {@code robot} entry being read; replaced by a fresh builder once the entry is saved
 */
private Robot.Builder robotBuilder = new Robot.Builder();
/**
 * Flag to note that a warning occurred while parsing the document
 */
private boolean warning = false;
/**
 * Creates a handler that hands everything it reads to the given data builder.
 *
 * @param builder
 *            receiver of the parsed data; it is passed through {@code Check.notNull} first
 */
public XmlDataHandler(@Nonnull final DataBuilder builder) {
    Check.notNull(builder, "builder");
    this.dataBuilder = builder;
}
/**
 * Stores the buffered text in the browser builder if a browser entry is being read and the current tag denotes one
 * of its attributes.
 */
private void addToBrowserBuilder() {
    // a switch on a null enum would fail, and outside of a browser entry there is nothing to do
    if (!isBrowser || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case ID:
            browserBuilder.setId(buffer.toString());
            break;
        case BROWSER_TYPE_ID:
            browserBuilder.setTypeId(buffer.toString());
            break;
        case NAME:
            browserBuilder.setFamilyName(buffer.toString());
            break;
        case URL:
            browserBuilder.setUrl(buffer.toString());
            break;
        case COMPANY:
            browserBuilder.setProducer(buffer.toString());
            break;
        case COMPANY_URL:
            browserBuilder.setProducerUrl(buffer.toString());
            break;
        case ICON:
            browserBuilder.setIcon(buffer.toString());
            break;
        case BROWSER_INFO_URL:
            browserBuilder.setInfoUrl(buffer.toString());
            break;
        default:
            // not an attribute of a browser entry
            break;
    }
}
/**
 * Stores the buffered text as browser ID or operating system ID of the mapping being read, depending on the current
 * tag.
 */
private void addToBrowserOsMappingBuilder() {
    if (!isBrowserOsMapping) {
        return;
    }
    if (Tag.BROWSER_ID == currentTag) {
        browserOsMappingBuilder.setBrowserId(buffer.toString());
    } else if (Tag.OPERATING_SYSTEM_ID == currentTag) {
        browserOsMappingBuilder.setOperatingSystemId(buffer.toString());
    }
}
/**
 * Stores the buffered text in the browser pattern builder if a browser pattern is being read and the current tag
 * denotes its order, ID or regular expression.
 */
private void addToBrowserPatternBuilder() {
    // a switch on a null enum would fail, and outside of a browser pattern there is nothing to do
    if (!isBrowserPattern || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case PATTERN_ORDER:
            browserPatternBuilder.setPosition(buffer.toString());
            break;
        case BROWSER_ID:
            browserPatternBuilder.setId(buffer.toString());
            break;
        case PATTERN_REGEX:
            browserPatternBuilder.setPerlRegularExpression(buffer.toString());
            break;
        default:
            // not an attribute of a browser pattern
            break;
    }
}
/**
 * Stores the buffered text as ID or name of the browser type being read, depending on the current tag.
 */
private void addToBrowserTypeBuilder() {
    if (!isBrowserType) {
        return;
    }
    if (Tag.ID == currentTag) {
        browserTypeBuilder.setId(buffer.toString());
    } else if (Tag.BROWSER_TYPE_ID == currentTag) {
        // inside a browser type entry the "type" element carries the name of the type
        browserTypeBuilder.setName(buffer.toString());
    }
}
/**
 * Stores the buffered text in the device builder if a device entry is being read and the current tag denotes one of
 * its attributes.
 */
private void addToDeviceBuilder() {
    // a switch on a null enum would fail, and outside of a device entry there is nothing to do
    if (!isDevice || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case ID:
            deviceBuilder.setId(buffer.toString());
            break;
        case NAME:
            deviceBuilder.setName(buffer.toString());
            break;
        case ICON:
            deviceBuilder.setIcon(buffer.toString());
            break;
        case DEVICE_INFO_URL:
            deviceBuilder.setInfoUrl(buffer.toString());
            break;
        default:
            // not an attribute of a device entry
            break;
    }
}
/**
 * Stores the buffered text as order, ID or regular expression of the device pattern being read, depending on the
 * current tag.
 */
private void addToDevicePatternBuilder() {
    if (!isDevicePattern) {
        return;
    }
    if (Tag.PATTERN_ORDER == currentTag) {
        devicePatternBuilder.setPosition(buffer.toString());
    } else if (Tag.DEVICE_ID == currentTag) {
        devicePatternBuilder.setId(buffer.toString());
    } else if (Tag.PATTERN_REGEX == currentTag) {
        devicePatternBuilder.setPerlRegularExpression(buffer.toString());
    }
}
/**
 * Stores the buffered text in the operating system builder if an operating system entry is being read and the
 * current tag denotes one of its attributes.
 */
private void addToOperatingSystemBuilder() {
    // a switch on a null enum would fail, and outside of an operating system entry there is nothing to do
    if (!isOperatingSystem || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case ID:
            operatingSystemBuilder.setId(buffer.toString());
            break;
        case FAMILY:
            operatingSystemBuilder.setFamily(buffer.toString());
            break;
        case NAME:
            operatingSystemBuilder.setName(buffer.toString());
            break;
        case URL:
            operatingSystemBuilder.setUrl(buffer.toString());
            break;
        case COMPANY:
            operatingSystemBuilder.setProducer(buffer.toString());
            break;
        case COMPANY_URL:
            operatingSystemBuilder.setProducerUrl(buffer.toString());
            break;
        case ICON:
            operatingSystemBuilder.setIcon(buffer.toString());
            break;
        case OPERATING_SYSTEM_INFO_URL:
            operatingSystemBuilder.setInfoUrl(buffer.toString());
            break;
        default:
            // not an attribute of an operating system entry
            break;
    }
}
/**
 * Stores the buffered text in the operating system pattern builder if such a pattern is being read and the current
 * tag denotes its order, ID or regular expression.
 */
private void addToOperatingSystemPatternBuilder() {
    // a switch on a null enum would fail, and outside of an operating system pattern there is nothing to do
    if (!isOperatingSystemPattern || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case PATTERN_ORDER:
            operatingSystemPatternBuilder.setPosition(buffer.toString());
            break;
        case OPERATING_SYSTEM_ID:
            operatingSystemPatternBuilder.setId(buffer.toString());
            break;
        case PATTERN_REGEX:
            operatingSystemPatternBuilder.setPerlRegularExpression(buffer.toString());
            break;
        default:
            // not an attribute of an operating system pattern
            break;
    }
}
/**
 * Stores the buffered text in the robot builder if a robot entry is being read and the current tag denotes one of
 * its attributes.
 */
private void addToRobotBuilder() {
    // a switch on a null enum would fail, and outside of a robot entry there is nothing to do
    if (!isRobot || currentTag == null) {
        return;
    }
    switch (currentTag) {
        case ID:
            robotBuilder.setId(buffer.toString());
            break;
        case USERAGENT:
            robotBuilder.setUserAgentString(buffer.toString());
            break;
        case FAMILY:
            robotBuilder.setFamilyName(buffer.toString());
            break;
        case NAME:
            robotBuilder.setName(buffer.toString());
            break;
        case COMPANY:
            robotBuilder.setProducer(buffer.toString());
            break;
        case COMPANY_URL:
            robotBuilder.setProducerUrl(buffer.toString());
            break;
        case ICON:
            robotBuilder.setIcon(buffer.toString());
            break;
        case ROBOT_INFO_URL:
            robotBuilder.setInfoUrl(buffer.toString());
            break;
        default:
            // not an attribute of a robot entry
            break;
    }
}
/**
 * Appends the reported character data to the internal buffer.
 * <p>
 * The SAX contract allows a parser to deliver the text of a single element in several chunks, so the text is only
 * collected here and evaluated when the element ends.
 *
 * @param ch
 *            characters reported by the parser
 * @param start
 *            index of the first character to take from {@code ch}
 * @param length
 *            number of characters to take from {@code ch}
 */
@Override
public void characters(final char[] ch, final int start, final int length) throws SAXException {
    // append the range directly instead of creating a temporary String for every chunk
    buffer.append(ch, start, length);
}
/**
 * Finishes the element that just ended.
 * <p>
 * The buffered text is first handed to the builder in charge. If the element closes a complete entry (robot,
 * browser, operating system, browser type, device, one of the patterns or a browser to operating system mapping) the
 * entry is saved and the matching "inside entry" flag is cleared. Finally the current tag is forgotten.
 *
 * @param uri
 *            namespace URI (not evaluated)
 * @param localName
 *            local name (not evaluated)
 * @param tagName
 *            qualified name of the element, compared case-insensitively
 */
@Override
public void endElement(final String uri, final String localName, final String tagName) throws SAXException {
    transferToSpecificBuilderAndReset();

    // Tag.evaluate passes its argument through Check.notNull; a missing name must stay a no-op here
    final Tag closed = tagName == null ? null : Tag.evaluate(tagName);
    if (closed != null) {
        switch (closed) {
            case ROBOT:
                saveAndResetRobotBuilder();
                isRobot = false;
                break;
            case BROWSER:
                saveAndResetBrowserBuilder();
                isBrowser = false;
                break;
            case OPERATING_SYSTEM:
                saveAndResetOperatingSystemBuilder();
                isOperatingSystem = false;
                break;
            case BROWSER_TYPE:
                saveAndResetBrowserTypeBuilder();
                isBrowserType = false;
                break;
            case BROWSER_PATTERN:
                saveAndResetBrowserPatternBuilder();
                isBrowserPattern = false;
                break;
            case BROWSER_OS_MAPPING:
                saveAndResetBrowserOperatingSystemMapping();
                isBrowserOsMapping = false;
                break;
            case OPERATING_SYSTEM_PATTERN:
                saveAndResetOperatingSystemPatternBuilder();
                isOperatingSystemPattern = false;
                break;
            case DEVICE:
                saveAndResetDeviceBuilder();
                isDevice = false;
                break;
            case DEVICE_PATTERN:
                saveAndResetDevicePatternBuilder();
                isDevicePattern = false;
                break;
            default:
                // the element does not close an entry
                break;
        }
    }

    currentTag = null;
}
/**
 * Raises the error flag, logs the reported issue with the prefix "Error" and then delegates to
 * {@code super.fatalError(e)}.
 * <p>
 * NOTE(review): the delegation targets {@code fatalError} and not {@code error}. According to the comment in
 * {@link #fatalError(SAXParseException)} that call throws a {@code SAXException}, so every error reported by the
 * parser aborts parsing. Presumably this is intended - confirm before changing it to {@code super.error(e)}.
 *
 * @param e
 *            error reported by the parser
 */
@Override
public void error(final SAXParseException e) throws SAXException {
error = true;
logParsingIssue("Error", e);
super.fatalError(e);
}
/**
 * Raises the error flag, logs the reported issue with the prefix "Fatal error" and aborts parsing by delegating to
 * the default handler.
 *
 * @param e
 *            fatal error reported by the parser
 * @throws SAXException
 *             thrown by the default implementation to stop the parser
 */
@Override
public void fatalError(final SAXParseException e) throws SAXException {
    // the flag documents that a fatal error occurred, so it must be set before the exception leaves this method
    error = true;
    logParsingIssue("Fatal error", e);
    // this call throws a SAXException
    super.fatalError(e);
}
/**
 * Gets the flag whether an error occurred while parsing the document. The flag is raised when the parser reports an
 * issue via {@link #error(SAXParseException)} and is never cleared again.
 *
 * @return {@code true} if an error occurred, otherwise {@code false}
 */
public boolean hasError() {
return error;
}
/**
 * Gets the flag whether a warning occurred while parsing the document. The flag is raised when the parser reports
 * an issue via {@link #warning(SAXParseException)} and is never cleared again.
 *
 * @return {@code true} if a warning occurred, otherwise {@code false}
 */
public boolean hasWarning() {
return warning;
}
/**
 * Resolves the Document Type Definition of UAS data from the class path instead of fetching it remotely.
 * <p>
 * Only the system ID {@link #UASDATA_DEF_URL} is supported; any other entity is rejected so that parsing never
 * triggers an unexpected remote access.
 *
 * @param publicId
 *            public identifier of the entity (not evaluated)
 * @param systemId
 *            system identifier of the entity
 * @return input source that reads the internal DTD
 * @throws IOException
 *             if the internal DTD cannot be found on the class path
 * @throws SAXException
 *             if an entity other than the UAS data DTD is requested
 */
@Override
public InputSource resolveEntity(final String publicId, final String systemId) throws IOException, SAXException {
    if (!UASDATA_DEF_URL.equals(systemId)) {
        throw new SAXException("unable to resolve remote entity, systemId = " + systemId);
    }
    final InputStream stream = this.getClass().getClassLoader().getResourceAsStream(UASDATA_DEF);
    if (stream == null) {
        // getResourceAsStream signals a missing resource with null; fail with a clear message instead of an NPE
        throw new IOException("unable to find internal DTD on class path: " + UASDATA_DEF);
    }
    return new InputSource(new InputStreamReader(stream, CHARSET));
}
/**
 * Hands the filled browser builder to the data builder and continues with a fresh one.
 */
private void saveAndResetBrowserBuilder() {
    final Browser.Builder completed = browserBuilder;
    dataBuilder.appendBrowserBuilder(completed);
    browserBuilder = new Browser.Builder();
}
/**
 * Builds the collected browser to operating system mapping, hands it to the data builder and continues with a
 * fresh mapping builder.
 */
private void saveAndResetBrowserOperatingSystemMapping() {
    final BrowserOperatingSystemMapping.Builder completed = browserOsMappingBuilder;
    dataBuilder.appendBrowserOperatingSystemMapping(completed.build());
    browserOsMappingBuilder = new BrowserOperatingSystemMapping.Builder();
}
/**
 * Builds the collected browser pattern and hands it to the data builder. An {@code IllegalArgumentException} raised
 * on the way is only logged, so that a single unusable pattern does not stop the import. Afterwards a fresh pattern
 * builder is used.
 */
private void saveAndResetBrowserPatternBuilder() {
    final BrowserPattern.Builder completed = browserPatternBuilder;
    try {
        dataBuilder.appendBrowserPattern(completed.build());
    } catch (final IllegalArgumentException e) {
        final String reason = e.getLocalizedMessage();
        LOG.warn("Can not append browser pattern: " + reason);
    }
    browserPatternBuilder = new BrowserPattern.Builder();
}
/**
 * Builds the collected browser type, hands it to the data builder and continues with a fresh type builder.
 */
private void saveAndResetBrowserTypeBuilder() {
    final BrowserType.Builder completed = browserTypeBuilder;
    dataBuilder.appendBrowserType(completed.build());
    browserTypeBuilder = new BrowserType.Builder();
}
/**
 * Hands the filled device builder to the data builder and continues with a fresh one.
 */
private void saveAndResetDeviceBuilder() {
    final Device.Builder completed = deviceBuilder;
    dataBuilder.appendDeviceBuilder(completed);
    deviceBuilder = new Device.Builder();
}
/**
 * Builds the collected device pattern and hands it to the data builder. An {@code IllegalArgumentException} raised
 * on the way is only logged, so that a single unusable pattern does not stop the import. Afterwards a fresh pattern
 * builder is used.
 */
private void saveAndResetDevicePatternBuilder() {
    final DevicePattern.Builder completed = devicePatternBuilder;
    try {
        dataBuilder.appendDevicePattern(completed.build());
    } catch (final IllegalArgumentException e) {
        final String reason = e.getLocalizedMessage();
        LOG.warn("Can not append device pattern: " + reason);
    }
    devicePatternBuilder = new DevicePattern.Builder();
}
/**
 * Hands the filled operating system builder to the data builder and continues with a fresh one.
 */
private void saveAndResetOperatingSystemBuilder() {
    final OperatingSystem.Builder completed = operatingSystemBuilder;
    dataBuilder.appendOperatingSystemBuilder(completed);
    operatingSystemBuilder = new OperatingSystem.Builder();
}
/**
 * Builds the collected operating system pattern and hands it to the data builder. An
 * {@code IllegalArgumentException} raised on the way is only logged, so that a single unusable pattern does not stop
 * the import. Afterwards a fresh pattern builder is used.
 */
private void saveAndResetOperatingSystemPatternBuilder() {
    final OperatingSystemPattern.Builder completed = operatingSystemPatternBuilder;
    try {
        dataBuilder.appendOperatingSystemPattern(completed.build());
    } catch (final IllegalArgumentException e) {
        final String reason = e.getLocalizedMessage();
        LOG.warn("Can not append OS pattern: " + reason);
    }
    operatingSystemPatternBuilder = new OperatingSystemPattern.Builder();
}
/**
 * Builds the collected robot, hands it to the data builder and continues with a fresh robot builder.
 */
private void saveAndResetRobotBuilder() {
    final Robot.Builder completed = robotBuilder;
    dataBuilder.appendRobot(completed.build());
    robotBuilder = new Robot.Builder();
}
/**
 * Notes which element has been opened.
 * <p>
 * If the element starts an entry (robot, browser, operating system, browser type, device, one of the patterns or a
 * browser to operating system mapping) the matching "inside entry" flag is raised. In any case the element becomes
 * the current tag, which is {@code null} for element names that are not known.
 *
 * @param uri
 *            namespace URI (not evaluated)
 * @param localName
 *            local name (not evaluated)
 * @param tagName
 *            qualified name of the element, compared case-insensitively
 * @param attributes
 *            attributes of the element (not evaluated)
 */
@Override
public void startElement(final String uri, final String localName, final String tagName, final Attributes attributes)
        throws SAXException {
    // a single lookup serves both the entry flags and the current tag
    final Tag opened = Tag.evaluate(tagName);
    if (opened != null) {
        switch (opened) {
            case ROBOT:
                isRobot = true;
                break;
            case BROWSER:
                isBrowser = true;
                break;
            case OPERATING_SYSTEM:
                isOperatingSystem = true;
                break;
            case BROWSER_TYPE:
                isBrowserType = true;
                break;
            case BROWSER_PATTERN:
                isBrowserPattern = true;
                break;
            case BROWSER_OS_MAPPING:
                isBrowserOsMapping = true;
                break;
            case OPERATING_SYSTEM_PATTERN:
                isOperatingSystemPattern = true;
                break;
            case DEVICE:
                isDevice = true;
                break;
            case DEVICE_PATTERN:
                isDevicePattern = true;
                break;
            default:
                // the element does not start an entry
                break;
        }
    }
    currentTag = opened;
}
/**
 * Hands the buffered characters of the current tag to every builder that is interested in them and starts a new,
 * empty buffer afterwards.
 */
private void transferToSpecificBuilderAndReset() {
    // data version
    if (Tag.VERSION == currentTag) {
        dataBuilder.setVersion(buffer.toString());
    }

    // entries: each method checks itself whether its kind of entry is being read
    addToRobotBuilder();
    addToBrowserBuilder();
    addToOperatingSystemBuilder();

    // browser pattern, browser type and browser to operating system mapping
    addToBrowserPatternBuilder();
    addToBrowserTypeBuilder();
    addToBrowserOsMappingBuilder();

    // operating system pattern
    addToOperatingSystemPatternBuilder();

    // device and device pattern
    addToDeviceBuilder();
    addToDevicePatternBuilder();

    buffer = new StringBuilder();
}
/**
 * Raises the warning flag, logs the reported issue with the prefix "Warning" and delegates to the default handler.
 *
 * @param e
 *            warning reported by the parser
 */
@Override
public void warning(final SAXParseException e) throws SAXException {
    this.warning = true;
    logParsingIssue("Warning", e);
    super.warning(e);
}
}