XmlDataReader.java

  1. /*******************************************************************************
  2.  * Copyright 2012 André Rouél
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *   http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  ******************************************************************************/
  16. package net.sf.uadetector.datareader;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;

import javax.annotation.Nonnull;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import net.sf.qualitycheck.Check;
import net.sf.uadetector.exception.CanNotOpenStreamException;
import net.sf.uadetector.internal.data.Data;
import net.sf.uadetector.internal.data.DataBuilder;
import net.sf.uadetector.internal.data.XmlDataHandler;
import net.sf.uadetector.internal.util.Closeables;
import net.sf.uadetector.internal.util.UrlUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

  36. /**
  37.  * Reader for the XML data for UASparser from <a
  38.  * href="http://user-agent-string.info/">http://user-agent-string.info</a>.<br>
  39.  * <br>
  40.  * This reader is safe when used concurrently by multiple threads.
  41.  *
  42.  * @author André Rouél
  43.  */
  44. public final class XmlDataReader implements DataReader {

  45.     protected static final class XmlParser {

  46.         private static final String MSG_NOT_PARSED_AS_EXPECTED = "The UAS data has not been parsed as expected.";

  47.         public static void parse(@Nonnull final InputStream stream, @Nonnull final DataBuilder builder)
  48.                 throws ParserConfigurationException, SAXException, IOException {
  49.             final SAXParserFactory factory = SAXParserFactory.newInstance();
  50.             factory.setValidating(true);
  51.             final SAXParser parser = factory.newSAXParser();
  52.             final XmlDataHandler handler = new XmlDataHandler(builder);
  53.             parser.parse(stream, handler);
  54.             validate(handler);
  55.         }

  56.         protected static void validate(@Nonnull final XmlDataHandler handler) {
  57.             if (handler.hasError()) {
  58.                 throw new IllegalStateException(MSG_NOT_PARSED_AS_EXPECTED);
  59.             }
  60.         }

  61.         private XmlParser() {
  62.             // This class is not intended to create objects from it.
  63.         }

  64.     }

  65.     /**
  66.      * Default character set to read UAS data
  67.      */
  68.     private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

  69.     /**
  70.      * Corresponding default logger for this class
  71.      */
  72.     private static final Logger LOG = LoggerFactory.getLogger(XmlDataReader.class);

  73.     /**
  74.      * Reads the <em>UAS data</em> in XML format based on the given URL.<br>
  75.      * <br>
  76.      * When during the reading errors occur which lead to a termination of the read operation, the information will be
  77.      * written to a log. The termination of the read operation will not lead to a program termination and in this case
  78.      * this method returns {@link Data#EMPTY}.
  79.      *
  80.      * @param inputStream
  81.      *            an input stream for reading <em>UAS data</em>
  82.      * @param charset
  83.      *            the character set in which the data should be read
  84.      * @return read in <em>UAS data</em> as {@code Data} instance
  85.      * @throws net.sf.qualitycheck.exception.IllegalNullArgumentException
  86.      *             if any of the given arguments is {@code null}
  87.      * @throws net.sf.uadetector.exception.CanNotOpenStreamException
  88.      *             if no stream to the given {@code URL} can be established
  89.      */
  90.     protected static Data readXml(@Nonnull final InputStream inputStream, @Nonnull final Charset charset) {
  91.         Check.notNull(inputStream, "inputStream");
  92.         Check.notNull(charset, "charset");

  93.         final DataBuilder builder = new DataBuilder();
  94.         boolean hasErrors = false;
  95.         try {
  96.             XmlParser.parse(inputStream, builder);
  97.         } catch (final ParserConfigurationException e) {
  98.             hasErrors = true;
  99.             LOG.warn(e.getLocalizedMessage());
  100.         } catch (final SAXException e) {
  101.             hasErrors = true;
  102.             LOG.warn(e.getLocalizedMessage());
  103.         } catch (final IOException e) {
  104.             hasErrors = true;
  105.             LOG.warn(e.getLocalizedMessage());
  106.         } catch (final IllegalStateException e) {
  107.             hasErrors = true;
  108.             LOG.warn(e.getLocalizedMessage());
  109.         } catch (final Exception e) {
  110.             hasErrors = true;
  111.             LOG.warn(e.getLocalizedMessage(), e);
  112.         } finally {
  113.             Closeables.closeAndConvert(inputStream, true);
  114.         }

  115.         return hasErrors ? Data.EMPTY : builder.build();
  116.     }

  117.     /**
  118.      * Reads the <em>UAS data</em> in XML format from the given string.
  119.      *
  120.      * @param data
  121.      *            <em>UAS data</em> as string
  122.      * @return read in User-Agent data as {@code Data} instance otherwise {@link Data#EMPTY}
  123.      *
  124.      * @throws net.sf.qualitycheck.exception.IllegalNullArgumentException
  125.      *             if any of the given argument is {@code null}
  126.      */
  127.     @Override
  128.     public Data read(@Nonnull final String data) {
  129.         Check.notNull(data, "data");

  130.         return readXml(new ByteArrayInputStream(data.getBytes(DEFAULT_CHARSET)), DEFAULT_CHARSET);
  131.     }

  132.     /**
  133.      * Reads the <em>UAS data</em> in XML format based on the given URL.
  134.      *
  135.      * @param url
  136.      *            {@code URL} to User-Agent informations
  137.      * @param charset
  138.      *            the character set in which the data should be read
  139.      * @return read in User-Agent data as {@code Data} instance otherwise {@link Data#EMPTY}
  140.      *
  141.      * @throws net.sf.qualitycheck.exception.IllegalNullArgumentException
  142.      *             if any of the given arguments is {@code null}
  143.      */
  144.     @Override
  145.     public Data read(@Nonnull final URL url, @Nonnull final Charset charset) {
  146.         Check.notNull(url, "url");
  147.         Check.notNull(charset, "charset");

  148.         Data data = Data.EMPTY;
  149.         try {
  150.             data = readXml(UrlUtil.open(url), charset);
  151.         } catch (final CanNotOpenStreamException e) {
  152.             LOG.warn(e.getLocalizedMessage());
  153.         }

  154.         return data;
  155.     }

  156. }