001/** 002 * Copyright 2014 Tampere University of Technology, Pori Department 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package core.tut.pori.utils; 017 018import java.io.IOException; 019import java.io.InputStream; 020import java.net.URL; 021import java.util.EnumSet; 022 023import javax.xml.bind.annotation.XmlEnum; 024import javax.xml.bind.annotation.XmlEnumValue; 025 026import org.apache.commons.codec.DecoderException; 027import org.apache.commons.codec.binary.Hex; 028import org.apache.commons.lang3.StringUtils; 029import org.apache.log4j.Logger; 030 031/** 032 * This class can be used to validate whether content given as an URL link contains image content. 033 * 034 * The validation of the URL is done by downloading the first few bytes of the content and comparing the retrieved bytes to the list of known magic numbers. 035 * 036 * The supported video formats for this validator are: 037 * <ul> 038 * <li>GIF</li> 039 * <li>JPEG</li> 040 * <li>PNG</li> 041 * </ul> 042 * 043 * The supported photo formats for this validator are: 044 * <ul> 045 * <li>AVI</li> 046 * <li>Flash Video</li> 047 * <li>MKV</li> 048 * <li>MPEG Video</li> 049 * <li>MOV</li> 050 * </ul> 051 * 052 * Note that the magic number validation is a simplified operation and no end bytes for file types will ever be checked, whether they denote the file type or not. 053 * 054 */ 055public class MediaUrlValidator { 056 /* media types */ 057 /** media type name for audio content */ 058 public static final String MEDIA_TYPE_AUDIO = "AUDIO"; 059 /** media type name for photo/image content */ 060 public static final String MEDIA_TYPE_PHOTO = "PHOTO"; 061 /** media type name for video content */ 062 public static final String MEDIA_TYPE_VIDEO = "VIDEO"; 063 /** media type name for unknown/unspecified content */ 064 public static final String MEDIA_TYPE_UNKNOWN = "UNKNOWN"; 065 private static final Logger LOGGER = Logger.getLogger(MediaUrlValidator.class); 066 /** avi actually is of format 52 49 46 46 xx xx xx xx 41 56 49 20 4C 49 53 54 where xx denote file size, the 52 49 46 46 also denote other formats such as wave files, but we'll simply ignore this minor issue */ 067 private static final byte[] MAGIC_BYTE_AVI; 068 private static final byte[] MAGIC_BYTE_BMP; 069 private static final byte[] MAGIC_BYTE_FLASH_VIDEO; 070 private static final byte[] MAGIC_BYTE_GIF_1; 071 private static final byte[] MAGIC_BYTE_GIF_2; 072 /** note: a valid jpeg file should also end with ffd9, but this is will not be checked by the validator */ 073 private static final byte[] MAGIC_BYTE_JPEG; 074 private static final byte[] MAGIC_BYTE_MKV; 075 /** mpeg file is actually 00 00 01 Bx where x is a number. Last byte "Bx" is ignored because Hex.decodeHex requires Even number of bytes */ 076 private static final byte[] MAGIC_BYTE_MPEG_VIDEO; 077 private static final byte[] MAGIC_BYTE_MOV; 078 private static final byte[] MAGIC_BYTE_PNG; 079 static{ 080 try { 081 MAGIC_BYTE_BMP = Hex.decodeHex("424d".toCharArray()); 082 MAGIC_BYTE_GIF_1 = Hex.decodeHex("474946383961".toCharArray()); 083 MAGIC_BYTE_GIF_2 = Hex.decodeHex("474946383761".toCharArray()); 084 MAGIC_BYTE_JPEG = Hex.decodeHex("ffd8".toCharArray()); 085 MAGIC_BYTE_PNG = Hex.decodeHex("89504e470d0a1a0a".toCharArray()); 086 MAGIC_BYTE_MOV = Hex.decodeHex("000000146674797071742020".toCharArray()); 087 MAGIC_BYTE_MPEG_VIDEO = Hex.decodeHex("000001".toCharArray()); 088 MAGIC_BYTE_MKV = Hex.decodeHex("1a45dfa3934282886d6174726f736b61".toCharArray()); 089 MAGIC_BYTE_FLASH_VIDEO = Hex.decodeHex("464c5601".toCharArray()); 090 MAGIC_BYTE_AVI = Hex.decodeHex("52494646".toCharArray()); 091 } catch (DecoderException ex) { // this should never happen 092 LOGGER.error(ex,ex); 093 throw new IllegalArgumentException(ex.getMessage()); 094 } 095 } 096 private static final int BUFFER_SIZE = 20; // take first 20 bytes in to the buffer 097 098 /** 099 * Media type declaration. 100 */ 101 @XmlEnum 102 public enum MediaType { 103 /** media type is unknown or unspecified */ 104 @XmlEnumValue(value = MEDIA_TYPE_UNKNOWN) 105 UNKNOWN(0), 106 /** media is of photo/image content */ 107 @XmlEnumValue(value = MEDIA_TYPE_PHOTO) 108 PHOTO(1), 109 /** media is of video content */ 110 @XmlEnumValue(value = MEDIA_TYPE_VIDEO) 111 VIDEO(2), 112 /** media is of audio content */ 113 @XmlEnumValue(value = MEDIA_TYPE_AUDIO) 114 AUDIO(3); 115 116 private int _value; 117 118 /** 119 * 120 * @param value 121 */ 122 private MediaType(int value){ 123 _value = value; 124 } 125 126 /** 127 * 128 * @return the media type as integer 129 */ 130 public int toInt(){ 131 return _value; 132 } 133 134 /** 135 * 136 * @param value 137 * @return the value as MediaType 138 * @throws IllegalArgumentException on bad value 139 */ 140 public static MediaType fromInt(int value) throws IllegalArgumentException { 141 for(MediaType mt : MediaType.values()){ 142 if(mt._value == value){ 143 return mt; 144 } 145 } 146 throw new IllegalArgumentException("Bad "+MediaType.class.toString()+" : "+value); 147 } 148 149 /** 150 * 151 * @param mediaTypes 152 * @return the passed media types converted to primitive int array or null if empty or null set was passed 153 */ 154 public static int[] toInt(EnumSet<MediaType> mediaTypes){ 155 if(mediaTypes == null || mediaTypes.isEmpty()){ 156 return null; 157 } 158 int[] types = new int[mediaTypes.size()]; 159 int index = -1; 160 for(MediaType t : mediaTypes){ 161 types[++index] = t.toInt(); 162 } 163 return types; 164 } 165 } // enum MediaType 166 167 /** 168 * 169 * @param array must be at least as long as the comparator 170 * @param with 171 * @return true if the given array starts with the given with 172 */ 173 private static boolean startsWith(byte[] array, byte[] with){ 174 for(int i=0; i<with.length; ++i){ 175 if(array[i] != with[i]){ 176 return false; 177 } 178 } 179 return true; 180 } 181 182 /** 183 * 184 * @param url 185 * @return media type for the given URL 186 */ 187 public MediaType validateUrl(String url){ 188 if(StringUtils.isBlank(url)){ 189 LOGGER.warn("Empty URL."); 190 return MediaType.UNKNOWN; 191 } 192 193 LOGGER.debug("Validating URL: "+url); 194 try { 195 URL u = new URL(url); 196 try (InputStream input = u.openStream()){ 197 byte[] bytes = new byte[BUFFER_SIZE]; 198 if(input.read(bytes) < BUFFER_SIZE){ 199 LOGGER.warn("Failed to read first "+BUFFER_SIZE+" bytes."); 200 return MediaType.UNKNOWN; 201 } 202 203 if(startsWith(bytes, MAGIC_BYTE_BMP)){ 204 LOGGER.debug("Detected a bmp file."); 205 return MediaType.PHOTO; 206 }else if(startsWith(bytes, MAGIC_BYTE_PNG)){ 207 LOGGER.debug("Detected a png file."); 208 return MediaType.PHOTO; 209 }else if(startsWith(bytes, MAGIC_BYTE_JPEG)){ 210 LOGGER.debug("Detected a jpeg file."); 211 return MediaType.PHOTO; 212 }else if(startsWith(bytes, MAGIC_BYTE_GIF_1) || startsWith(bytes, MAGIC_BYTE_GIF_2)){ 213 LOGGER.debug("Detected a gif file."); 214 return MediaType.PHOTO; 215 }else if(startsWith(bytes, MAGIC_BYTE_AVI)){ 216 LOGGER.debug("Detected an avi file."); 217 return MediaType.VIDEO; 218 }else if(startsWith(bytes, MAGIC_BYTE_FLASH_VIDEO)){ 219 LOGGER.debug("Detected a flash file."); 220 return MediaType.VIDEO; 221 }else if(startsWith(bytes, MAGIC_BYTE_MKV)){ 222 LOGGER.debug("Detected a mkv file."); 223 return MediaType.VIDEO; 224 }else if(startsWith(bytes, MAGIC_BYTE_MOV)){ 225 LOGGER.debug("Detected a mov file."); 226 return MediaType.VIDEO; 227 }else if(startsWith(bytes, MAGIC_BYTE_MPEG_VIDEO)){ 228 LOGGER.debug("Detected a mpg file."); 229 return MediaType.VIDEO; 230 } 231 } 232 } catch (IOException | IllegalArgumentException ex) { // java's URL connection randomly throws illegal argument exception on certain valid urls, so catch and ignore it 233 LOGGER.warn("Failed to read URL: "+url); 234 LOGGER.debug(ex, ex); 235 } 236 return MediaType.UNKNOWN; 237 } 238}