001/** 002 * Copyright 2014 Tampere University of Technology, Pori Department 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package service.tut.pori.twitterjazz; 017 018import java.util.ArrayList; 019import java.util.Arrays; 020import java.util.Collection; 021import java.util.EnumSet; 022import java.util.Iterator; 023import java.util.List; 024 025import org.apache.commons.lang3.ArrayUtils; 026import org.apache.http.client.utils.DateUtils; 027import org.apache.log4j.Logger; 028 029import service.tut.pori.contentanalysis.Definitions; 030import service.tut.pori.contentanalysis.MediaObject; 031import service.tut.pori.contentanalysis.MediaObjectList; 032import service.tut.pori.contentanalysis.Photo; 033import service.tut.pori.contentanalysis.PhotoDAO; 034import service.tut.pori.contentanalysis.PhotoList; 035import service.tut.pori.contentstorage.TwitterDAO; 036import service.tut.pori.contentstorage.TwitterPhotoStorage; 037import service.tut.pori.contentstorage.TwitterPhotoStorage.TwitterEntry; 038import service.tut.pori.users.twitter.TwitterUserDAO; 039import twitter4j.Paging; 040import twitter4j.ResponseList; 041import twitter4j.Status; 042import twitter4j.Twitter; 043import twitter4j.TwitterException; 044import twitter4j.auth.AccessToken; 045import core.tut.pori.context.ServiceInitializer; 046import core.tut.pori.http.parameters.DataGroups; 047import core.tut.pori.users.UserIdentity; 048 049/** 050 * High-level client for extracting profile details from Twitter. 051 * 052 * This class is NOT thread-safe 053 */ 054public class TwitterExtractor { 055 private static final int DEFAULT_LIMIT = 200; 056 private static final String HEADER_DATE = "date"; 057 private static final Logger LOGGER = Logger.getLogger(TwitterExtractor.class); 058 private static final int MAX_RETRIES = 5; 059 private static final int STATUS_CODE_LIMIT_EXCEEDED_V1 = 420; 060 private static final int STATUS_CODE_LIMIT_EXCEEDED_V11 = 429; 061 private UserIdentity _userId = null; 062 private Twitter _twitter = null; 063 private boolean _filterDescriptions = true; // filter video and photo descriptions for descriptions not belonging to the profile owner 064 private boolean _abortOnRateLimit = false; 065 066 /** 067 * Valid content types for a profile 068 * 069 */ 070 public enum ContentType{ 071 /** Include tags generated by other back-ends. Can only be used in combination with PHOTO_DESCRIPTION */ 072 GENERATED_TAGS, 073 /** photo descriptions generated from Twitter photos and status messages */ 074 PHOTO_DESCRIPTIONS, 075 /** Twitter status messages */ 076 STATUS_MESSAGES, 077 /** video descriptions generated from Twitter photos and status messages */ 078 VIDEO_DESCRIPTIONS; 079 080 /** 081 * 082 * @param values 083 * @return set of ContentTypes or null if null or empty collection was passed 084 * @throws IllegalArgumentException on bad value 085 */ 086 public static EnumSet<ContentType> fromString(Collection<String> values) throws IllegalArgumentException { 087 EnumSet<ContentType> contentTypes = null; 088 if(values != null && !values.isEmpty()){ 089 contentTypes = EnumSet.noneOf(ContentType.class); 090 for(String value : values){ 091 ContentType found = null; 092 for(ContentType t : values()){ 093 if(t.name().equalsIgnoreCase(value)){ 094 found = t; 095 break; 096 } 097 } // for types 098 if(found == null){ 099 throw new IllegalArgumentException("Bad ContentType: "+value); 100 } 101 contentTypes.add(found); 102 } // for values 103 } 104 return contentTypes; 105 } 106 } // enum ContentType 107 108 /** 109 * 110 * @param userId 111 * @return the extractor or null on failure 112 */ 113 public static TwitterExtractor getExtractor(UserIdentity userId){ 114 TwitterExtractor extractor = null; 115 AccessToken token = ServiceInitializer.getDAOHandler().getSQLDAO(TwitterUserDAO.class).getAccessToken(userId); 116 if(token == null){ 117 LOGGER.warn("No token."); 118 return null; 119 } 120 extractor = new TwitterExtractor(userId); 121 extractor._twitter = TJContentCore.getTwitterFactory().getInstance(token); 122 return extractor; 123 } 124 125 /** 126 * 127 * @param userIdentity 128 */ 129 private TwitterExtractor(UserIdentity userIdentity){ 130 _userId = userIdentity; 131 } 132 133 /** 134 * 135 * @param contentTypes 136 * @param screenNames 137 * @return list of found profiles or null on failure 138 */ 139 public List<TwitterProfile> getProfiles(EnumSet<ContentType> contentTypes, String[] screenNames){ 140 if(ArrayUtils.isEmpty(screenNames)){ 141 LOGGER.warn("Empty screen name list."); 142 return null; 143 } 144 145 List<TwitterUserDetails> userDetails = null; 146 int retriesLeft = MAX_RETRIES; 147 while(userDetails == null && retriesLeft > 0){ 148 try { 149 LOGGER.debug("Retrieving user details..."); 150 userDetails = TwitterUserDetails.getTwitterUserDetails(_twitter.lookupUsers(screenNames)); 151 if(userDetails == null){ 152 LOGGER.warn("Failed to resolve user details."); 153 return null; 154 } 155 } catch (TwitterException ex) { 156 if(!handleTwitterException(ex)){ 157 return null; 158 } 159 --retriesLeft; 160 } 161 } 162 163 List<TwitterProfile> profiles = new ArrayList<>(userDetails.size()); 164 165 if(contentTypes != null && !contentTypes.isEmpty()){ 166 boolean generatedTags = contentTypes.contains(ContentType.GENERATED_TAGS); 167 if(generatedTags && contentTypes.size() == 1){ 168 throw new IllegalArgumentException("Only "+ContentType.GENERATED_TAGS.name()+" given."); 169 } 170 boolean hasStatusMessages = contentTypes.contains(ContentType.STATUS_MESSAGES); 171 boolean hasVideoDescriptions = contentTypes.contains(ContentType.VIDEO_DESCRIPTIONS); 172 boolean hasPhotoDescriptions = contentTypes.contains(ContentType.PHOTO_DESCRIPTIONS); 173 if(hasVideoDescriptions || hasPhotoDescriptions || hasStatusMessages){ 174 for(TwitterUserDetails details : userDetails){ 175 TwitterProfile profile = new TwitterProfile(details); 176 profiles.add(profile); 177 178 List<Status> statuses = getStatuses(details.getScreenName()); 179 if(statuses != null){ 180 if(hasStatusMessages){ 181 profile.setStatusMessages(getStatusMessages(statuses)); 182 } 183 184 List<String> filter = null; 185 if(_filterDescriptions){ 186 filter = Arrays.asList(details.getTwitterId()); 187 } 188 189 if(hasPhotoDescriptions){ 190 List<TwitterPhotoDescription> descriptions = getPhotoDescriptions(generatedTags, statuses, filter); 191 LOGGER.debug("Photo descriptions extracted: "+(descriptions == null ? "0" : descriptions.size())); 192 profile.setPhotoDescriptions(descriptions); 193 } 194 195 if(hasVideoDescriptions){ 196 List<TwitterVideoDescription> descriptions = getVideoDescriptions(statuses, filter); 197 LOGGER.debug("Video descriptions extracted: "+(descriptions == null ? "0" : descriptions.size())); 198 profile.setVideoDescriptions(descriptions); 199 } 200 } // if 201 } // for 202 } // if 203 }else{ 204 LOGGER.debug("No content types requested."); 205 for(TwitterUserDetails details : userDetails){ 206 profiles.add(new TwitterProfile(details)); 207 } 208 } 209 210 return profiles; 211 } 212 213 /** 214 * 215 * @param contentTypes 216 * @return the profile or null on failure 217 * @throws IllegalArgumentException on bad content types 218 */ 219 public TwitterProfile getProfile(EnumSet<ContentType> contentTypes) throws IllegalArgumentException{ 220 TwitterUserDetails userDetails = null; 221 int retriesLeft = MAX_RETRIES; 222 while(userDetails == null && retriesLeft > 0){ 223 try { 224 LOGGER.debug("Retrieving user details..."); 225 userDetails = TwitterUserDetails.getTwitterUserDetails(_twitter.verifyCredentials()); 226 if(userDetails == null){ 227 LOGGER.warn("Failed to resolve user details."); 228 return null; 229 } 230 } catch (TwitterException ex) { 231 if(!handleTwitterException(ex)){ 232 return null; 233 } 234 --retriesLeft; 235 } 236 } 237 userDetails.setUserId(_userId); 238 TwitterProfile profile = new TwitterProfile(userDetails); 239 240 if(contentTypes != null && !contentTypes.isEmpty()){ 241 boolean generatedTags = contentTypes.contains(ContentType.GENERATED_TAGS); 242 if(generatedTags && contentTypes.size() == 1){ 243 throw new IllegalArgumentException("Only "+ContentType.GENERATED_TAGS.name()+" given."); 244 } 245 boolean hasStatusMessages = contentTypes.contains(ContentType.STATUS_MESSAGES); 246 boolean hasVideoDescriptions = contentTypes.contains(ContentType.VIDEO_DESCRIPTIONS); 247 boolean hasPhotoDescriptions = contentTypes.contains(ContentType.PHOTO_DESCRIPTIONS); 248 if(hasVideoDescriptions || hasPhotoDescriptions || hasStatusMessages){ 249 List<Status> statuses = getStatuses(null); 250 if(statuses != null){ 251 if(hasStatusMessages){ 252 profile.setStatusMessages(getStatusMessages(statuses)); 253 } 254 255 List<String> filter = null; 256 if(_filterDescriptions){ 257 filter = Arrays.asList(userDetails.getTwitterId()); 258 } 259 260 if(hasPhotoDescriptions){ 261 profile.setPhotoDescriptions(getPhotoDescriptions(generatedTags, statuses, filter)); 262 } 263 264 if(hasVideoDescriptions){ 265 profile.setVideoDescriptions(getVideoDescriptions(statuses, filter)); 266 } 267 } // if 268 } // if 269 }else{ 270 LOGGER.debug("No content types requested."); 271 } 272 273 return profile; 274 } 275 276 /** 277 * 278 * @return all photo descriptions found on the current user's timeline without generated tags 279 */ 280 public List<TwitterPhotoDescription> getPhotoDescriptions() { 281 return getPhotoDescriptions(false, null); 282 } 283 284 /** 285 * 286 * @param descriptions 287 * @param entityId 288 * @return true if the description collection already contains a description with the given entity id 289 */ 290 private boolean photosContains(Collection<TwitterPhotoDescription> descriptions, String entityId){ 291 for(TwitterPhotoDescription d : descriptions){ 292 if(entityId.equals(d.getEntityId())){ 293 return true; 294 } 295 } 296 return false; 297 } 298 299 /** 300 * 301 * @param generatedTags 302 * @param statuses 303 * @param twitterUserIdFilter 304 * @return list of photo descriptions or null if none was found 305 */ 306 private List<TwitterPhotoDescription> getPhotoDescriptions(boolean generatedTags, List<Status> statuses, Collection<String> twitterUserIdFilter) { 307 if(statuses == null){ 308 return null; 309 } 310 int sCount = statuses.size(); 311 LOGGER.debug("Processing status messages for photo descriptions, messages: "+sCount); 312 313 List<String> entityIds = new ArrayList<>(sCount); 314 List<TwitterPhotoDescription> descriptions = new ArrayList<>(sCount); 315 for(Status s : statuses){ 316 if(twitterUserIdFilter != null && !twitterUserIdFilter.contains(String.valueOf(s.getUser().getId()))){ 317 LOGGER.debug("Ignoring status message based on the given filter twitter user id filter."); 318 continue; 319 } 320 List<TwitterPhotoDescription> p = TwitterPhotoDescription.getTwitterPhotoDescriptions(s); // the factory object will check for the correct type 321 if(p != null){ 322 for(TwitterPhotoDescription d : p){ 323 String entityId = d.getEntityId(); 324 if(photosContains(descriptions, entityId)){ // ignore duplicates 325 continue; 326 } 327 descriptions.add(d); 328 entityIds.add(entityId); 329 } 330 } //if 331 } 332 333 if(descriptions.isEmpty()){ 334 LOGGER.debug("No photo descriptions in the given list of statuses."); 335 return null; 336 } 337 338 List<TwitterEntry> entries = ServiceInitializer.getDAOHandler().getSQLDAO(TwitterDAO.class).getEntriesByEntityId(entityIds, _userId); 339 if(entries == null){ 340 LOGGER.debug("None of the photos are known by the system."); 341 }else{ 342 PhotoList gTags = null; 343 if(generatedTags){ 344 LOGGER.debug("Retrieving generated tags."); 345 List<String> guids = new ArrayList<>(entries.size()); 346 for(TwitterEntry e : entries){ 347 guids.add(e.getGUID()); 348 } 349 gTags = ServiceInitializer.getDAOHandler().getSolrDAO(PhotoDAO.class).getPhotos(new DataGroups(Definitions.DATA_GROUP_KEYWORDS), guids, null, null, null); 350 } 351 352 LOGGER.debug("Resolving photo GUIDs for descriptions."); 353 for(TwitterPhotoDescription d : descriptions){ 354 String objectId = d.getEntityId(); 355 String screenName = d.getFromName(); 356 for(Iterator<TwitterEntry> eIter = entries.iterator(); eIter.hasNext();){ 357 TwitterEntry e = eIter.next(); 358 if(e.getEntityId().equals(objectId) && e.getScreenName().equals(screenName)){ // the same entity id might appear for different users' content so also match by screen name 359 String guid = e.getGUID(); 360 d.setPhotoGUID(guid); 361 d.setServiceType(TwitterPhotoStorage.SERVICE_TYPE); // no need to check from database, all photos from TwitterDAO entries are of the same type 362 if(gTags != null){ // if tags were found 363 Photo p = gTags.getPhoto(guid); // in practice this should always return a photo... 364 if(p != null){ 365 MediaObjectList objects = p.getMediaObjects(); 366 if(!MediaObjectList.isEmpty(objects)){ 367 for(MediaObject vo : objects.getMediaObjects()){ 368 d.addTag(TwitterPhotoTag.getTwitterTag(vo)); 369 } // for 370 } // if photo had media objects 371 }else{ // ..though there is theoretical possibility that the photo has been removed in between retrievals (which are not in a transaction), and does not exist anymore 372 LOGGER.warn("No photo found, GUID: "+guid); 373 d.setPhotoGUID(null); // not valid anymore 374 } // else 375 } // if 376 eIter.remove(); // remove entry to prevent unnecessary looping in the following loops 377 } 378 } // for entries 379 } // for descriptions 380 } 381 382 return descriptions; 383 } 384 385 /** 386 * 387 * @param generatedTags if true the generated tags will be retrieved from the database 388 * @param twitterUserIdFilter if given, only descriptions from the given users will be returned 389 * @return list of photo descriptions or null if none was found 390 */ 391 public List<TwitterPhotoDescription> getPhotoDescriptions(boolean generatedTags, Collection<String> twitterUserIdFilter) { 392 return getPhotoDescriptions(generatedTags, getStatuses(null), twitterUserIdFilter); 393 } 394 395 /** 396 * 397 * @return all video descriptions found on user's timeline 398 */ 399 public List<TwitterVideoDescription> getVideoDescriptions(){ 400 return getVideoDescriptions(null); 401 } 402 403 /** 404 * 405 * @param twitterUserIdFilter if given, only descriptions from these users will be returned 406 * @return list of video descriptions or null if none was found 407 */ 408 public List<TwitterVideoDescription> getVideoDescriptions(Collection<String> twitterUserIdFilter) { 409 return getVideoDescriptions(getStatuses(null), twitterUserIdFilter); 410 } 411 412 /** 413 * extract video descriptions from the given list of statuses 414 * 415 * @param statuses 416 * @param twitterUserIdFilter 417 * @return list of video descriptions or null if none was found 418 */ 419 private List<TwitterVideoDescription> getVideoDescriptions(List<Status> statuses, Collection<String> twitterUserIdFilter){ 420 if(statuses == null){ 421 return null; 422 } 423 424 int sCount = statuses.size(); 425 LOGGER.debug("Processing status messages for photo descriptions, messages: "+sCount); 426 427 List<TwitterVideoDescription> descriptions = new ArrayList<>(sCount); 428 for(Status s : statuses){ 429 if(twitterUserIdFilter != null && !twitterUserIdFilter.contains(String.valueOf(s.getUser().getId()))){ 430 LOGGER.debug("Ignoring status message based on the given filter twitter user id filter."); 431 continue; 432 } 433 TwitterVideoDescription v = TwitterVideoDescription.getTwitterVideoDescription(s); 434 if(v != null){ 435 descriptions.add(v); 436 } 437 } 438 439 return (descriptions.isEmpty() ? null : descriptions); 440 } 441 442 /** 443 * 444 * @param screenName if null, the home timeline of the authenticated user will be retrieved 445 * @return list of statuses or null if none found 446 * @throws IllegalArgumentException 447 */ 448 private List<Status> getStatuses(String screenName) throws IllegalArgumentException{ 449 Paging paging = new Paging(); 450 paging.setCount(DEFAULT_LIMIT); 451 List<Status> statuses = new ArrayList<>(); 452 int retriesLeft = MAX_RETRIES; 453 while(retriesLeft > 0){ 454 try { 455 ResponseList<Status> temp = (screenName == null ? _twitter.getHomeTimeline(paging) : _twitter.getUserTimeline(screenName, paging)); 456 int received = temp.size(); 457 if(received < 1){ // did not receive anything 458 break; 459 }else if(received == DEFAULT_LIMIT){ // there may be more 460 long lowestId = Long.MAX_VALUE; 461 for(Status s : temp){ 462 long tempId = s.getId(); 463 if(tempId < lowestId){ 464 lowestId = tempId; 465 } 466 statuses.add(s); 467 } // for 468 469 paging.setMaxId(lowestId-1); 470 }else{ // this is all there is 471 for(Status s : temp){ 472 statuses.add(s); 473 } 474 break; 475 } // else 476 } catch (TwitterException ex) { 477 if(!handleTwitterException(ex)){ 478 return null; 479 } 480 --retriesLeft; 481 } 482 } // while 483 484 return (statuses.isEmpty() ? null : statuses); 485 } 486 487 /** 488 * 489 * @return list of status messages or null if none is available 490 */ 491 public List<TwitterStatusMessage> getStatusMessages() { 492 return getStatusMessages(getStatuses(null)); 493 } 494 495 /** 496 * extracts status messages from the given list of statuses 497 * 498 * @param statuses 499 * @return list of status messages or null if none was found 500 */ 501 private List<TwitterStatusMessage> getStatusMessages(List<Status> statuses){ 502 if(statuses == null){ 503 return null; 504 } 505 List<TwitterStatusMessage> messages = new ArrayList<>(statuses.size()); 506 for(Status s : statuses){ 507 messages.add(TwitterStatusMessage.getTwitterStatusMessage(s)); 508 } 509 510 return (messages.isEmpty() ? null : messages); 511 } 512 513 /** 514 * @return the userId 515 */ 516 public UserIdentity getUserId() { 517 return _userId; 518 } 519 520 /** 521 * filter video and photo descriptions for descriptions not belonging to the profile owner 522 * 523 * @return the filterDescriptions 524 */ 525 public boolean isFilterDescriptions() { 526 return _filterDescriptions; 527 } 528 529 /** 530 * filter video and photo descriptions for descriptions not belonging to the profile owner 531 * 532 * @param filterDescriptions the filterDescriptions to set 533 */ 534 public void setFilterDescriptions(boolean filterDescriptions) { 535 _filterDescriptions = filterDescriptions; 536 } 537 538 /** 539 * @return the abortOnRateLimit 540 */ 541 public boolean isAbortOnRateLimit() { 542 return _abortOnRateLimit; 543 } 544 545 /** 546 * @param abortOnRateLimit the abortOnRateLimit to set 547 */ 548 public void setAbortOnRateLimit(boolean abortOnRateLimit) { 549 _abortOnRateLimit = abortOnRateLimit; 550 } 551 552 /** 553 * 554 * @param exception 555 * @return true if the exception was handled successfully 556 */ 557 private boolean handleTwitterException(TwitterException exception) { 558 int code = exception.getStatusCode(); 559 LOGGER.debug("Twitter responded with code: "+code); 560 if(code != STATUS_CODE_LIMIT_EXCEEDED_V11 && code != STATUS_CODE_LIMIT_EXCEEDED_V1){ 561 LOGGER.error(exception, exception); 562 return false; 563 } 564 565 LOGGER.debug(exception, exception); 566 if(_abortOnRateLimit){ 567 LOGGER.warn("Abort on rate limit on, aborting..."); 568 return false; 569 } 570 571 long waitTime = ((long)exception.getRateLimitStatus().getResetTimeInSeconds())*1000-DateUtils.parseDate(exception.getResponseHeader(HEADER_DATE)).getTime()+2000; // use twitter's time, as the server clocks may be out-of-sync, add random +2 second delay just in case 572 if(waitTime < 1){ 573 LOGGER.warn("Invalid wait time: "+waitTime); 574 return false; 575 } 576 577 try { 578 LOGGER.debug("Waiting for next request window: "+waitTime); 579 Thread.sleep(waitTime); 580 } catch (InterruptedException ex) { 581 LOGGER.error(ex, ex); 582 return false; 583 } 584 585 return true; 586 } 587}