001/**
002 * Copyright 2014 Tampere University of Technology, Pori Department
003 * 
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * 
008 *   http://www.apache.org/licenses/LICENSE-2.0
009 * 
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package service.tut.pori.twitterjazz;
017
018import java.util.ArrayList;
019import java.util.Arrays;
020import java.util.Collection;
021import java.util.EnumSet;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.commons.lang3.ArrayUtils;
026import org.apache.http.client.utils.DateUtils;
027import org.apache.log4j.Logger;
028
029import service.tut.pori.contentanalysis.Definitions;
030import service.tut.pori.contentanalysis.MediaObject;
031import service.tut.pori.contentanalysis.MediaObjectList;
032import service.tut.pori.contentanalysis.Photo;
033import service.tut.pori.contentanalysis.PhotoDAO;
034import service.tut.pori.contentanalysis.PhotoList;
035import service.tut.pori.contentstorage.TwitterDAO;
036import service.tut.pori.contentstorage.TwitterPhotoStorage;
037import service.tut.pori.contentstorage.TwitterPhotoStorage.TwitterEntry;
038import service.tut.pori.users.twitter.TwitterUserDAO;
039import twitter4j.Paging;
040import twitter4j.ResponseList;
041import twitter4j.Status;
042import twitter4j.Twitter;
043import twitter4j.TwitterException;
044import twitter4j.auth.AccessToken;
045import core.tut.pori.context.ServiceInitializer;
046import core.tut.pori.http.parameters.DataGroups;
047import core.tut.pori.users.UserIdentity;
048
049/**
050 * High-level client for extracting profile details from Twitter.
051 * 
052 * This class is NOT thread-safe
053 */
054public class TwitterExtractor {
055  private static final int DEFAULT_LIMIT = 200;
056  private static final String HEADER_DATE = "date";
057  private static final Logger LOGGER = Logger.getLogger(TwitterExtractor.class);
058  private static final int MAX_RETRIES = 5;
059  private static final int STATUS_CODE_LIMIT_EXCEEDED_V1 = 420;
060  private static final int STATUS_CODE_LIMIT_EXCEEDED_V11 = 429;
061  private UserIdentity _userId = null;
062  private Twitter _twitter = null;
063  private boolean _filterDescriptions = true; // filter video and photo descriptions for descriptions not belonging to the profile owner
064  private boolean _abortOnRateLimit = false;
065
066  /**
067   * Valid content types for a profile
068   * 
069   */
070  public enum ContentType{
071    /** Include tags generated by other back-ends. Can only be used in combination with PHOTO_DESCRIPTION */
072    GENERATED_TAGS,
073    /** photo descriptions generated from Twitter photos and status messages */
074    PHOTO_DESCRIPTIONS,
075    /** Twitter status messages */
076    STATUS_MESSAGES,
077    /** video descriptions generated from Twitter photos and status messages */
078    VIDEO_DESCRIPTIONS;
079    
080    /**
081     * 
082     * @param values
083     * @return set of ContentTypes or null if null or empty collection was passed
084     * @throws IllegalArgumentException on bad value
085     */
086    public static EnumSet<ContentType> fromString(Collection<String> values) throws IllegalArgumentException {
087      EnumSet<ContentType> contentTypes = null;
088      if(values != null && !values.isEmpty()){
089        contentTypes = EnumSet.noneOf(ContentType.class);
090        for(String value : values){
091          ContentType found = null;
092          for(ContentType t : values()){
093            if(t.name().equalsIgnoreCase(value)){
094              found = t;
095              break;
096            }
097          } // for types
098          if(found == null){
099            throw new IllegalArgumentException("Bad ContentType: "+value);
100          }
101          contentTypes.add(found);
102        } // for values
103      }
104      return contentTypes;
105    }
106  } // enum ContentType
107
108  /**
109   * 
110   * @param userId
111   * @return the extractor or null on failure
112   */
113  public static TwitterExtractor getExtractor(UserIdentity userId){
114    TwitterExtractor extractor = null;
115    AccessToken token = ServiceInitializer.getDAOHandler().getSQLDAO(TwitterUserDAO.class).getAccessToken(userId);
116    if(token == null){
117      LOGGER.warn("No token.");
118      return null;
119    }
120    extractor = new TwitterExtractor(userId);
121    extractor._twitter = TJContentCore.getTwitterFactory().getInstance(token);
122    return extractor;
123  }
124
125  /**
126   * 
127   * @param userIdentity
128   */
129  private TwitterExtractor(UserIdentity userIdentity){
130    _userId = userIdentity;
131  }
132
133  /**
134   * 
135   * @param contentTypes
136   * @param screenNames
137   * @return list of found profiles or null on failure
138   */
139  public List<TwitterProfile> getProfiles(EnumSet<ContentType> contentTypes, String[] screenNames){
140    if(ArrayUtils.isEmpty(screenNames)){
141      LOGGER.warn("Empty screen name list.");
142      return null;
143    }
144
145    List<TwitterUserDetails> userDetails = null;
146    int retriesLeft = MAX_RETRIES;
147    while(userDetails == null && retriesLeft > 0){
148      try {
149        LOGGER.debug("Retrieving user details...");
150        userDetails = TwitterUserDetails.getTwitterUserDetails(_twitter.lookupUsers(screenNames));
151        if(userDetails == null){
152          LOGGER.warn("Failed to resolve user details.");
153          return null;
154        }
155      } catch (TwitterException ex) {
156        if(!handleTwitterException(ex)){
157          return null;
158        }
159        --retriesLeft;
160      }
161    }
162
163    List<TwitterProfile> profiles = new ArrayList<>(userDetails.size());
164
165    if(contentTypes != null && !contentTypes.isEmpty()){
166      boolean generatedTags = contentTypes.contains(ContentType.GENERATED_TAGS);
167      if(generatedTags && contentTypes.size() == 1){
168        throw new IllegalArgumentException("Only "+ContentType.GENERATED_TAGS.name()+" given.");
169      }
170      boolean hasStatusMessages = contentTypes.contains(ContentType.STATUS_MESSAGES);
171      boolean hasVideoDescriptions = contentTypes.contains(ContentType.VIDEO_DESCRIPTIONS);
172      boolean hasPhotoDescriptions = contentTypes.contains(ContentType.PHOTO_DESCRIPTIONS);
173      if(hasVideoDescriptions || hasPhotoDescriptions || hasStatusMessages){
174        for(TwitterUserDetails details : userDetails){
175          TwitterProfile profile = new TwitterProfile(details);
176          profiles.add(profile);
177
178          List<Status> statuses = getStatuses(details.getScreenName());
179          if(statuses != null){
180            if(hasStatusMessages){
181              profile.setStatusMessages(getStatusMessages(statuses));
182            }
183
184            List<String> filter = null;
185            if(_filterDescriptions){
186              filter = Arrays.asList(details.getTwitterId());
187            }
188
189            if(hasPhotoDescriptions){
190              List<TwitterPhotoDescription> descriptions = getPhotoDescriptions(generatedTags, statuses, filter);
191              LOGGER.debug("Photo descriptions extracted: "+(descriptions == null ? "0" : descriptions.size()));
192              profile.setPhotoDescriptions(descriptions);
193            }
194
195            if(hasVideoDescriptions){
196              List<TwitterVideoDescription> descriptions = getVideoDescriptions(statuses, filter);
197              LOGGER.debug("Video descriptions extracted: "+(descriptions == null ? "0" : descriptions.size()));
198              profile.setVideoDescriptions(descriptions);
199            }
200          } // if
201        } // for
202      } // if
203    }else{
204      LOGGER.debug("No content types requested.");
205      for(TwitterUserDetails details : userDetails){
206        profiles.add(new TwitterProfile(details));    
207      }
208    }
209
210    return profiles;
211  }
212
213  /**
214   * 
215   * @param contentTypes
216   * @return the profile or null on failure
217   * @throws IllegalArgumentException on bad content types
218   */
219  public TwitterProfile getProfile(EnumSet<ContentType> contentTypes) throws IllegalArgumentException{
220    TwitterUserDetails userDetails = null;
221    int retriesLeft = MAX_RETRIES;
222    while(userDetails == null && retriesLeft > 0){
223      try {
224        LOGGER.debug("Retrieving user details...");
225        userDetails = TwitterUserDetails.getTwitterUserDetails(_twitter.verifyCredentials());
226        if(userDetails == null){
227          LOGGER.warn("Failed to resolve user details.");
228          return null;
229        }
230      } catch (TwitterException ex) {
231        if(!handleTwitterException(ex)){
232          return null;
233        }
234        --retriesLeft;
235      }
236    }
237    userDetails.setUserId(_userId);
238    TwitterProfile profile = new TwitterProfile(userDetails);
239
240    if(contentTypes != null && !contentTypes.isEmpty()){
241      boolean generatedTags = contentTypes.contains(ContentType.GENERATED_TAGS);
242      if(generatedTags && contentTypes.size() == 1){
243        throw new IllegalArgumentException("Only "+ContentType.GENERATED_TAGS.name()+" given.");
244      }
245      boolean hasStatusMessages = contentTypes.contains(ContentType.STATUS_MESSAGES);
246      boolean hasVideoDescriptions = contentTypes.contains(ContentType.VIDEO_DESCRIPTIONS);
247      boolean hasPhotoDescriptions = contentTypes.contains(ContentType.PHOTO_DESCRIPTIONS);
248      if(hasVideoDescriptions || hasPhotoDescriptions || hasStatusMessages){
249        List<Status> statuses = getStatuses(null);
250        if(statuses != null){
251          if(hasStatusMessages){
252            profile.setStatusMessages(getStatusMessages(statuses));
253          }
254
255          List<String> filter = null;
256          if(_filterDescriptions){
257            filter = Arrays.asList(userDetails.getTwitterId());
258          }
259
260          if(hasPhotoDescriptions){
261            profile.setPhotoDescriptions(getPhotoDescriptions(generatedTags, statuses, filter));
262          }
263
264          if(hasVideoDescriptions){
265            profile.setVideoDescriptions(getVideoDescriptions(statuses, filter));
266          }
267        } // if
268      } // if
269    }else{
270      LOGGER.debug("No content types requested.");
271    }
272
273    return profile;
274  }
275
276  /**
277   * 
278   * @return all photo descriptions found on the current user's timeline without generated tags
279   */
280  public List<TwitterPhotoDescription> getPhotoDescriptions() {
281    return getPhotoDescriptions(false, null);
282  }
283  
284  /**
285   * 
286   * @param descriptions
287   * @param entityId
288   * @return true if the description collection already contains a description with the given entity id
289   */
290  private boolean photosContains(Collection<TwitterPhotoDescription> descriptions, String entityId){
291    for(TwitterPhotoDescription d : descriptions){
292      if(entityId.equals(d.getEntityId())){
293        return true;
294      }
295    }
296    return false;
297  }
298
299  /**
300   * 
301   * @param generatedTags 
302   * @param statuses
303   * @param twitterUserIdFilter
304   * @return list of photo descriptions or null if none was found
305   */
306  private List<TwitterPhotoDescription> getPhotoDescriptions(boolean generatedTags, List<Status> statuses, Collection<String> twitterUserIdFilter) {
307    if(statuses == null){
308      return null;
309    }
310    int sCount = statuses.size();
311    LOGGER.debug("Processing status messages for photo descriptions, messages: "+sCount);
312
313    List<String> entityIds = new ArrayList<>(sCount);
314    List<TwitterPhotoDescription> descriptions = new ArrayList<>(sCount);
315    for(Status s : statuses){
316      if(twitterUserIdFilter != null && !twitterUserIdFilter.contains(String.valueOf(s.getUser().getId()))){
317        LOGGER.debug("Ignoring status message based on the given filter twitter user id filter.");
318        continue;
319      }
320      List<TwitterPhotoDescription> p = TwitterPhotoDescription.getTwitterPhotoDescriptions(s); // the factory object will check for the correct type
321      if(p != null){
322        for(TwitterPhotoDescription d : p){
323          String entityId = d.getEntityId();
324          if(photosContains(descriptions, entityId)){ // ignore duplicates
325            continue;
326          }
327          descriptions.add(d);
328          entityIds.add(entityId);
329        }
330      } //if
331    }
332
333    if(descriptions.isEmpty()){
334      LOGGER.debug("No photo descriptions in the given list of statuses.");
335      return null;
336    }
337
338    List<TwitterEntry> entries = ServiceInitializer.getDAOHandler().getSQLDAO(TwitterDAO.class).getEntriesByEntityId(entityIds, _userId);
339    if(entries == null){
340      LOGGER.debug("None of the photos are known by the system.");
341    }else{
342      PhotoList gTags = null;
343      if(generatedTags){
344        LOGGER.debug("Retrieving generated tags.");
345        List<String> guids = new ArrayList<>(entries.size());
346        for(TwitterEntry e : entries){
347          guids.add(e.getGUID());
348        }
349        gTags = ServiceInitializer.getDAOHandler().getSolrDAO(PhotoDAO.class).getPhotos(new DataGroups(Definitions.DATA_GROUP_KEYWORDS), guids, null, null, null);
350      }
351      
352      LOGGER.debug("Resolving photo GUIDs for descriptions.");
353      for(TwitterPhotoDescription d : descriptions){
354        String objectId = d.getEntityId();
355        String screenName = d.getFromName();
356        for(Iterator<TwitterEntry> eIter = entries.iterator(); eIter.hasNext();){
357          TwitterEntry e = eIter.next();
358          if(e.getEntityId().equals(objectId) && e.getScreenName().equals(screenName)){ // the same entity id might appear for different users' content so also match by screen name
359            String guid = e.getGUID();
360            d.setPhotoGUID(guid);
361            d.setServiceType(TwitterPhotoStorage.SERVICE_TYPE); // no need to check from database, all photos from TwitterDAO entries are of the same type
362            if(gTags != null){  // if tags were found
363              Photo p = gTags.getPhoto(guid); // in practice this should always return a photo...
364              if(p != null){
365                MediaObjectList objects = p.getMediaObjects();
366                if(!MediaObjectList.isEmpty(objects)){
367                  for(MediaObject vo : objects.getMediaObjects()){
368                    d.addTag(TwitterPhotoTag.getTwitterTag(vo));
369                  } // for
370                } // if photo had media objects
371              }else{ // ..though there is theoretical possibility that the photo has been removed in between retrievals (which are not in a transaction), and does not exist anymore
372                LOGGER.warn("No photo found, GUID: "+guid);
373                d.setPhotoGUID(null); // not valid anymore
374              } // else
375            } // if
376            eIter.remove(); // remove entry to prevent unnecessary looping in the following loops
377          }
378        } // for entries
379      } // for descriptions
380    }
381
382    return descriptions;
383  }   
384
385  /**
386   * 
387   * @param generatedTags if true the generated tags will be retrieved from the database
388   * @param twitterUserIdFilter if given, only descriptions from the given users will be returned
389   * @return list of photo descriptions or null if none was found
390   */
391  public List<TwitterPhotoDescription> getPhotoDescriptions(boolean generatedTags, Collection<String> twitterUserIdFilter) {
392    return getPhotoDescriptions(generatedTags, getStatuses(null), twitterUserIdFilter);
393  }
394
395  /**
396   * 
397   * @return all video descriptions found on user's timeline
398   */
399  public List<TwitterVideoDescription> getVideoDescriptions(){
400    return getVideoDescriptions(null);
401  }
402
403  /**
404   * 
405   * @param twitterUserIdFilter if given, only descriptions from these users will be returned
406   * @return list of video descriptions or null if none was found
407   */
408  public List<TwitterVideoDescription> getVideoDescriptions(Collection<String> twitterUserIdFilter) {
409    return getVideoDescriptions(getStatuses(null), twitterUserIdFilter);
410  }
411
412  /**
413   * extract video descriptions from the given list of statuses
414   * 
415   * @param statuses
416   * @param twitterUserIdFilter
417   * @return list of video descriptions or null if none was found
418   */
419  private List<TwitterVideoDescription> getVideoDescriptions(List<Status> statuses, Collection<String> twitterUserIdFilter){
420    if(statuses == null){
421      return null;
422    }
423
424    int sCount = statuses.size();
425    LOGGER.debug("Processing status messages for photo descriptions, messages: "+sCount);
426
427    List<TwitterVideoDescription> descriptions = new ArrayList<>(sCount);
428    for(Status s : statuses){
429      if(twitterUserIdFilter != null && !twitterUserIdFilter.contains(String.valueOf(s.getUser().getId()))){
430        LOGGER.debug("Ignoring status message based on the given filter twitter user id filter.");
431        continue;
432      }
433      TwitterVideoDescription v = TwitterVideoDescription.getTwitterVideoDescription(s);
434      if(v != null){
435        descriptions.add(v);
436      }
437    }
438
439    return (descriptions.isEmpty() ? null : descriptions);
440  }
441
442  /**
443   * 
444   * @param screenName if null, the home timeline of the authenticated user will be retrieved
445   * @return list of statuses or null if none found 
446   * @throws IllegalArgumentException
447   */
448  private List<Status> getStatuses(String screenName) throws IllegalArgumentException{
449    Paging paging = new Paging();
450    paging.setCount(DEFAULT_LIMIT);
451    List<Status> statuses = new ArrayList<>();
452    int retriesLeft = MAX_RETRIES;
453    while(retriesLeft > 0){
454      try {
455        ResponseList<Status> temp = (screenName == null ? _twitter.getHomeTimeline(paging) : _twitter.getUserTimeline(screenName, paging));
456        int received = temp.size();
457        if(received < 1){ // did not receive anything
458          break;
459        }else if(received == DEFAULT_LIMIT){ // there may be more
460          long lowestId = Long.MAX_VALUE;
461          for(Status s : temp){
462            long tempId = s.getId();
463            if(tempId < lowestId){
464              lowestId = tempId;
465            }
466            statuses.add(s);
467          } // for
468
469          paging.setMaxId(lowestId-1);
470        }else{ // this is all there is
471          for(Status s : temp){
472            statuses.add(s);
473          }
474          break;
475        } // else
476      } catch (TwitterException ex) {
477        if(!handleTwitterException(ex)){
478          return null;
479        }
480        --retriesLeft;
481      }
482    } // while
483
484    return (statuses.isEmpty() ? null : statuses);
485  }
486
487  /**
488   * 
489   * @return list of status messages or null if none is available
490   */
491  public List<TwitterStatusMessage> getStatusMessages() {
492    return getStatusMessages(getStatuses(null));
493  }
494
495  /**
496   * extracts status messages from the given list of statuses
497   * 
498   * @param statuses
499   * @return list of status messages or null if none was found
500   */
501  private List<TwitterStatusMessage> getStatusMessages(List<Status> statuses){
502    if(statuses == null){
503      return null;
504    }
505    List<TwitterStatusMessage> messages = new ArrayList<>(statuses.size());
506    for(Status s : statuses){
507      messages.add(TwitterStatusMessage.getTwitterStatusMessage(s));
508    }
509
510    return (messages.isEmpty() ? null : messages);
511  }
512
513  /**
514   * @return the userId
515   */
516  public UserIdentity getUserId() {
517    return _userId;
518  }
519
520  /**
521   * filter video and photo descriptions for descriptions not belonging to the profile owner
522   * 
523   * @return the filterDescriptions
524   */
525  public boolean isFilterDescriptions() {
526    return _filterDescriptions;
527  }
528
529  /**
530   * filter video and photo descriptions for descriptions not belonging to the profile owner
531   * 
532   * @param filterDescriptions the filterDescriptions to set
533   */
534  public void setFilterDescriptions(boolean filterDescriptions) {
535    _filterDescriptions = filterDescriptions;
536  }
537
538  /**
539   * @return the abortOnRateLimit
540   */
541  public boolean isAbortOnRateLimit() {
542    return _abortOnRateLimit;
543  }
544
545  /**
546   * @param abortOnRateLimit the abortOnRateLimit to set
547   */
548  public void setAbortOnRateLimit(boolean abortOnRateLimit) {
549    _abortOnRateLimit = abortOnRateLimit;
550  }
551
552  /**
553   * 
554   * @param exception
555   * @return true if the exception was handled successfully
556   */
557  private boolean handleTwitterException(TwitterException exception) {
558    int code = exception.getStatusCode();
559    LOGGER.debug("Twitter responded with code: "+code);
560    if(code != STATUS_CODE_LIMIT_EXCEEDED_V11 && code != STATUS_CODE_LIMIT_EXCEEDED_V1){
561      LOGGER.error(exception, exception);
562      return false; 
563    }
564
565    LOGGER.debug(exception, exception);
566    if(_abortOnRateLimit){
567      LOGGER.warn("Abort on rate limit on, aborting...");
568      return false;
569    }
570
571    long waitTime = ((long)exception.getRateLimitStatus().getResetTimeInSeconds())*1000-DateUtils.parseDate(exception.getResponseHeader(HEADER_DATE)).getTime()+2000; // use twitter's time, as the server clocks may be out-of-sync, add random +2 second delay just in case
572    if(waitTime < 1){
573      LOGGER.warn("Invalid wait time: "+waitTime);
574      return false;
575    }
576
577    try {
578      LOGGER.debug("Waiting for next request window: "+waitTime);
579      Thread.sleep(waitTime);
580    } catch (InterruptedException ex) {
581      LOGGER.error(ex, ex);
582      return false;
583    }
584
585    return true;
586  }
587}