/**
 * Copyright 2014 Tampere University of Technology, Pori Department
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package service.tut.pori.contentanalysistest;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import service.tut.pori.contentanalysis.AnalysisBackend;
import service.tut.pori.contentanalysis.AnalysisBackend.Capability;
import service.tut.pori.contentanalysis.AsyncTask.TaskStatus;
import service.tut.pori.contentanalysis.BackendDAO;
import service.tut.pori.contentanalysis.BackendStatus;
import service.tut.pori.contentanalysis.BackendStatusList;
import service.tut.pori.contentanalysis.CAContentCore;
import service.tut.pori.contentanalysis.PhotoDAO;
import service.tut.pori.contentanalysis.PhotoTaskDAO;
import service.tut.pori.contentanalysis.PhotoTaskDetails;
import service.tut.pori.contentanalysis.video.VideoContentCore;
import service.tut.pori.contentanalysis.video.VideoTaskDAO;
import service.tut.pori.contentanalysis.video.VideoTaskDetails;
import service.tut.pori.contentstorage.ContentStorageCore;
import service.tut.pori.facebookjazz.FacebookExtractor;
import service.tut.pori.facebookjazz.FacebookProfile;
import service.tut.pori.fuzzyvisuals.FuzzyAnalyzer;
import service.tut.pori.twitterjazz.TwitterExtractor;
import service.tut.pori.twitterjazz.TwitterExtractor.ContentType;
import service.tut.pori.twitterjazz.TwitterProfile;
import service.tut.pori.users.google.OAuth2Token;
import service.tut.pori.users.google.GoogleCredential;
import service.tut.pori.users.google.GoogleUserCore;
import core.tut.pori.context.ServiceInitializer;
import core.tut.pori.http.parameters.DataGroups;
import core.tut.pori.users.UserIdentity;


/**
 * A collection of miscellaneous test methods, containing more or less everything that is not yet (or never will be) part of the official specification/implementation.
 * These methods should NOT be used in any serious code.
 *
 */
@Deprecated
public final class ContentAnalysisTestUtils {
  /** data group for including href elements */
  public static final String DATA_GROUPS_HREF = "href";
  /** data group for including img elements */
  public static final String DATA_GROUPS_IMG = "img";
  /** data group for including video elements */
  public static final String DATA_GROUPS_VIDEO = "video";
  private static final String ATTRIBUTE_HREF = "href";
  private static final String ATTRIBUTE_SRC = "src";
  private static final Logger LOGGER = Logger.getLogger(ContentAnalysisTestUtils.class);
  private static final String SELECT_A_HREF = "a["+ATTRIBUTE_HREF+"]";
  private static final String SELECT_IMAGE_SRC = "img["+ATTRIBUTE_SRC+"]";
  private static final String SELECT_VIDEO_SRC = "video["+ATTRIBUTE_SRC+"]";
  private static final String SEPARATOR = ",";

  /**
   * Private constructor to prevent instantiation.
   */
  private ContentAnalysisTestUtils(){
    // nothing needed
  }

  /**
   * Parse a set of back-end {@link Capability} values from the given string.
   * 
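   * A minimal usage sketch; the capability names below are hypothetical and only illustrate the expected comma-separated format:
   * <pre>
   * {@code
   * EnumSet<Capability> capabilities = ContentAnalysisTestUtils.fromCapabilityString("photo_analysis, video_analysis");
   * }
   * </pre>
   * 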
   * @param s a single capability or a comma-separated list of capabilities
   * @return capability set parsed from the given string, or null if the given string was blank
   * @throws IllegalArgumentException on invalid capability string
   */
  public static EnumSet<Capability> fromCapabilityString(String s) throws IllegalArgumentException{
    if(StringUtils.isBlank(s)){
      return null;
    }
    EnumSet<Capability> capabilities = EnumSet.noneOf(Capability.class);
    String[] caps = s.split(SEPARATOR);
    for(int i=0;i<caps.length;++i){
      Capability capability = null;
      for(Capability c : Capability.values()){
        if(c.name().equalsIgnoreCase(caps[i].trim())){
          capability = c;
          break;
        }
      }
      if(capability == null){
        throw new IllegalArgumentException("Bad capability string: "+caps[i]);
      }
      capabilities.add(capability);
    }
    return capabilities;
  }

  /**
   * Create a new analysis back-end with the given details.
   * 
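   * A minimal usage sketch; the capability name and URL below are hypothetical, and the data groups are assumed to have been created elsewhere:
   * <pre>
   * {@code
   * // taskDataGroups is assumed to have been created elsewhere (e.g. parsed from request parameters)
   * Integer backendId = ContentAnalysisTestUtils.createAnalysisBackend(Arrays.asList("photo_analysis"), "Example back-end", true, taskDataGroups, "http://example.org/analysis");
   * }
   * </pre>
   * 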
   * @param capabilityString list of capability names, see {@link #fromCapabilityString(String)}
   * @param description human-readable description of the back-end
   * @param enabled if true, the back-end is enabled
   * @param taskDatagroups default task data groups for the back-end
   * @param url analysis URI of the back-end
   * @return id of the generated back-end
   */
  public static Integer createAnalysisBackend(List<String> capabilityString, String description, Boolean enabled, DataGroups taskDatagroups, String url){
    AnalysisBackend end = new AnalysisBackend();
    end.setCapabilities(fromCapabilityString(StringUtils.join(capabilityString, ',')));
    if(enabled != null){
      end.setEnabled(enabled);
    }
    end.setDescription(description);
    end.setDefaultTaskDataGroups(taskDatagroups);
    end.setAnalysisUri(url);
    ServiceInitializer.getDAOHandler().getSQLDAO(BackendDAO.class).createBackend(end);
    return end.getBackendId();
  }

  /**
   * Modify the details of an existing analysis back-end.
   * 
   * @param backendId id of the back-end to modify
   * @param capabilityString list of capability names, see {@link #fromCapabilityString(String)}
   * @param description human-readable description of the back-end
   * @param enabled if true, the back-end is enabled
   * @param taskDatagroups default task data groups for the back-end
   * @param url analysis URI of the back-end
   */
  public static void modifyAnalysisBackend(Integer backendId, List<String> capabilityString, String description, Boolean enabled, DataGroups taskDatagroups, String url){
    AnalysisBackend end = new AnalysisBackend();
    end.setCapabilities(fromCapabilityString(StringUtils.join(capabilityString,',')));
    end.setEnabled(enabled);
    end.setDescription(description);
    end.setAnalysisUri(url);
    end.setBackendId(backendId);
    end.setDefaultTaskDataGroups(taskDatagroups);
    ServiceInitializer.getDAOHandler().getSQLDAO(BackendDAO.class).updateBackend(end);
  }

  /**
   * Remove the analysis back-end with the given id.
   * 
   * @param backendId id of the back-end to remove
   */
  public static void removeAnalysisBackend(Integer backendId){
    ServiceInitializer.getDAOHandler().getSQLDAO(BackendDAO.class).removeBackend(backendId);
  }

  /**
   * Check what kind of access the given user has to the given GUID.
   * 
   * @param guid photo GUID
   * @param userId the user whose access is checked
   * @return string describing the user's access permission to the given GUID
   */
  public static String hasAccess(String guid, UserIdentity userId){
    switch(ServiceInitializer.getDAOHandler().getSolrDAO(PhotoDAO.class).getAccessDetails(userId, guid).getPermission()){
      case NO_ACCESS:
        return "No access";
      case PRIVATE_ACCESS:
        return "Private access";
      case PUBLIC_ACCESS:
        return "Public access";
      default:
        break;
    }
    return "Unknown access mode";
  }

  /**
   * List the details of the requested analysis back-ends.
   * 
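   * A minimal usage sketch; the back-end ids below are hypothetical and only for illustration:
   * <pre>
   * {@code
   * BackendDetails details = ContentAnalysisTestUtils.listAnalysisBackends(new int[]{1, 2});
   * }
   * </pre>
   * 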
   * @param backendId optional back-end id filter
   * @return back-end details for the requested back-ends
   */
  public static BackendDetails listAnalysisBackends(int[] backendId) {
    BackendDetails list = new BackendDetails();
    List<Integer> backendIds = (ArrayUtils.isEmpty(backendId) ? null : Arrays.asList(ArrayUtils.toObject(backendId)));
    list.setBackends(ServiceInitializer.getDAOHandler().getSQLDAO(BackendDAO.class).getBackends(backendIds));
    return list;
  }

  /**
   * Extract the Facebook profile of the given user.
   * 
   * @param dataGroups data groups defining the content types to extract
   * @param userId the user whose profile is extracted
   * @return facebook profile
   * @throws IllegalArgumentException if the profile could not be retrieved for the given user
   */
  public static FacebookProfile extractFacebookProfile(DataGroups dataGroups, UserIdentity userId) throws IllegalArgumentException{
    FacebookExtractor e = FacebookExtractor.getExtractor(userId);
    if(e == null){
      throw new IllegalArgumentException("Failed to retrieve profile for the given user.");
    }else{
      return e.getProfile(facebookContentTypesFromDatagroups(dataGroups));
    }
  }

  /**
   * Extract the Twitter profile of the given user.
   * 
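   * A minimal usage sketch; the dataGroups and userId variables are assumed to have been obtained elsewhere (e.g. from the servlet request):
   * <pre>
   * {@code
   * TwitterProfile profile = ContentAnalysisTestUtils.extractTwitterProfile(dataGroups, userId);
   * }
   * </pre>
   * 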
   * @param dataGroups data groups defining the content types to extract
   * @param userId the user whose profile is extracted
   * @return twitter profile
   * @throws IllegalArgumentException if the profile could not be retrieved for the given user
   */
  public static TwitterProfile extractTwitterProfile(DataGroups dataGroups, UserIdentity userId) throws IllegalArgumentException{
    TwitterExtractor e = TwitterExtractor.getExtractor(userId);
    if(e == null){
      throw new IllegalArgumentException("Failed to retrieve profile for the given user.");
    }else{
      return e.getProfile(twitterContentTypesFromDatagroups(dataGroups));
    }
  }

  /**
   * 
   * @param dataGroups if the data group list is empty or contains the all data group, all content types will be returned
   * @return content types extracted from the given data groups, or null if none were found or a null data group list was given
   */
  private static EnumSet<service.tut.pori.facebookjazz.FacebookExtractor.ContentType> facebookContentTypesFromDatagroups(DataGroups dataGroups){
    if(DataGroups.isEmpty(dataGroups) || DataGroups.hasDataGroup(DataGroups.DATA_GROUP_ALL, dataGroups)){
      return EnumSet.allOf(service.tut.pori.facebookjazz.FacebookExtractor.ContentType.class);
    }else{
      EnumSet<service.tut.pori.facebookjazz.FacebookExtractor.ContentType> retval = EnumSet.noneOf(service.tut.pori.facebookjazz.FacebookExtractor.ContentType.class);
      for(service.tut.pori.facebookjazz.FacebookExtractor.ContentType t : service.tut.pori.facebookjazz.FacebookExtractor.ContentType.values()){
        if(DataGroups.hasDataGroup(t.name(), dataGroups)){
          retval.add(t);
        }
      }
      if(retval.isEmpty()){
        return null;
      }else{
        return retval;
      }
    }
  }

  /**
   * 
   * @param dataGroups if the data group list is empty or contains the all data group, all content types will be returned
   * @return content types extracted from the given data groups, or null if none were found or a null or empty data group list was given
   */
  private static EnumSet<ContentType> twitterContentTypesFromDatagroups(DataGroups dataGroups){
    if(DataGroups.isEmpty(dataGroups) || DataGroups.hasDataGroup(DataGroups.DATA_GROUP_ALL, dataGroups)){
      return EnumSet.allOf(ContentType.class);
    }else{
      EnumSet<ContentType> retval = EnumSet.noneOf(ContentType.class);
      for(ContentType t : ContentType.values()){
        if(DataGroups.hasDataGroup(t.name(), dataGroups)){
          retval.add(t);
        }
      }
      if(retval.isEmpty()){
        return null;
      }else{
        return retval;
      }
    }
  }

  /**
   * Reschedule an existing photo analysis or feedback task for the given back-ends.
   * 
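   * A minimal usage sketch; the back-end and task ids below are hypothetical and only for illustration:
   * <pre>
   * {@code
   * ContentAnalysisTestUtils.reschedulePhotoTask(new int[]{1}, 123L);
   * }
   * </pre>
   * 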
   * @param backendId back-ends for which the task status is reset to not started
   * @param taskId id of the task to reschedule
   * @throws IllegalArgumentException if the given task is not a photo analysis or feedback task
   */
  public static void reschedulePhotoTask(int[] backendId, Long taskId) throws IllegalArgumentException {
    PhotoTaskDAO taskDao = ServiceInitializer.getDAOHandler().getSQLDAO(PhotoTaskDAO.class);
    try{
      PhotoTaskDetails details = taskDao.getTask(null, null, null, taskId);

      BackendStatusList statuses = details.getBackends();
      if(BackendStatusList.isEmpty(statuses)){
        LOGGER.debug("No pre-existing statuses.");
        statuses = new BackendStatusList();
      }

      for(int i=0;i<backendId.length;++i){ // reset status information to not started
        statuses.setBackendStatus(new BackendStatus(new AnalysisBackend(backendId[i]), TaskStatus.NOT_STARTED));
      }
      taskDao.updateTaskStatus(statuses, taskId);

      CAContentCore.scheduleTask(details);
    } catch(ClassCastException ex){
      LOGGER.error(ex, ex);
      throw new IllegalArgumentException("Attempted to schedule a task that is not a photo analysis or feedback task.");
    }
  }

  /**
   * Reschedule an existing video analysis or feedback task for the given back-ends.
   * 
   * @param backendId back-ends for which the task status is reset to not started
   * @param taskId id of the task to reschedule
   * @throws IllegalArgumentException if the given task is not a video analysis or feedback task
   */
  public static void rescheduleVideoTask(int[] backendId, Long taskId) throws IllegalArgumentException {
    VideoTaskDAO taskDao = ServiceInitializer.getDAOHandler().getSQLDAO(VideoTaskDAO.class);
    try{
      VideoTaskDetails details = taskDao.getTask(null, null, null, taskId);

      BackendStatusList statuses = details.getBackends();
      if(BackendStatusList.isEmpty(statuses)){
        LOGGER.debug("No pre-existing statuses.");
        statuses = new BackendStatusList();
      }

      for(int i=0;i<backendId.length;++i){ // reset status information to not started
        statuses.setBackendStatus(new BackendStatus(new AnalysisBackend(backendId[i]), TaskStatus.NOT_STARTED));
      }
      taskDao.updateTaskStatus(statuses, taskId);

      VideoContentCore.scheduleTask(details);
    } catch(ClassCastException ex){
      LOGGER.error(ex, ex);
      throw new IllegalArgumentException("Attempted to schedule a task that is not a video analysis or feedback task.");
    }
  }

  /**
   * Enable or disable the given analysis back-end.
   * 
   * @param backendId id of the back-end
   * @param enabled if true, the back-end is enabled, otherwise it is disabled
   * @throws IllegalArgumentException if no back-end exists with the given id
   */
  public static void enableBackend(Integer backendId, Boolean enabled) throws IllegalArgumentException {
    BackendDAO backendDAO = ServiceInitializer.getDAOHandler().getSQLDAO(BackendDAO.class);
    AnalysisBackend backend = backendDAO.getBackend(backendId);
    if(backend == null){
      throw new IllegalArgumentException("No such backend, id: "+backendId);
    }

    backend.setEnabled(enabled);
    backendDAO.updateBackend(backend);
  }

  /**
   * Resolve the Picasa web album URI for the given photo GUID.
   * 
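   * A minimal usage sketch; the authenticatedUser variable is assumed to be a valid {@link UserIdentity} and the GUID below is hypothetical:
   * <pre>
   * {@code
   * String uri = ContentAnalysisTestUtils.getPicasaUri(authenticatedUser, "photo-guid");
   * }
   * </pre>
   * 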
   * @param authenticatedUser the user whose Google credentials are used
   * @param guid photo GUID
   * @return access uri to the photo in the Picasa web service
   * @throws IllegalArgumentException on bad values
   */
  public static String getPicasaUri(UserIdentity authenticatedUser, String guid) throws IllegalArgumentException{
    GoogleCredential gc = GoogleUserCore.getCredential(authenticatedUser);
    if(gc == null){
      throw new IllegalArgumentException("Failed to resolve Google credentials for the user.");
    }

    String googleUserId = gc.getId();
    Pair<String, String> ids = ServiceInitializer.getDAOHandler().getSQLDAO(PicasaAlbumDAO.class).getIdPair(googleUserId, guid);
    if(ids == null){
      throw new IllegalArgumentException("Could not find album for guid : "+guid);
    }

    Map<String, String> albums = resolvePicasaAlbumNames(authenticatedUser, googleUserId);
    if(albums == null){
      throw new IllegalArgumentException("Could not find album, id : "+ids.getLeft());
    }
    String name = albums.get(ids.getLeft());
    if(StringUtils.isBlank(name)){
      throw new IllegalArgumentException("Could not resolve name for album, id : "+ids.getLeft());
    }

    return "https://picasaweb.google.com/"+googleUserId+"/"+name+"#"+ids.getRight();
  }

  /**
   * Resolve the names of the user's Picasa albums using the Picasa web service.
   * 
   * @param authenticatedUser the user whose access token is used
   * @param googleUserId Google user id of the user
   * @return map of album ids and names or null on error
   * @throws IllegalArgumentException if the user has no active access token
   */
  private static Map<String, String> resolvePicasaAlbumNames(UserIdentity authenticatedUser, String googleUserId) {
    OAuth2Token token = GoogleUserCore.getToken(authenticatedUser);
    if(token == null){
      throw new IllegalArgumentException("No active access token for the user.");
    }

    Map<String, String> albumIdNameMap = new HashMap<>();

    try(CloseableHttpClient client = HttpClients.createDefault()){
      HttpGet get = new HttpGet("https://picasaweb.google.com/data/feed/api/user/"+googleUserId+"?fields=entry(gphoto:id,gphoto:name)"); // retrieve only album names and IDs
      get.setHeader("GData-Version", "2");
      get.setHeader("Authorization", "Bearer "+token.getAccessToken());

      try(CloseableHttpResponse response = client.execute(get)){
        int statusCode = response.getStatusLine().getStatusCode();
        if(statusCode < 200 || statusCode >= 300){
          LOGGER.warn("Server responded with status: "+statusCode);
          return null;
        }

        NodeList entries = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(response.getEntity().getContent()).getElementsByTagName("entry");
        int entryCount = entries.getLength();
        if(entryCount < 1){
          LOGGER.warn("No albums in picasa service for user, id: "+authenticatedUser.getUserId());
          return null;
        }

        for(int i=0;i<entryCount;++i){
          Element entry = (Element) entries.item(i);
          String id = entry.getElementsByTagName("gphoto:id").item(0).getTextContent();
          if(StringUtils.isBlank(id)){
            LOGGER.warn("Ignored album with invalid id.");
            continue;
          }

          String name = entry.getElementsByTagName("gphoto:name").item(0).getTextContent();
          if(StringUtils.isBlank(name)){
            LOGGER.warn("Ignored album with invalid name, id: "+id);
          }else{
            albumIdNameMap.put(id, name);
          } // else
        } // for
      }
      return (albumIdNameMap.isEmpty() ? null : albumIdNameMap);
    } catch (IOException | SAXException | ParserConfigurationException ex) {
      LOGGER.error(ex, ex);
      return null;
    }
  }

  /**
   * List the Picasa albums of the given user.
   * 
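   * A minimal usage sketch; the authenticatedUser variable is assumed to be a valid {@link UserIdentity}, no album id filter is assumed to be given, and both GUIDs and album names are requested:
   * <pre>
   * {@code
   * PicasaAlbumList albums = ContentAnalysisTestUtils.getPicasaAlbums(authenticatedUser, null, true, true);
   * }
   * </pre>
   * 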
   * @param authenticatedUser the user whose albums are listed
   * @param albumIds optional album id filter
   * @param includeGUIDs if true, the photo GUIDs of each album are included
   * @param resolveNames if true, the Picasa service is used to resolve album names
   * @return list of albums or null if none available
   * @throws IllegalArgumentException on bad data
   */
  public static PicasaAlbumList getPicasaAlbums(UserIdentity authenticatedUser, List<String> albumIds, boolean includeGUIDs, boolean resolveNames) throws IllegalArgumentException {
    GoogleCredential gc = GoogleUserCore.getCredential(authenticatedUser);
    if(gc == null){
      throw new IllegalArgumentException("Failed to resolve Google credentials for the user.");
    }

    PicasaAlbumDAO dao = ServiceInitializer.getDAOHandler().getSQLDAO(PicasaAlbumDAO.class);
    String googleUserId = gc.getId();
    List<String> ids = dao.getAlbumIds(albumIds, googleUserId);
    if(ids == null){
      LOGGER.debug("No albums for user, id: "+authenticatedUser.getUserId());
      return null;
    }

    int albumIdCount = ids.size();
    Map<String, String> albumIdNameMap = null;
    if(resolveNames){
      LOGGER.debug("Resolving album names...");
      albumIdNameMap = resolvePicasaAlbumNames(authenticatedUser, googleUserId);
      if(albumIdNameMap == null){
        LOGGER.warn("Failed to resolve any album names.");
        return null;
      }
    }

    List<PicasaAlbum> albums = new ArrayList<>(albumIdCount);
    for(String id : ids){
      PicasaAlbum pa = new PicasaAlbum();
      pa.setId(id);

      if(resolveNames){
        String name = albumIdNameMap.get(id);
        if(StringUtils.isBlank(name)){
          LOGGER.warn("Ignored album with invalid name: "+name+", album id: "+id);
          continue;
        }
        pa.setName(name);
      }

      if(includeGUIDs){
        List<String> GUIDs = dao.getGUIDs(id);
        if(GUIDs == null){ // this should really not happen, there should always be content
          LOGGER.warn("No GUIDs for album, id: "+id+" and user, id: "+authenticatedUser.getUserId());
        }
        pa.setGUIDs(GUIDs);
      }
      albums.add(pa);
    }

    if(albums.isEmpty()){
      LOGGER.warn("No albums resolved.");
      return null;
    }

    if(resolveNames){
      Collections.sort(albums, new Comparator<PicasaAlbum>() {
        @Override
        public int compare(PicasaAlbum a1, PicasaAlbum a2) {
          return a1.getName().compareTo(a2.getName());
        }
      });
    }

    PicasaAlbumList albumList = new PicasaAlbumList();
    albumList.setAlbums(albums);
    return albumList;
  }

  /**
   * Analyze the content of the given web page, adding the discovered URLs for content analysis.
   * 
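   * A minimal usage sketch; the authenticatedUser and dataGroups variables are assumed to have been obtained elsewhere (with dataGroups containing e.g. the {@value #DATA_GROUPS_IMG} data group), and the URL is only an example:
   * <pre>
   * {@code
   * boolean ok = ContentAnalysisTestUtils.analyzePage(authenticatedUser, null, dataGroups, "http://example.org/gallery.html");
   * }
   * </pre>
   * 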
   * @param authenticatedUser the user for whom the content is added
   * @param backendIds optional back-end id filter
   * @param dataGroups optional datagroups, applicable values are: {@value #DATA_GROUPS_HREF}, {@value #DATA_GROUPS_IMG}, {@value #DATA_GROUPS_VIDEO}
   * @param url URL of the web page to analyze
   * @return true if the given url was successfully parsed and contained applicable content
   */
  public static boolean analyzePage(UserIdentity authenticatedUser, int[] backendIds, DataGroups dataGroups, String url) {
    LOGGER.debug("Analyzing page : "+url);

    Document doc = null;
    try {
      doc = Jsoup.connect(url).get();
    } catch (IOException ex) {
      LOGGER.error(ex, ex);
      return false;
    }
    ArrayList<String> urls = new ArrayList<>();

    if(DataGroups.hasDataGroup(DATA_GROUPS_HREF, dataGroups)){
      Elements links = doc.select(SELECT_A_HREF);
      if(links.isEmpty()){
        LOGGER.debug("No "+DATA_GROUPS_HREF+" links in url : "+url);
      }else{
        for(org.jsoup.nodes.Element link : links){
          String aurl = link.absUrl(ATTRIBUTE_HREF);
          if(StringUtils.isBlank(aurl)){
            LOGGER.warn("Ignored empty "+ATTRIBUTE_HREF);
          }else{
            urls.add(aurl);
          }
        }
      }
    }

    if(DataGroups.hasDataGroup(DATA_GROUPS_IMG, dataGroups)){
      Elements links = doc.select(SELECT_IMAGE_SRC);
      if(links.isEmpty()){
        LOGGER.debug("No "+DATA_GROUPS_IMG+" links in url : "+url);
      }else{
        for(org.jsoup.nodes.Element link : links){
          String aurl = link.absUrl(ATTRIBUTE_SRC);
          if(StringUtils.isBlank(aurl)){
            LOGGER.warn("Ignored empty "+ATTRIBUTE_SRC);
          }else{
            urls.add(aurl);
          }
        }
      }
    }

    if(DataGroups.hasDataGroup(DATA_GROUPS_VIDEO, dataGroups)){
      Elements links = doc.select(SELECT_VIDEO_SRC);
      if(links.isEmpty()){
        LOGGER.debug("No "+DATA_GROUPS_VIDEO+" links in url : "+url);
      }else{
        for(org.jsoup.nodes.Element link : links){
          String aurl = link.absUrl(ATTRIBUTE_SRC);
          if(StringUtils.isBlank(aurl)){
            LOGGER.warn("Ignored empty "+ATTRIBUTE_SRC);
          }else{
            urls.add(aurl);
          }
        }
      }
    }

    if(urls.isEmpty()){
      LOGGER.warn("No applicable content in url : "+url);
      return false;
    }

    ContentStorageCore.addUrls(authenticatedUser, backendIds, urls);

    return true;
  }

  /**
   * Analyze the given file content with {@link FuzzyAnalyzer}.
   * 
   * @param file file content to analyze
   * @return the extracted words as a string
   */
  public static String fuzzyFile(InputStream file) {
    Set<String> words = null;
    try(FuzzyAnalyzer fa = new FuzzyAnalyzer()){
      words = fa.analyze(file);
    }

    return StringUtils.join(words, ',');
  }

  /**
   * Analyze the content of the given URL with {@link FuzzyAnalyzer}.
   * 
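   * A minimal usage sketch; the URL below is only an example:
   * <pre>
   * {@code
   * String words = ContentAnalysisTestUtils.fuzzyUrl("http://example.org/page.html");
   * }
   * </pre>
   * 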
   * @param url URL of the content to analyze
   * @return the extracted words as a string
   */
  public static String fuzzyUrl(String url) {
    Set<String> words = null;
    try(FuzzyAnalyzer fa = new FuzzyAnalyzer()){
      words = fa.analyze(url);
    }

    return StringUtils.join(words, ',');
  }
}