Find all nodes that contain the specified HTML tags in their body text

Public

Our use case: for security and performance reasons, we want to limit the tags that can be inserted via the WYSIWYG editor and/or the tags allowed by Drupal's input filters when the content is rendered in node view. Before tightening those limits, we want to check that doing so will not have a drastic effect on our existing content.

Get raw version
php
  // Tip: You could easily run this code on
  // http://www.yourdrupalsite.com/devel/php (the Devel "Execute PHP" admin page).
  //
  // Purpose: print (via dpm()) the nid of every article node whose body field
  // contains one of the HTML tags listed in $regex. Every language and every
  // delta of the body field is inspected, in both the "value" and "summary"
  // columns.

  // Create a new EFQ object.
  $query = new EntityFieldQuery();

  // Select all article nodes.
  // Remove the 'bundle' condition if you want to crawl all content types.
  $query
    ->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'article');

  $result = $query->execute();

  // If there's at least one result.
  if (isset($result['node'])) {
    // Retrieve the list of node ids.
    $nids = array_keys($result['node']);

    // Match the opening OR closing form of each tag, case-insensitively.
    // Matching only end tags is not enough: <embed> and <param> are void
    // elements that normally have no end tag, and HTML tag names are not
    // case-sensitive. The trailing \b stops "<object" from matching longer
    // names such as "<objective".
    $regex = '~</?(?:script|embed|object|param|iframe)\b~i';

    // Load the nodes in batches: one entity_load() per batch keeps the number
    // of DB requests low without holding every node in memory at once.
    foreach (array_chunk($nids, 100) as $nid_batch) {
      // The fourth argument resets the entity cache before loading, so nodes
      // from the previous batch can be released.
      $articles = entity_load('node', $nid_batch, array(), TRUE);

      // Traverse the entity objects.
      foreach ($articles as $article) {
        // Skip nodes without any body data.
        if (empty($article->body) || !is_array($article->body)) {
          continue;
        }

        // $article->body is keyed by language code, then by delta. Do not
        // assume 'und': translated bodies are stored under their own code.
        foreach ($article->body as $items) {
          foreach ((array) $items as $item) {
            $value = isset($item['value']) ? $item['value'] : '';
            $summary = isset($item['summary']) ? $item['summary'] : '';

            // The newline separator prevents a tag from being "formed" across
            // the value/summary boundary.
            if (preg_match($regex, $value . "\n" . $summary)) {
              dpm($article->nid);

              // Report each node once, then move on to the next node.
              continue 3;
            }
          }
        }
      }
    }
  }