Find all nodes that contain the specified HTML tags in their body text

Public

Our use case: for security and performance reasons, we want to limit the tags that can be inserted via the WYSIWYG editor and/or the tags allowed by Drupal's input filters when content is rendered in the node view. Before doing so, we want to check that the change will not have a drastic effect on our existing content.

</> CopyGet raw version
php
  // Tip: You could easily run this code on
  // http://www.yourdrupalsite.com/devel/php admin page.
  //
  // Purpose: report (via dpm()) the nid of every article node whose body
  // field contains one of the HTML tags listed in $regex. Every language and
  // every delta of the body field is inspected, in both the 'value' and the
  // 'summary' columns.

  // Create a new EFQ object.
  $query = new EntityFieldQuery();

  // Select all article nodes.
  // Remove the 'bundle' condition if you want to crawl all content types.
  $query
    ->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'article');

  $result = $query->execute();

  // If there's at least one result.
  if (isset($result['node'])) {
    // Retrieve the list of node ids.
    $nids = array_keys($result['node']);

    // Match the opening OR closing form of each tag, case-insensitively.
    // Looking only for end tags would miss void elements such as <embed> and
    // <param>, which are normally written without a closing tag, and would
    // miss upper/mixed-case markup such as </SCRIPT>.
    $regex = '~</?(?:script|embed|object|param|iframe)\b~i';

    // Load the nodes in batches: one query set per batch keeps the number of
    // db requests low, while avoiding holding every node in memory at once.
    foreach (array_chunk($nids, 100) as $nid_chunk) {
      // The fourth argument resets the static entity cache so that nodes from
      // the previous batch can be freed.
      $articles = entity_load('node', $nid_chunk, array(), TRUE);

      // Traverse the entity objects.
      foreach ($articles as $article) {
        // Skip nodes without any body data.
        if (empty($article->body) || !is_array($article->body)) {
          continue;
        }

        $has_match = FALSE;

        // $article->body is keyed by language code, then by delta.
        foreach ($article->body as $items) {
          if (!is_array($items)) {
            continue;
          }

          foreach ($items as $item) {
            foreach (array('value', 'summary') as $column) {
              // preg_match() returns 1 on a match, 0 on no match and FALSE on
              // error, so compare strictly.
              if (isset($item[$column]) && preg_match($regex, $item[$column]) === 1) {
                $has_match = TRUE;
                // One hit is enough: report each node only once.
                break 3;
              }
            }
          }
        }

        if ($has_match) {
          dpm($article->nid);
        }
      }
    }
  }