Wordpress migrate: Fix (image) links after import

Public

After you have successfully imported a site (blog) from wordpress.com with the wordpress_migrate.module, you may face one problem: the (image) links in the imported content still point to the original Wordpress.com site. You cannot download images directly from Wordpress.com, so here is a little help to resolve this issue with a simple hook_update_N().
(Let's suppose that the imported content is in the Article content type.)

Get raw version
php
  1. <?php
  2.  
  /**
   * Download the images from Wordpress.com and replace the urls in the body.
   *
   * Batched hook_update_N() implementation: every pass handles up to 10 article
   * nodes. For each node, all image URLs in the body that point to the
   * Wordpress.com file host are downloaded below public:// (keeping the
   * original sub-path) and the body is rewritten to use the local copies.
   *
   * A node is left untouched when one of its images cannot be imported; the
   * failure is logged and the update carries on with the next node. Nodes
   * without a body or without matching links are skipped without being saved.
   *
   * @param array $sandbox
   *   Batch state kept between passes: 'nids' (all article node IDs), 'total',
   *   'current' (number of nodes handled so far), 'regexp' (the image URL
   *   pattern) and '#finished' (progress between 0 and 1).
   */
  function YOUR_MODULE_update_7000(&$sandbox) {
    // First pass: collect the nodes to process and initialise the batch state.
    if (!isset($sandbox['total'])) {
      $sandbox['nids'] = db_select('node', 'n')
        ->fields('n', ['nid'])
        ->condition('n.type', 'article')
        ->execute()
        ->fetchCol();
      $sandbox['total'] = count($sandbox['nids']);
      $sandbox['current'] = 0;
      // Group 1 is the scheme and host, group 2 the path of the image. The dots
      // are escaped so they only match literal dots, and the pattern is
      // case-insensitive so that extensions such as ".JPG" are found as well.
      $sandbox['regexp'] = '/(https?:\/\/colearningbe\.files\.wordpress\.com)([0-9a-zA-Z\/._-]+\.(?:jpe?g|png|gif))/i';
    }

    // Process the next slice of nodes during this pass.
    foreach (array_slice($sandbox['nids'], $sandbox['current'], 10) as $nid) {
      // Count the node up front so every early exit below still advances the
      // batch.
      $sandbox['current']++;

      if (!($node = node_load($nid))) {
        continue;
      }

      $node_wrapper = entity_metadata_wrapper('node', $node);
      $body = $node_wrapper->body->raw();

      // Skip nodes that have no body or no Wordpress image link: there is
      // nothing to download and no reason to re-save them.
      if (empty($body['value']) || !preg_match_all($sandbox['regexp'], $body['value'], $matches)) {
        continue;
      }

      try {
        // Download every linked image once, even if it is referenced several
        // times in the same body. array_unique() keeps the original keys, so
        // $id still points to the matching path in $matches[2].
        foreach (array_unique($matches[0]) as $id => $url) {
          $pathinfo = pathinfo($matches[2][$id]);
          $directory = file_stream_wrapper_uri_normalize('public://' . $pathinfo['dirname']);

          if (!file_prepare_directory($directory, FILE_CREATE_DIRECTORY)) {
            throw new Exception(format_string('Directory can not be created: @directory', ['@directory' => $directory]));
          }

          $response = drupal_http_request($url);
          if ($response->code != 200 || empty($response->data)) {
            throw new Exception(format_string('Image can not be imported from <a href="@url">this</a> url! Details: <pre>@details</pre>', [
              '@url' => $url,
              '@details' => print_r($response, TRUE),
            ]));
          }

          $destination = $directory . '/' . $pathinfo['basename'];
          if (!file_unmanaged_save_data($response->data, $destination, FILE_EXISTS_REPLACE)) {
            throw new Exception(format_string('Image can not be saved from <a href="@url">this</a> url to the @path path!', [
              '@url' => $url,
              '@path' => $destination,
            ]));
          }

          watchdog('blog_image_import', 'Image successfully saved from <a href="@url">this</a> url to the @path path!', [
            '@url' => $url,
            '@path' => $destination,
          ], WATCHDOG_INFO);
        }

        // Replace the Wordpress image links with the relative local ones. This
        // is only reached when every image of the node has been downloaded.
        $node_wrapper->body->value = preg_replace($sandbox['regexp'], '/' . variable_get('file_public_path', 'sites/default/files') . '$2', $body['value']);
        $node_wrapper->save();
        watchdog('blog_image_import', '<a href="@url">@title</a> node successfully updated!', [
          // url() expects an internal path without a leading slash.
          '@url' => url('node/' . $node->nid),
          '@title' => $node->title,
        ], WATCHDOG_INFO);
      }
      catch (Exception $e) {
        // Best effort: log the failure (including the exception message via the
        // !message placeholder) and continue with the next node.
        watchdog_exception('blog_image_import', $e, '<a href="@url">@title</a> node can not be updated, because there was an error meanwhile the image import! Details: !message', [
          '@url' => url('node/' . $node->nid),
          '@title' => $node->title,
        ]);
      }
    }

    // Report the progress. With no article nodes at all there is nothing to
    // do, so the update is finished immediately instead of dividing by zero.
    $sandbox['#finished'] = $sandbox['total'] > 0 ? $sandbox['current'] / $sandbox['total'] : 1;

    if ($sandbox['current'] >= $sandbox['total']) {
      drupal_set_message(t('We successfully updated @nodes nodes.', ['@nodes' => $sandbox['total']]));
    }
  }

Comments

Dezső BICZÓ's picture

You should throw the exception again at the end of the catch block (after line 73) if you would like to abort the update function on an error.