Skip to content
This repository was archived by the owner on May 30, 2025. It is now read-only.

[Blueprints] Import WXRs via the DataLiberation importer (first stab) #25

Open
wants to merge 6 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/build-libraries-phar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ cd $PROJECT_DIR
mkdir -p $BUILD_DIR
rm $DIST_DIR/wordpress-libraries.* > /dev/null 2>&1 || true
export BOX_BASE_PATH=$(type -a box | grep -v 'alias' | awk '{print $3}')
php $BUILD_DIR/box.php compile -d $PROJECT_DIR -c $PROJECT_DIR/phar-box.json
php $BUILD_DIR/box.php compile -d $PROJECT_DIR -c $PROJECT_DIR/phar-libraries.json
php -d 'phar.readonly=0' $BUILD_DIR/truncate-composer-checks.php $DIST_DIR/wordpress-libraries.phar
cd $DIST_DIR
php $BUILD_DIR/smoke-test.php
Expand Down
6 changes: 4 additions & 2 deletions bin/build-phar/smoke-test.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

require_once __DIR__ . '/../../dist/wordpress-libraries.phar';
require_once __DIR__ . '/../../dist/php-toolkit.phar';

/**
* None of this will actually try to parse a file or import
Expand All @@ -9,7 +9,9 @@
*/
$c = WordPress\DataLiberation\Importer\StreamImporter::create_for_wxr_file(__DIR__ . '/nosuchfile.xml', [
'uploads_path' => __DIR__ . '/uploads',
'new_site_url' => 'https://smoke-test.org'
'new_site_url' => 'https://smoke-test.org',
'new_site_content_root_url' => 'https://smoke-test.org',
'new_media_root_url' => 'https://smoke-test.org',
]);

WordPress\DataLiberation\URL\WPURL::parse('https://example.com');
Expand Down
22 changes: 19 additions & 3 deletions components/Blueprints/Runner.php
Original file line number Diff line number Diff line change
Expand Up @@ -618,10 +618,26 @@ private function createExecutionPlan(): array {
}

foreach ( $plan as $step ) {
// @TODO: Make sure this doesn't get included twice in the execution plan.
// @TODO: Make sure this doesn't get included twice in the execution plan,
// e.g. if the Blueprint specified this step manually.
if ( $step instanceof ImportContentStep ) {
array_unshift( $plan, $this->createStepObject( 'installPlugin', [
'source' => $this->createDataReference( 'https://playground.wordpress.net/wordpress-importer.zip' ),
if($this->configuration->isRunningAsPhar()) {
throw new InvalidArgumentException( '@TODO: Importing content is not supported when running as phar.' );
} else {
$libraries_phar_path = __DIR__ . '/../../dist/php-toolkit.phar';
if(!file_exists($libraries_phar_path)) {
throw new InvalidArgumentException(
'In development, you must run `bash bin/build-libraries-phar.sh` to bundle importer libraries before importing content via a Blueprint. '.
'It generates a `dist/php-toolkit.phar` file bundling all the libraries required for importing content.'
);
}
$this->configuration->getLogger()->info( 'Loading importer libraries from ' . $libraries_phar_path );
$source = $this->createDataReference( new InlineFile( 'php-toolkit.phar', file_get_contents( $libraries_phar_path ) ) );
}
array_unshift( $plan, $this->createStepObject( 'writeFiles', [
'files' => [
'php-toolkit.phar' => $source,
],
] ) );
break;
}
Expand Down
4 changes: 4 additions & 0 deletions components/Blueprints/RunnerConfiguration.php
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,8 @@ public function isAllowedLocalFilesystemAccess(): bool {
/**
 * Returns the CLI flag corresponding to a permission.
 *
 * As written, the permission string is handed back unchanged.
 * NOTE(review): presumably permission names double as CLI flag names — confirm against callers.
 *
 * @param string $permission Permission identifier.
 * @return string The same value that was passed in.
 */
public static function getPermissionCliFlag( string $permission ): string {
return $permission;
}

/**
 * Tells whether the current code is executing from inside a PHAR archive.
 *
 * `Phar::running( false )` returns the path of the executing archive, or an
 * empty string when the code is not running from one.
 *
 * When the phar extension is not loaded, the code cannot be running from an
 * archive, so this returns false instead of failing on the missing class.
 *
 * @return bool True when running from within a PHAR archive, false otherwise.
 */
public function isRunningAsPhar(): bool {
	// Without ext-phar the Phar class is undefined and calling it would be a fatal error.
	// Phar is an internal class, so there is no point in triggering autoloaders.
	if ( ! class_exists( \Phar::class, false ) ) {
		return false;
	}

	return \Phar::running( false ) !== '';
}
}
93 changes: 47 additions & 46 deletions components/Blueprints/Steps/ImportContentStep.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public function run( Runtime $runtime, Tracker $progress ) {
$this->importWxr( $runtime, $content_definition );
} elseif ( $content_definition['type'] === 'posts' ) {
$progress[ $i ]->setCaption( 'Importing a post ' );
$this->importPosts( $runtime, $content_definition );
$this->importPosts( $runtime, $content_definition['source'] );
} else {
throw new RuntimeException( 'Unsupported content type: ' . $content_definition['type'] );
}
Expand All @@ -64,74 +64,75 @@ private function importWxr( Runtime $runtime, array $content_definition ): void
) );
}

// @TODO: Pass the data reference to the import script to enable streaming.
$wxrPath = $runtime->saveToTemporaryFile( $resolved );

// @TODO: Make it work when Blueprints are running as phar archive
$import_script_path = __DIR__ . '/scripts/import-content.php';
if ( ! file_exists( $import_script_path ) ) {
throw new BlueprintExecutionException( sprintf(
'Import script %s does not exist.',
$import_script_path
) );
}

$importer_script = file_get_contents( $import_script_path );
$runtime->evalPhpCodeInSubProcess(
<<<'PHP'
<?php
require_once getenv('DOCROOT') . '/wp-load.php';
require_once getenv('DOCROOT') . '/wp-admin/includes/admin.php';

kses_remove_filters();
$admin_id = get_users(array('role' => 'Administrator') )[0]->ID;
wp_set_current_user( $admin_id );

wp_set_current_user( $admin_id );
$importer = new WXR_Importer( array(
'fetch_attachments' => true,
// @TODO: Support custom author
'default_author' => $admin_id
) );
$logger = new WP_Importer_Logger_CLI();
$importer->set_logger( $logger );
// Slashes from the imported content are lost if we don't call wp_slash here.
add_action( 'wp_insert_post_data', function( $data ) {
return wp_slash($data);
});

// Ensure that Site Editor templates are associated with the correct taxonomy.
add_filter( 'wp_import_post_terms', function ( $terms, $post_id ) {
foreach ( $terms as $post_term ) {
if ( 'wp_theme' !== $term['taxonomy'] ) {continue;}
$post_term = get_term_by('slug', $term['slug'], $term['taxonomy'] );
if ( ! $post_term ) {
$post_term = wp_insert_term(
$term['slug'],
$term['taxonomy']
);
$term_id = $post_term['term_id'];
} else {
$term_id = $post_term->term_id;
}
wp_set_object_terms( $post_id, $term_id, $term['taxonomy']) ;
}
return $terms;
}, 10, 2 );
$result = $importer->import( getenv('WXR_PATH') );
// @TODO: Just call a function here, do not go through CLI arguments.
// @TODO: Establish a communication channel between the main process and the subprocess
// to report progress and errors.
// @TODO: Enforce chrooting of the imported static files.
$_SERVER['argv'] = [
'import-wxr.php',
'wxr',
getenv('WXR_PATH'),
// @TODO: Support arbitrary media URLs to enable fetching assets during import.
// '--media-url',
// 'https://pd.w.org/'
];
?>
PHP
. $importer_script
,
[
'WXR_PATH' => $wxrPath,
]
);
}

private function importPosts( Runtime $runtime, array $content_definition ): void {
$posts = $content_definition['source'];
if ( ! is_array( $posts ) ) {
throw new RuntimeException( 'Invalid posts data.' );
private function importPosts( Runtime $runtime, $post ): void {
// @TODO: Use the Data Liberation importer here.
$resolved = $runtime->resolve( $post );
if ( ! $resolved instanceof File ) {
throw new BlueprintExecutionException( sprintf(
'Imported content reference must be a file, but %s was a Directory.',
$post->get_human_readable_name()
) );
}

$runtime->evalPhpCodeInSubProcess(
<<<'PHP'
<?php
require_once getenv('DOCROOT') . '/wp-load.php';
foreach (json_decode(getenv('POSTS'), true) as $post) {
wp_insert_post(wp_slash($post));
$result = wp_insert_post(wp_slash($post));
if (is_wp_error($result)) {
throw new Exception( $result->get_error_message() );
}
}
PHP
,
[
'POSTS' => json_encode( $posts ),
'POSTS' => json_encode( [
[
'post_title' => 'Test Post',
'post_content' => $resolved->getStream()->consume_all(),
'post_status' => 'publish',
'post_type' => 'post',
],
] ),
]
);
}
Expand Down
Loading
Loading