From ec2b1d21f4809863974c031177670fdfcdfd39a3 Mon Sep 17 00:00:00 2001
From: gbanyan
Date: Wed, 11 Feb 2026 09:15:19 +0800
Subject: [PATCH] feat(cms): import legacy article documents into document library

---
Review notes (not part of the commit message):
- Line breaks and indentation were restored. The first added line of each
  hunk was already truncated in the copy under review and is kept verbatim.
- The command body was revised after the diffstat and hunk headers below were
  generated, so their line counts are stale; regenerate the patch with
  `git format-patch` before applying it.

 .../Commands/ImportArticleDocuments.php       | 390 ++++++++++++++++++
 .../Cms/ImportArticleDocumentsCommandTest.php | 130 ++++++
 2 files changed, 520 insertions(+)
 create mode 100644 app/Console/Commands/ImportArticleDocuments.php
 create mode 100644 tests/Feature/Cms/ImportArticleDocumentsCommandTest.php

diff --git a/app/Console/Commands/ImportArticleDocuments.php b/app/Console/Commands/ImportArticleDocuments.php
new file mode 100644
index 0000000..2e9f2b6
--- /dev/null
+++ b/app/Console/Commands/ImportArticleDocuments.php
@@ -0,0 +1,390 @@
+option('dry-run');
+        $includeUnpublished = (bool) $this->option('include-unpublished');
+        $markArchived = (bool) $this->option('mark-archived');
+        $limit = max((int) $this->option('limit'), 0);
+
+        $fallbackUser = User::find((int) $this->option('fallback-user-id'));
+        if (! $fallbackUser) {
+            $this->error('Fallback user not found.');
+
+            return self::FAILURE;
+        }
+
+        $fallbackCategory = $this->resolveFallbackCategory($this->option('category-slug'));
+        if (! $fallbackCategory) {
+            $this->error('No document category found. Please create one or pass --category-slug.');
+
+            return self::FAILURE;
+        }
+
+        $query = Article::query()
+            ->with(['attachments', 'categories', 'creator'])
+            ->where('content_type', Article::CONTENT_TYPE_DOCUMENT);
+
+        if (! $includeUnpublished) {
+            $query->where('status', Article::STATUS_PUBLISHED);
+        }
+
+        if ($limit > 0) {
+            $query->limit($limit);
+        }
+
+        $articles = $query->orderBy('id')->get();
+
+        if ($articles->isEmpty()) {
+            $this->info('No article documents matched the import criteria.');
+
+            return self::SUCCESS;
+        }
+
+        $this->info('Article document import started');
+        $this->line('Matched articles: '.$articles->count());
+        $this->line('Fallback category: '.$fallbackCategory->slug);
+        $this->line('Dry run: '.($dryRun ? 'yes' : 'no'));
+        $this->line('Mark source archived: '.($markArchived ? 'yes' : 'no'));
+        $this->newLine();
+
+        $imported = 0;
+        $skipped = 0;
+        $failed = 0;
+        $archivedSources = 0;
+
+        foreach ($articles as $article) {
+            try {
+                $result = $this->importSingleArticle(
+                    article: $article,
+                    fallbackCategory: $fallbackCategory,
+                    fallbackUser: $fallbackUser,
+                    dryRun: $dryRun,
+                    markArchived: $markArchived
+                );
+
+                if ($result === 'imported') {
+                    $imported++;
+                } elseif ($result === 'archived_source') {
+                    $imported++;
+                    $archivedSources++;
+                } else {
+                    $skipped++;
+                }
+            } catch (\Throwable $e) {
+                $failed++;
+                $this->error("✗ {$article->slug}: {$e->getMessage()}");
+            }
+        }
+
+        $this->newLine();
+        $this->info('Import finished');
+        $this->line("Imported: {$imported}");
+        $this->line("Skipped: {$skipped}");
+        $this->line("Failed: {$failed}");
+        if ($markArchived) {
+            $this->line("Source articles archived: {$archivedSources}");
+        }
+
+        return $failed > 0 ? self::FAILURE : self::SUCCESS;
+    }
+
+    /**
+     * Private-disk paths written while importing the article currently in progress.
+     *
+     * Reset at the start of every import and used to remove the files again when the
+     * surrounding database transaction fails.
+     *
+     * @var list<string>
+     */
+    private array $stagedPaths = [];
+
+    /**
+     * Import one legacy article as a Document with one DocumentVersion per attachment.
+     *
+     * Articles whose deterministic public UUID already exists as a Document are skipped,
+     * which is what makes re-running the command safe. When no attachment could be
+     * copied, a single Markdown version is generated from the article body instead.
+     *
+     * @return string 'skipped' (already imported, or dry run), 'archived_source' (imported
+     *                and the source article was archived by this call) or 'imported'
+     *
+     * @throws \Throwable whatever made the transaction fail; files staged on the private
+     *                    disk for this article are deleted before it is rethrown
+     */
+    private function importSingleArticle(
+        Article $article,
+        DocumentCategory $fallbackCategory,
+        User $fallbackUser,
+        bool $dryRun,
+        bool $markArchived
+    ): string {
+        $publicUuid = $this->deterministicPublicUuid($article);
+        $existing = Document::where('public_uuid', $publicUuid)->first();
+
+        if ($existing) {
+            $this->warn("↷ {$article->slug}: already imported as document #{$existing->id}, skipping.");
+
+            return 'skipped';
+        }
+
+        $category = $this->resolveCategoryForArticle($article, $fallbackCategory);
+        $actor = $article->creator ?: $fallbackUser;
+        $accessLevel = $this->normalizeAccessLevel($article->access_level);
+        $documentStatus = $article->status === Article::STATUS_PUBLISHED ? 'active' : 'archived';
+        $description = $this->resolveDescription($article);
+
+        if ($dryRun) {
+            $this->line("• {$article->slug}: would import to category={$category->slug}, access={$accessLevel}, attachments={$article->attachments->count()}");
+
+            return 'skipped';
+        }
+
+        $this->stagedPaths = [];
+
+        try {
+            $sourceArchived = DB::transaction(function () use (
+                $article,
+                $publicUuid,
+                $category,
+                $actor,
+                $accessLevel,
+                $documentStatus,
+                $description,
+                $markArchived
+            ): bool {
+                $document = Document::create([
+                    'document_category_id' => $category->id,
+                    'title' => $article->title,
+                    'document_number' => null,
+                    'description' => $description,
+                    'public_uuid' => $publicUuid,
+                    'access_level' => $accessLevel,
+                    'status' => $documentStatus,
+                    'created_by_user_id' => $actor->id,
+                    'last_updated_by_user_id' => $actor->id,
+                    'version_count' => 0,
+                    'archived_at' => $documentStatus === 'archived' ? now() : null,
+                ]);
+
+                // Oldest attachment first, so the newest one ends up as the current version.
+                $versionIds = [];
+                $orderedAttachments = $article->attachments->sortBy([
+                    fn (ArticleAttachment $attachment) => $attachment->created_at?->timestamp ?? 0,
+                    fn (ArticleAttachment $attachment) => $attachment->id,
+                ])->values();
+
+                foreach ($orderedAttachments as $attachment) {
+                    $copied = $this->copyAttachmentToPrivate($article, $attachment);
+                    if (! $copied) {
+                        continue;
+                    }
+
+                    $version = DocumentVersion::create([
+                        'document_id' => $document->id,
+                        'version_number' => $this->versionNumberFromIndex(count($versionIds)),
+                        'version_notes' => $attachment->description ?: 'Imported from legacy article attachment',
+                        'is_current' => false,
+                        'file_path' => $copied['file_path'],
+                        'original_filename' => $copied['original_filename'],
+                        'mime_type' => $copied['mime_type'],
+                        'file_size' => $copied['file_size'],
+                        'file_hash' => $copied['file_hash'],
+                        'uploaded_by_user_id' => $actor->id,
+                        'uploaded_at' => $attachment->created_at ?? $article->updated_at ?? now(),
+                        'created_at' => $attachment->created_at ?? $article->updated_at ?? now(),
+                        'updated_at' => $attachment->updated_at ?? $attachment->created_at ?? now(),
+                    ]);
+
+                    $versionIds[] = $version->id;
+                }
+
+                // No usable attachment: fall back to a Markdown rendering of the article.
+                if ($versionIds === []) {
+                    $markdownVersion = $this->createMarkdownVersion($article, $document, $actor);
+                    $versionIds[] = $markdownVersion->id;
+                }
+
+                $currentVersionId = end($versionIds) ?: null;
+                if (! $currentVersionId) {
+                    throw new \RuntimeException("Failed to create version for article {$article->slug}");
+                }
+
+                DocumentVersion::where('document_id', $document->id)->update(['is_current' => false]);
+                DocumentVersion::where('id', $currentVersionId)->update(['is_current' => true]);
+
+                $document->forceFill([
+                    'current_version_id' => $currentVersionId,
+                    'version_count' => count($versionIds),
+                    'created_at' => $article->published_at ?? $article->created_at ?? now(),
+                    'updated_at' => $article->updated_at ?? $article->published_at ?? now(),
+                ])->save();
+
+                // Only report the source as archived when this run actually changed it;
+                // an article that was already archived is left untouched.
+                $archivedNow = false;
+                if ($markArchived && $article->status !== Article::STATUS_ARCHIVED) {
+                    $article->update([
+                        'status' => Article::STATUS_ARCHIVED,
+                        'archived_at' => now(),
+                        'last_updated_by_user_id' => $actor->id,
+                    ]);
+                    $archivedNow = true;
+                }
+
+                AuditLog::create([
+                    'user_id' => $actor->id,
+                    'action' => 'article.document_imported',
+                    'description' => "Imported article document {$article->slug} -> document #{$document->id}",
+                    'auditable_type' => Document::class,
+                    'auditable_id' => $document->id,
+                    'metadata' => [
+                        'source_article_id' => $article->id,
+                        'source_article_slug' => $article->slug,
+                        'imported_version_count' => count($versionIds),
+                    ],
+                    'ip_address' => '127.0.0.1',
+                ]);
+
+                return $archivedNow;
+            });
+        } catch (\Throwable $e) {
+            // The database rollback does not undo writes to the private disk, so remove
+            // the files staged for this article; otherwise every retry leaves orphans.
+            if ($this->stagedPaths !== []) {
+                Storage::disk('private')->delete($this->stagedPaths);
+            }
+
+            throw $e;
+        }
+
+        $this->info("✓ {$article->slug}: imported.");
+
+        return $sourceArchived ? 'archived_source' : 'imported';
+    }
+
+    /**
+     * Pick the category used when an article has no matching document category.
+     *
+     * An explicit slug must match exactly (null is returned when it does not). Without a
+     * slug, 'organization-public-disclosure' is tried first, then the first category
+     * ordered by sort_order and id.
+     */
+    private function resolveFallbackCategory(?string $categorySlug): ?DocumentCategory
+    {
+        if ($categorySlug) {
+            return DocumentCategory::where('slug', $categorySlug)->first();
+        }
+
+        return DocumentCategory::where('slug', 'organization-public-disclosure')->first()
+            ?: DocumentCategory::orderBy('sort_order')->orderBy('id')->first();
+    }
+
+    /**
+     * Return the first document category whose slug equals one of the article's
+     * category slugs, or the fallback category when none matches.
+     */
+    private function resolveCategoryForArticle(Article $article, DocumentCategory $fallbackCategory): DocumentCategory
+    {
+        foreach ($article->categories as $category) {
+            $matched = DocumentCategory::where('slug', $category->slug)->first();
+            if ($matched) {
+                return $matched;
+            }
+        }
+
+        return $fallbackCategory;
+    }
+
+    /**
+     * Derive a stable UUIDv5 from the article id and slug.
+     *
+     * The same article always maps to the same UUID, which is how an earlier import is
+     * detected. Because the slug is part of the name, renaming the slug yields a new UUID.
+     */
+    private function deterministicPublicUuid(Article $article): string
+    {
+        return Uuid::uuid5(Uuid::NAMESPACE_URL, "legacy-article-document:{$article->id}:{$article->slug}")->toString();
+    }
+
+    /**
+     * Choose the document description: summary, then meta description, then the first
+     * 400 characters of the tag-stripped content. Returns null when all are empty.
+     */
+    private function resolveDescription(Article $article): ?string
+    {
+        $description = $article->summary ?: $article->meta_description;
+        if ($description) {
+            return $description;
+        }
+
+        // Content may be null; strip_tags() must not be handed null.
+        $plain = trim(strip_tags((string) ($article->content ?? '')));
+        if ($plain === '') {
+            return null;
+        }
+
+        return Str::limit($plain, 400);
+    }
+
+    /**
+     * Map an article access level onto the levels documents accept, defaulting to
+     * 'members' for null or unrecognised values.
+     */
+    private function normalizeAccessLevel(?string $accessLevel): string
+    {
+        return in_array($accessLevel, ['public', 'members', 'admin', 'board'], true)
+            ? $accessLevel
+            : 'members';
+    }
+
+    /**
+     * Version label for the n-th imported attachment (0-based): '1.0', '1.1', '1.2', ...
+     */
+    private function versionNumberFromIndex(int $index): string
+    {
+        return '1.'.$index;
+    }
+
+    /**
+     * Copy one attachment from the public disk to a unique path on the private disk.
+     *
+     * Returns null, after printing a warning, when the source file is missing or cannot
+     * be read, so the caller can skip the attachment.
+     *
+     * @return array{file_path:string, original_filename:string, mime_type:string, file_size:int, file_hash:string}|null
+     *
+     * @throws \RuntimeException when the copy cannot be written to the private disk
+     */
+    private function copyAttachmentToPrivate(Article $article, ArticleAttachment $attachment): ?array
+    {
+        if (! Storage::disk('public')->exists($attachment->file_path)) {
+            $this->warn("  - {$article->slug}: attachment #{$attachment->id} file missing ({$attachment->file_path}), skipped.");
+
+            return null;
+        }
+
+        $bytes = Storage::disk('public')->get($attachment->file_path);
+        if ($bytes === null) {
+            $this->warn("  - {$article->slug}: attachment #{$attachment->id} file unreadable ({$attachment->file_path}), skipped.");
+
+            return null;
+        }
+
+        $extension = pathinfo($attachment->original_filename, PATHINFO_EXTENSION);
+        $safeName = $this->sanitizeFilename(pathinfo($attachment->original_filename, PATHINFO_FILENAME) ?: 'attachment');
+        $targetPath = 'documents/imported/articles/'.$article->id.'/'.Str::uuid().'-'.$safeName.($extension ? '.'.$extension : '');
+
+        $this->storePrivateFile($targetPath, $bytes);
+
+        return [
+            'file_path' => $targetPath,
+            'original_filename' => $attachment->original_filename,
+            'mime_type' => $attachment->mime_type ?: 'application/octet-stream',
+            'file_size' => strlen($bytes),
+            'file_hash' => hash('sha256', $bytes),
+        ];
+    }
+
+    /**
+     * Create version 1.0 of a document from a Markdown rendering of the article.
+     *
+     * Used when the article has no attachment that could be copied.
+     *
+     * @throws \RuntimeException when the Markdown file cannot be written to the private disk
+     */
+    private function createMarkdownVersion(Article $article, Document $document, User $actor): DocumentVersion
+    {
+        $content = $this->buildMarkdownFromArticle($article);
+        $targetPath = 'documents/imported/articles/'.$article->id.'/'.Str::uuid().'-'.$article->slug.'.md';
+        $this->storePrivateFile($targetPath, $content);
+
+        return DocumentVersion::create([
+            'document_id' => $document->id,
+            'version_number' => '1.0',
+            'version_notes' => 'Imported from legacy article content (no file attachments)',
+            'is_current' => false,
+            'file_path' => $targetPath,
+            'original_filename' => $article->slug.'.md',
+            'mime_type' => 'text/markdown',
+            'file_size' => strlen($content),
+            'file_hash' => hash('sha256', $content),
+            'uploaded_by_user_id' => $actor->id,
+            'uploaded_at' => $article->updated_at ?? $article->published_at ?? now(),
+            'created_at' => $article->created_at ?? now(),
+            'updated_at' => $article->updated_at ?? $article->created_at ?? now(),
+        ]);
+    }
+
+    /**
+     * Write a file to the private disk and remember its path for rollback cleanup.
+     *
+     * @throws \RuntimeException when the disk reports that the write failed
+     */
+    private function storePrivateFile(string $path, string $contents): void
+    {
+        if (Storage::disk('private')->put($path, $contents) === false) {
+            throw new \RuntimeException("Failed to write {$path} to the private disk.");
+        }
+
+        $this->stagedPaths[] = $path;
+    }
+
+    /**
+     * Render the article as Markdown: title, provenance list, optional summary, content.
+     */
+    private function buildMarkdownFromArticle(Article $article): string
+    {
+        $sections = [
+            '# '.$article->title,
+            '',
+            '- Source article slug: `'.$article->slug.'`',
+            '- Source article id: `'.$article->id.'`',
+            '- Imported at: `'.now()->toIso8601String().'`',
+            '',
+        ];
+
+        if ($article->summary) {
+            $sections[] = '## Summary';
+            $sections[] = '';
+            $sections[] = trim($article->summary);
+            $sections[] = '';
+        }
+
+        $sections[] = '## Content';
+        $sections[] = '';
+        // Content may be null; trim() must not be handed null.
+        $sections[] = trim((string) ($article->content ?? ''));
+        $sections[] = '';
+
+        return implode("\n", $sections);
+    }
+
+    /**
+     * Reduce a filename stem to ASCII letters, digits, '.', '_' and '-'.
+     *
+     * Runs of other characters collapse to a single '-', and leading/trailing '-' are
+     * removed. The 'file' fallback is applied after trimming, so a name made up entirely
+     * of non-ASCII characters does not end up as an empty string.
+     */
+    private function sanitizeFilename(string $name): string
+    {
+        $sanitized = trim((string) preg_replace('/[^A-Za-z0-9._-]+/', '-', $name), '-');
+
+        return $sanitized !== '' ? $sanitized : 'file';
+    }
+}
diff --git a/tests/Feature/Cms/ImportArticleDocumentsCommandTest.php b/tests/Feature/Cms/ImportArticleDocumentsCommandTest.php
new file mode 100644
index 0000000..c15a424
--- /dev/null
+++ b/tests/Feature/Cms/ImportArticleDocumentsCommandTest.php
@@ -0,0 +1,130 @@
+create();
+        DocumentCategory::factory()->create([
+            'slug' => 'organization-public-disclosure',
+            'name' => '組織公開資訊',
+            'default_access_level' => 'public',
+        ]);
+
+        $articleWithAttachment = Article::factory()->create([
+            'title' => '舊版章程 PDF',
+            'slug' => 'legacy-charter',
+            'content_type' => Article::CONTENT_TYPE_DOCUMENT,
+            'status' => Article::STATUS_PUBLISHED,
+            'access_level' => Article::ACCESS_LEVEL_PUBLIC,
+            'summary' => '舊版章程摘要',
+            'content' => '舊版章程內容',
+            'created_by_user_id' => $user->id,
+            'last_updated_by_user_id' => $user->id,
+        ]);
+
+        Storage::disk('public')->put('articles/attachments/legacy-charter.pdf', '%PDF-1.4 test');
+        ArticleAttachment::create([
+            'article_id' => $articleWithAttachment->id,
+            'file_path' => 'articles/attachments/legacy-charter.pdf',
+            'original_filename' => 'legacy-charter.pdf',
+            'mime_type' => 'application/pdf',
+            'file_size' => 13,
+            'description' => '舊版附件',
+        ]);
+
+        $articleWithoutAttachment = Article::factory()->create([
+            'title' => '純文字福利資源',
+            'slug' => 'legacy-welfare-links',
+            'content_type' => Article::CONTENT_TYPE_DOCUMENT,
+            'status' => Article::STATUS_PUBLISHED,
+            'access_level' => Article::ACCESS_LEVEL_PUBLIC,
+            'summary' => '福利連結摘要',
+            'content' => "第一行\n第二行",
+            'created_by_user_id' => $user->id,
+            'last_updated_by_user_id' => $user->id,
+        ]);
+
+        $this->artisan('articles:import-documents', [
+            '--fallback-user-id' => $user->id,
+        ])->assertExitCode(0);
+
+        $this->assertDatabaseCount('documents', 2);
+
+        $importedFromAttachment = Document::where('title', $articleWithAttachment->title)->firstOrFail();
+        $this->assertSame('public', $importedFromAttachment->access_level);
+        $this->assertSame('active', $importedFromAttachment->status);
+        $this->assertSame(1, $importedFromAttachment->version_count);
+        $this->assertNotNull($importedFromAttachment->current_version_id);
+        $this->assertTrue($importedFromAttachment->currentVersion()->exists());
+        $this->assertSame('legacy-charter.pdf', $importedFromAttachment->currentVersion->original_filename);
+        $this->assertTrue(Storage::disk('private')->exists($importedFromAttachment->currentVersion->file_path));
+
+        $importedFromMarkdown = Document::where('title', $articleWithoutAttachment->title)->firstOrFail();
+        $this->assertSame(1, $importedFromMarkdown->version_count);
+        $this->assertNotNull($importedFromMarkdown->current_version_id);
+        $this->assertSame('text/markdown', $importedFromMarkdown->currentVersion->mime_type);
+        $this->assertSame('legacy-welfare-links.md', $importedFromMarkdown->currentVersion->original_filename);
+        $this->assertTrue(Storage::disk('private')->exists($importedFromMarkdown->currentVersion->file_path));
+
+        $markdown = Storage::disk('private')->get($importedFromMarkdown->currentVersion->file_path);
+        $this->assertStringContainsString('# 純文字福利資源', $markdown);
+        $this->assertStringContainsString('Source article slug: `legacy-welfare-links`', $markdown);
+
+        // Idempotency check
+        $this->artisan('articles:import-documents', [
+            '--fallback-user-id' => $user->id,
+        ])->assertExitCode(0);
+
+        $this->assertDatabaseCount('documents', 2);
+    }
+
+    public function test_it_can_archive_source_articles_after_import(): void
+    {
+        Storage::fake('public');
+        Storage::fake('private');
+
+        $user = User::factory()->create();
+        DocumentCategory::factory()->create([
+            'slug' => 'organization-public-disclosure',
+            'name' => '組織公開資訊',
+            'default_access_level' => 'public',
+        ]);
+
+        $article = Article::factory()->create([
+            'title' => '待封存來源文章',
+            'slug' => 'legacy-archive-me',
+            'content_type' => Article::CONTENT_TYPE_DOCUMENT,
+            'status' => Article::STATUS_PUBLISHED,
+            'access_level' => Article::ACCESS_LEVEL_PUBLIC,
+            'created_by_user_id' => $user->id,
+            'last_updated_by_user_id' => $user->id,
+        ]);
+
+        $this->artisan('articles:import-documents', [
+            '--fallback-user-id' => $user->id,
+            '--mark-archived' => true,
+        ])->assertExitCode(0);
+
+        $article->refresh();
+        $this->assertSame(Article::STATUS_ARCHIVED, $article->status);
+        $this->assertNotNull($article->archived_at);
+        $this->assertDatabaseCount('documents', 1);
+    }
+}