/**
* 静态资源提取脚本
* 从 header.html 和 footer.html 中提取 base64 图片、内联 CSS 和字体
*
* 功能:
* 1. 提取 base64 图片到 assets/images/
* 2. 提取所有内联 CSS 到 assets/css/
* 3. 提取字体文件到 assets/fonts/
* 4. 去除所有 meta 标签
* 5. 去除所有 script 标签
* 6. 只保留 body 内的内容
* 7. 把引入的 style 放到顶部
* 8. 给所有 HTML 属性值自动加上双引号
*
* 使用方法:
* - node extract-resources.js # 正常运行,从备份恢复
* - node extract-resources.js --keep # 保持当前文件,不从备份恢复
*/
const fs = require('fs');
const path = require('path');
// Command-line switch: with `--keep`, the files are processed in their current
// state rather than being restored from their `.backup` copies first.
const KEEP_CURRENT = process.argv.includes('--keep');

// Directory that holds the HTML files to be processed.
const PUBLIC_DIR = path.join(__dirname, 'public/ww_gb_post_temp1');
// Static root (a subfolder of PUBLIC_DIR) under which extracted assets are written.
const STATIC_DIR = path.join(PUBLIC_DIR, 'st');

// HTML files, relative to PUBLIC_DIR, that this run processes.
const FILES_TO_PROCESS = ['footer.html'];
// Alternative page set, currently disabled:
// const FILES_TO_PROCESS = ['home.html', 'page2.html', 'page3.html', 'page4.html', 'page5.html'];

// Output layout: <STATIC_DIR>/assets/{images,css,fonts}
const ASSETS_DIR = path.join(STATIC_DIR, 'assets');
const [IMG_DIR, CSS_DIR, FONTS_DIR] = ['images', 'css', 'fonts'].map(
  (subdir) => path.join(ASSETS_DIR, subdir),
);
// 清理旧资源文件的函数
/**
 * Delete every regular file that sits directly inside `dir`.
 *
 * Subdirectories (and anything inside them) are left alone, and a directory
 * that does not exist is silently ignored.
 *
 * @param {string} dir - Path of the directory to empty.
 * @returns {void}
 */
function cleanDirectory(dir) {
  // Nothing to clean when the directory has not been created yet.
  if (!fs.existsSync(dir)) {
    return;
  }

  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);
    // statSync resolves symlinks, so a link pointing at a file counts as a file.
    const isRegularFile = fs.statSync(entryPath).isFile();
    if (isRegularFile) {
      fs.unlinkSync(entryPath);
    }
  }
}
// Make sure the asset directory tree exists before anything is written to it.
for (const assetDir of [ASSETS_DIR, IMG_DIR, CSS_DIR, FONTS_DIR]) {
  const alreadyPresent = fs.existsSync(assetDir);
  if (!alreadyPresent) {
    fs.mkdirSync(assetDir, { recursive: true });
  }
}

// Remove files left in the output folders (e.g. by an earlier run).
console.log('🧹 清理旧资源文件...');
for (const outputDir of [IMG_DIR, CSS_DIR, FONTS_DIR]) {
  cleanDirectory(outputDir);
}

// Start-up banner, including the directory the HTML files are read from.
console.log('🚀 开始提取静态资源...\n');
console.log(`📂 工作目录: ${PUBLIC_DIR}\n`);
FILES_TO_PROCESS.forEach(filename => {
const filePath = path.join(PUBLIC_DIR, filename);
const backupPath = filePath + '.backup';
if (!fs.existsSync(filePath)) {
console.log(`⚠️ 文件不存在: ${filename}`);
return;
}
// 创建备份
if (!fs.existsSync(backupPath)) {
fs.copyFileSync(filePath, backupPath);
console.log(`📄 处理文件: ${filename} (已创建备份)`);
} else if (!KEEP_CURRENT) {
// 如果备份存在且未指定 --keep,从备份恢复
fs.copyFileSync(backupPath, filePath);
console.log(`📄 处理文件: ${filename} (从备份恢复)`);
} else {
console.log(`📄 处理文件: ${filename} (保持当前版本)`);
}
let content = fs.readFileSync(filePath, 'utf8');
const originalSize = content.length;
let imageCount = 0;
let cssCount = 0;
let fontCount = 0;
// 1. 提取 base64 图片
console.log(' 提取 base64 图片...');
content = content.replace(/url\s*\(\s*["']?(data:image\/([^;]+);base64,([^"')]+))["']?\s*\)/gi,
(match, dataUrl, imageType, base64Data) => {
imageCount++;
// 修复图片扩展名,处理 svg+xml 等情况
let ext = imageType.split('/').pop();
if (ext.includes('svg')) {
ext = 'svg';
} else if (ext.includes('+')) {
ext = ext.split('+')[0];
}
const imageName = `${filename.replace('.html', '')}_img_${imageCount}.${ext}`;
const imagePath = path.join(IMG_DIR, imageName);
try {
const buffer = Buffer.from(base64Data, 'base64');
fs.writeFileSync(imagePath, buffer);
return `url("/ww_gb_post_temp1/st/assets/images/${imageName}")`;
} catch (e) {
console.log(` ⚠️ 无法保存图片 ${imageName}:`, e.message);
return match;
}
}
);
// 2. 提取 img src 中的 base64 (有引号的)
content = content.replace(/
]*?)src\s*=\s*["'](data:image\/([^;]+);base64,([^"']+))["']([^>]*)>/gi,
(match, beforeAttrs, dataUrl, imageType, base64Data, afterAttrs) => {
imageCount++;
// 修复图片扩展名
let ext = imageType.split('/').pop();
if (ext.includes('svg')) {
ext = 'svg';
} else if (ext.includes('+')) {
ext = ext.split('+')[0];
}
const imageName = `${filename.replace('.html', '')}_inline_${imageCount}.${ext}`;
const imagePath = path.join(IMG_DIR, imageName);
try {
const buffer = Buffer.from(base64Data, 'base64');
fs.writeFileSync(imagePath, buffer);
// 确保属性间有正确的空格
const before = beforeAttrs ? ' ' + beforeAttrs.trim() : '';
const after = afterAttrs ? ' ' + afterAttrs.trim() : '';
return `
`;
} catch (e) {
console.log(` ⚠️ 无法保存图片 ${imageName}:`, e.message);
return match;
}
}
);
// 3. 提取 img src 中的 base64 (没有引号的,直到遇到空白字符或>)
content = content.replace(/
]*?)src\s*=\s*(data:image\/([^;\s>]+);base64,([^\s>]+))([^>]*)>/gi,
(match, beforeAttrs, dataUrl, imageType, base64Data, afterAttrs) => {
imageCount++;
// 修复图片扩展名
let ext = imageType.split('/').pop();
if (ext.includes('svg')) {
ext = 'svg';
} else if (ext.includes('+')) {
ext = ext.split('+')[0];
}
const imageName = `${filename.replace('.html', '')}_inline_${imageCount}.${ext}`;
const imagePath = path.join(IMG_DIR, imageName);
try {
const buffer = Buffer.from(base64Data, 'base64');
fs.writeFileSync(imagePath, buffer);
// 确保属性间有正确的空格
const before = beforeAttrs ? ' ' + beforeAttrs.trim() : '';
const after = afterAttrs ? ' ' + afterAttrs.trim() : '';
return `
`;
} catch (e) {
console.log(` ⚠️ 无法保存图片 ${imageName}:`, e.message);
return match;
}
}
);
// 4. 提取 CSS 变量中的 base64
content = content.replace(/--[^:]+:\s*url\s*\(\s*["']?(data:image\/([^;]+);base64,([^"')]+))["']?\s*\)/gi,
(match, dataUrl, imageType, base64Data) => {
imageCount++;
// 修复图片扩展名
let ext = imageType.split('/').pop();
if (ext.includes('svg')) {
ext = 'svg';
} else if (ext.includes('+')) {
ext = ext.split('+')[0];
}
const imageName = `${filename.replace('.html', '')}_var_${imageCount}.${ext}`;
const imagePath = path.join(IMG_DIR, imageName);
try {
const buffer = Buffer.from(base64Data, 'base64');
fs.writeFileSync(imagePath, buffer);
const varName = match.split(':')[0];
return `${varName}: url("/ww_gb_post_temp1/st/assets/images/${imageName}")`;
} catch (e) {
console.log(` ⚠️ 无法保存图片 ${imageName}:`, e.message);
return match;
}
}
);
// 5. 提取所有内联 CSS (style 标签)
console.log(' 提取内联 CSS...');
const cssLinks = []; // 用于收集所有 CSS 链接
const styleMatches = content.match(/