This commit is contained in:
telangpu
2026-04-28 00:42:28 +08:00
parent 2fd1a741cf
commit cf55c2cad6
2522 changed files with 566733 additions and 13 deletions

View File

@@ -0,0 +1,396 @@
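/**
 * Static resource extraction script.
 * Extracts base64 images, inline CSS and embedded fonts from the HTML files
 * listed in FILES_TO_PROCESS (e.g. header.html, footer.html).
 *
 * What it does:
 * 1. Extracts base64 images to assets/images/
 * 2. Extracts all inline CSS (style tags) to assets/css/
 * 3. Extracts embedded font files to assets/fonts/
 * 4. Removes the DOCTYPE, HTML comments, and all meta, title and non-stylesheet link tags
 * 5. Removes all script tags
 * 6. Keeps only the content inside body
 * 7. Puts the links to the extracted stylesheets at the top of the output
 * 8. Wraps unquoted HTML attribute values in double quotes
 *
 * Usage:
 * - node extract-resources.js         # normal run: restore each file from its .backup first
 *                                     # (the backup is created on the first run)
 * - node extract-resources.js --keep  # process the current files without restoring from backup
 */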
const fs = require('fs');
const path = require('path');
// Command-line switch: with `--keep`, an existing file is processed as-is
// instead of first being restored from its `.backup` copy.
const KEEP_CURRENT = process.argv.indexOf('--keep') !== -1;

// Directory holding the HTML files to process, and the static root below it.
const PUBLIC_DIR = path.join(__dirname, 'public', 'ww_gb_post_temp1');
const STATIC_DIR = path.join(PUBLIC_DIR, 'st');

// HTML files (relative to PUBLIC_DIR) handled by this run.
const FILES_TO_PROCESS = ['footer.html'];
// const FILES_TO_PROCESS = ['home.html', 'page2.html', 'page3.html', 'page4.html', 'page5.html'];

// Output directories for the extracted assets.
const ASSETS_DIR = path.join(STATIC_DIR, 'assets');
const IMG_DIR = path.join(ASSETS_DIR, 'images');
const CSS_DIR = path.join(ASSETS_DIR, 'css');
const FONTS_DIR = path.join(ASSETS_DIR, 'fonts');
/**
 * Delete every regular file located directly inside `dir`.
 * Sub-directories (and their contents) are left untouched, and a missing
 * directory is silently ignored.
 *
 * @param {string} dir - Path of the directory to empty.
 * @returns {void}
 */
function cleanDirectory(dir) {
  if (!fs.existsSync(dir)) {
    return;
  }
  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);
    const isRegularFile = fs.statSync(entryPath).isFile();
    if (isRegularFile) {
      fs.unlinkSync(entryPath);
    }
  }
}
// Make sure every output directory exists before anything is written to it.
for (const outputDir of [ASSETS_DIR, IMG_DIR, CSS_DIR, FONTS_DIR]) {
  if (fs.existsSync(outputDir)) {
    continue;
  }
  fs.mkdirSync(outputDir, { recursive: true });
}

// Remove the files left over from a previous run so stale assets do not accumulate.
console.log('🧹 清理旧资源文件...');
[IMG_DIR, CSS_DIR, FONTS_DIR].forEach((outputDir) => {
  cleanDirectory(outputDir);
});

console.log('🚀 开始提取静态资源...\n');
console.log(`📂 工作目录: ${PUBLIC_DIR}\n`);
/** URL prefix under which the extracted assets are referenced from the HTML/CSS. */
const ASSET_URL_BASE = '/ww_gb_post_temp1/st/assets';

/**
 * Derive a file extension from the subtype of an image MIME type.
 * "svg+xml" becomes "svg"; any other "+suffix" is dropped ("foo+bar" -> "foo").
 *
 * @param {string} imageType - Text captured after "data:image/" (e.g. "png", "svg+xml").
 * @returns {string} File extension without the leading dot.
 */
function imageExtension(imageType) {
  const subtype = imageType.split('/').pop();
  if (subtype.includes('svg')) {
    return 'svg';
  }
  return subtype.includes('+') ? subtype.split('+')[0] : subtype;
}

/**
 * Derive a file extension from the subtype of a font MIME type.
 *
 * @param {string} fontType - Text captured after "font/" or "application/font-".
 * @returns {string} "woff2", "woff", "ttf", "otf", or "font" when unrecognised.
 */
function fontExtension(fontType) {
  const type = fontType.toLowerCase();
  // "woff2" must be tested before "woff" because the latter is a prefix of it.
  if (type.includes('woff2')) return 'woff2';
  if (type.includes('woff')) return 'woff';
  if (type.includes('ttf') || type.includes('truetype')) return 'ttf';
  if (type.includes('otf') || type.includes('opentype')) return 'otf';
  return 'font';
}

/**
 * Decode base64 data and write it to `targetDir/fileName`.
 * A failure is logged (best effort) instead of aborting the whole run.
 *
 * @param {string} base64Data - Base64 payload of the data URI.
 * @param {string} targetDir - Directory to write into.
 * @param {string} fileName - Name of the file to create.
 * @param {string} label - Asset kind used in the warning message.
 * @returns {boolean} true when the file was written, false otherwise.
 */
function saveBase64Asset(base64Data, targetDir, fileName, label) {
  try {
    fs.writeFileSync(path.join(targetDir, fileName), Buffer.from(base64Data, 'base64'));
    return true;
  } catch (e) {
    console.log(` ⚠️ 无法保存${label} ${fileName}:`, e.message);
    return false;
  }
}

/**
 * Remove every match of `pattern` from `html` and report how many were removed.
 *
 * @param {string} html - Markup to clean.
 * @param {RegExp} pattern - Global regular expression describing what to drop.
 * @returns {{html: string, count: number}} Cleaned markup and number of removed matches.
 */
function stripMatches(html, pattern) {
  const count = (html.match(pattern) || []).length;
  return { html: html.replace(pattern, ''), count };
}

/**
 * Wrap unquoted attribute values in double quotes.
 * Only text inside a tag is touched, and already-quoted values are consumed
 * as a whole, so "name=value" text inside a quoted value (for example a URL
 * query string) or in plain text content is never rewritten.
 *
 * @param {string} html - Markup to normalise.
 * @returns {string} Markup with all unquoted attribute values quoted.
 */
function quoteAttributeValues(html) {
  // An opening tag: name, then optionally whitespace followed by attributes,
  // where quoted strings may legitimately contain ">".
  const tagPattern = /<[a-zA-Z][^\s>"']*(?:\s(?:"[^"]*"|'[^']*'|[^>"'])*)?>/g;
  // Either a complete quoted string (left alone) or an unquoted name=value pair.
  const tokenPattern = /"[^"]*"|'[^']*'|(\s+[\w\-:]+)=([^"'\s>][^\s>]*)/g;
  return html.replace(tagPattern, (tag) =>
    tag.replace(tokenPattern, (token, attrName, attrValue) => {
      if (attrName === undefined) {
        return token; // quoted string: keep untouched
      }
      // A stray double quote inside the value would terminate the new quoting.
      return `${attrName}="${attrValue.replace(/"/g, '&quot;')}"`;
    })
  );
}

/**
 * Process one HTML file in PUBLIC_DIR: back it up (or restore it from the
 * backup), move embedded images, fonts and inline CSS into asset files,
 * strip document-level tags, keep only the body content and write the result
 * back over the original file.
 *
 * @param {string} filename - File name relative to PUBLIC_DIR.
 * @returns {void}
 */
function processFile(filename) {
  const filePath = path.join(PUBLIC_DIR, filename);
  const backupPath = filePath + '.backup';
  if (!fs.existsSync(filePath)) {
    console.log(`⚠️ 文件不存在: ${filename}`);
    return;
  }

  if (!fs.existsSync(backupPath)) {
    // First run: keep a pristine copy to restore from on later runs.
    fs.copyFileSync(filePath, backupPath);
    console.log(`📄 处理文件: ${filename} (已创建备份)`);
  } else if (!KEEP_CURRENT) {
    // A backup exists and --keep was not given: start again from the backup.
    fs.copyFileSync(backupPath, filePath);
    console.log(`📄 处理文件: ${filename} (从备份恢复)`);
  } else {
    console.log(`📄 处理文件: ${filename} (保持当前版本)`);
  }

  let content = fs.readFileSync(filePath, 'utf8');
  // Sizes are reported in KB, so measure bytes rather than UTF-16 code units.
  const originalSize = Buffer.byteLength(content, 'utf8');
  const baseName = filename.replace('.html', '');
  const cssLinks = []; // <link> tags for the extracted stylesheets
  let imageCount = 0;
  let cssCount = 0;
  let fontCount = 0;

  // 1. base64 images referenced through url(...). This also covers CSS custom
  //    properties, whose values use the same url(...) syntax.
  console.log(' 提取 base64 图片...');
  content = content.replace(
    /url\s*\(\s*["']?(data:image\/([^;]+);base64,([^"')]+))["']?\s*\)/gi,
    (match, _dataUrl, imageType, base64Data) => {
      imageCount++;
      const imageName = `${baseName}_img_${imageCount}.${imageExtension(imageType)}`;
      return saveBase64Asset(base64Data, IMG_DIR, imageName, '图片')
        ? `url("${ASSET_URL_BASE}/images/${imageName}")`
        : match;
    }
  );

  // 2./3. base64 images in <img src>, quoted first and then unquoted.
  //       "\ssrc" requires whitespace before the attribute name so that
  //       attributes such as data-src are not mistaken for src.
  const replaceInlineImage = (match, beforeAttrs, _dataUrl, imageType, base64Data, afterAttrs) => {
    imageCount++;
    const imageName = `${baseName}_inline_${imageCount}.${imageExtension(imageType)}`;
    if (!saveBase64Asset(base64Data, IMG_DIR, imageName, '图片')) {
      return match;
    }
    // Re-join the surrounding attributes with exactly one separating space.
    const before = beforeAttrs.trim();
    const after = afterAttrs.trim();
    const prefix = before ? ` ${before}` : '';
    const suffix = after ? ` ${after}` : '';
    return `<img${prefix} src="${ASSET_URL_BASE}/images/${imageName}"${suffix}>`;
  };
  content = content.replace(
    /<img\b([^>]*?)\ssrc\s*=\s*["'](data:image\/([^;]+);base64,([^"']+))["']([^>]*)>/gi,
    replaceInlineImage
  );
  content = content.replace(
    /<img\b([^>]*?)\ssrc\s*=\s*(data:image\/([^;\s>]+);base64,([^\s>]+))([^>]*)>/gi,
    replaceInlineImage
  );

  // 4. Embedded fonts. This must run BEFORE the <style> blocks are moved out
  //    of the document, otherwise @font-face data URIs would end up inside
  //    the generated CSS files without ever being extracted. Accepts
  //    "font/*" and "application/[x-]font-*" types, with optional parameters
  //    such as ";charset=utf-8" before ";base64,".
  console.log(' 提取字体文件...');
  content = content.replace(
    /url\s*\(\s*["']?data:(?:font\/|application\/(?:x-)?font-)([^;,]+)(?:;[^;,"')]+)*?;base64,([^"')]+)["']?\s*\)/gi,
    (match, fontType, base64Data) => {
      fontCount++;
      const fontName = `${baseName}_font_${fontCount}.${fontExtension(fontType)}`;
      return saveBase64Asset(base64Data, FONTS_DIR, fontName, '字体')
        ? `url("${ASSET_URL_BASE}/fonts/${fontName}")`
        : match;
    }
  );

  // 5. Move every <style> block into its own CSS file. The tag is always
  //    removed from the document; empty blocks simply produce no file.
  console.log(' 提取内联 CSS...');
  content = content.replace(/<style\b[^>]*>([\s\S]*?)<\/style>/gi, (styleTag, rawCss) => {
    const cssContent = rawCss.trim();
    if (cssContent.length === 0) {
      return '';
    }
    cssCount++;
    const cssName = `${baseName}_styles_${cssCount}.css`;
    fs.writeFileSync(path.join(CSS_DIR, cssName), cssContent);
    // Collected here, emitted together at the top of the output later on.
    cssLinks.push(`<link rel="stylesheet" href="${ASSET_URL_BASE}/css/${cssName}">`);
    return '';
  });

  // 6. DOCTYPE declaration and HTML comments (including multi-line ones).
  console.log(' 去除 DOCTYPE 和 HTML 注释...');
  const doctypeResult = stripMatches(content, /<!DOCTYPE[^>]*>/gi);
  const commentResult = stripMatches(doctypeResult.html, /<!--[\s\S]*?-->/g);
  content = commentResult.html;
  const commentCount = commentResult.count;
  if (doctypeResult.count + commentCount > 0) {
    console.log(` - 已删除 DOCTYPE 和 ${commentCount} 个 HTML 注释`);
  }

  // 7. meta, title and link tags. "\b" keeps look-alike elements such as
  //    <metadata> intact; links with rel="stylesheet" are preserved.
  console.log(' 去除 meta、title 和 link 标签...');
  const metaResult = stripMatches(content, /<meta\b[^>]*>/gi);
  const titleResult = stripMatches(metaResult.html, /<title\b[^>]*>[\s\S]*?<\/title>/gi);
  const linkResult = stripMatches(
    titleResult.html,
    /<link\b(?![^>]*rel=["']stylesheet["'])[^>]*>/gi
  );
  content = linkResult.html;
  const metaCount = metaResult.count;
  const titleCount = titleResult.count;
  const linkCount = linkResult.count;
  if (metaCount > 0 || titleCount > 0 || linkCount > 0) {
    console.log(` - 已删除 ${metaCount} 个 meta、${titleCount} 个 title 和 ${linkCount} 个 link 标签`);
  }

  // 8. script tags, inline and external.
  console.log(' 去除 script 标签...');
  const scriptResult = stripMatches(content, /<script\b[^>]*>[\s\S]*?<\/script>/gi);
  content = scriptResult.html;
  const scriptCount = scriptResult.count;
  if (scriptCount > 0) {
    console.log(` - 已删除 ${scriptCount} 个 script 标签`);
  }

  // 9. Keep only the body content, preceded by the generated stylesheet links.
  console.log(' 重组 HTML 结构...');
  const stylesheetBlock = cssLinks.length > 0 ? cssLinks.join('\n') + '\n\n' : '';
  const bodyMatch = content.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
  if (bodyMatch) {
    content = stylesheetBlock + bodyMatch[1];
    console.log(` - 已提取 body 内容并移除其他标签`);
  } else {
    // No complete <body> element: strip the structural tags individually.
    // "\b" stops <head ...> from matching <header ...>.
    const withoutStructure = content
      .replace(/<\/?html\b[^>]*>/gi, '')
      .replace(/<head\b[^>]*>[\s\S]*?<\/head>/gi, '')
      .replace(/<\/?body\b[^>]*>/gi, '');
    content = stylesheetBlock + withoutStructure;
    console.log(` - 已去除 HTML 结构标签`);
  }

  // Collapse runs of blank lines left behind by the removals.
  content = content.replace(/\n\s*\n\s*\n/g, '\n\n');

  // 10. Quote unquoted attribute values (inside tags only).
  console.log(' 给HTML属性加上引号...');
  content = quoteAttributeValues(content);

  // Write the result back over the processed file.
  fs.writeFileSync(filePath, content);

  const newSize = Buffer.byteLength(content, 'utf8');
  // Guard against division by zero for an empty input file.
  const reduction = originalSize > 0
    ? ((originalSize - newSize) / originalSize * 100).toFixed(1)
    : '0.0';
  console.log(` ✅ 完成:`);
  console.log(` - 提取图片: ${imageCount}`);
  console.log(` - 提取 CSS: ${cssCount}`);
  console.log(` - 提取字体: ${fontCount}`);
  console.log(` - 删除 meta: ${metaCount}`);
  console.log(` - 删除 title: ${titleCount}`);
  console.log(` - 删除 link: ${linkCount}`);
  console.log(` - 删除 script: ${scriptCount}`);
  console.log(` - 原始大小: ${(originalSize / 1024).toFixed(2)} KB`);
  console.log(` - 新大小: ${(newSize / 1024).toFixed(2)} KB`);
  console.log(` - 减少: ${reduction}%`);

  if (imageCount > 0 || cssCount > 0 || fontCount > 0) {
    console.log(` 提示: 请确保资源路径 /ww_gb_post_temp1/st/assets/ 在服务器上可访问\n`);
  } else {
    console.log(` 未找到可提取的资源\n`);
  }
}

FILES_TO_PROCESS.forEach((filename) => processFile(filename));
// Final report: where the extracted assets were written and which clean-up
// steps were applied. Each entry is printed with its own console.log call.
const summaryLines = [
  '✅ 资源提取完成!',
  '\n📁 资源文件位置:',
  ` - 图片: ${IMG_DIR}`,
  ` - CSS: ${CSS_DIR}`,
  ` - 字体: ${FONTS_DIR}`,
  '\n✨ 自动优化:',
  ' ✅ DOCTYPE 和 HTML 注释已删除',
  ' ✅ 所有 meta、title 和 link 标签已删除(保留生成的 stylesheet',
  ' ✅ 所有 script 标签已删除',
  ' ✅ 所有 style 已提取到 CSS 文件',
  ' ✅ 只保留 body 内的内容',
  ' ✅ CSS 引用已放到顶部',
  ' ✅ 图片扩展名已修复svg+xml → svg',
  ' ✅ 使用绝对路径(/ww_gb_post_temp1/st/assets/',
  ' ✅ HTML 属性值已自动加上双引号',
  '\n💡 提示: 刷新浏览器测试页面,所有资源应该正常加载',
];
for (const summaryLine of summaryLines) {
  console.log(summaryLine);
}