Files
zy-client-a/a4_se_post_instabox/new_extract-resources.js
telangpu f421220d77 update
2026-05-07 23:00:28 +08:00

396 lines
15 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 静态资源提取脚本
* 从 header.html 和 footer.html 中提取 base64 图片、内联 CSS 和字体
*
* 功能:
* 1. 提取 base64 图片到 assets/images/
* 2. 提取所有内联 CSS 到 assets/css/
* 3. 提取字体文件到 assets/fonts/
* 4. 去除所有 meta 标签
* 5. 去除所有 script 标签
* 6. 只保留 body 内的内容
* 7. 把引入的 style 放到顶部
* 8. 给所有 HTML 属性值自动加上双引号
*
* 使用方法:
* - node new_extract-resources.js # 正常运行,从备份恢复
* - node new_extract-resources.js --keep # 保持当前文件,不从备份恢复
*/
const fs = require('fs');
const path = require('path');
// Command-line switch: `--keep` anywhere in the arguments means an existing
// backup is NOT copied back over the HTML file before processing.
const KEEP_CURRENT = process.argv.includes('--keep');

// Directory holding the HTML files to process, and the `st` directory below it
// under which the extracted assets are written.
const PUBLIC_DIR = path.join(__dirname, 'public/Static_zy');
const STATIC_DIR = path.join(PUBLIC_DIR, 'st');

// HTML files (relative to PUBLIC_DIR) handled in one run.
// Alternative page set kept for reference:
//   ['home.html', 'page2.html', 'page3.html', 'page4.html', 'page5.html']
const FILES_TO_PROCESS = ['footer.html'];

// Output directories for the extracted resources.
const ASSETS_DIR = path.join(STATIC_DIR, 'assets');
const [IMG_DIR, CSS_DIR, FONTS_DIR] = ['images', 'css', 'fonts'].map(
  (subDir) => path.join(ASSETS_DIR, subDir)
);
/**
 * Delete every regular file located directly inside `dir`.
 *
 * Sub-directories (and whatever they contain) are left alone, and a directory
 * that does not exist is silently ignored.
 *
 * @param {string} dir - Path of the directory to empty.
 * @returns {void}
 */
function cleanDirectory(dir) {
  if (!fs.existsSync(dir)) {
    return;
  }

  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);
    const isRegularFile = fs.statSync(entryPath).isFile();
    if (isRegularFile) {
      fs.unlinkSync(entryPath);
    }
  }
}
// Make sure the whole asset directory tree exists before anything is written.
for (const assetDir of [ASSETS_DIR, IMG_DIR, CSS_DIR, FONTS_DIR]) {
  const alreadyPresent = fs.existsSync(assetDir);
  if (!alreadyPresent) {
    fs.mkdirSync(assetDir, { recursive: true });
  }
}

// Remove the files left behind by a previous run.
// NOTE(review): this also runs when --keep is passed, so assets referenced by
// an already-processed HTML file are deleted here — confirm that is intended.
console.log('🧹 清理旧资源文件...');
[IMG_DIR, CSS_DIR, FONTS_DIR].forEach((assetDir) => cleanDirectory(assetDir));

console.log('🚀 开始提取静态资源...\n');
console.log(`📂 工作目录: ${PUBLIC_DIR}\n`);
// Public URL prefix under which files written below ASSETS_DIR are referenced
// from the rewritten HTML/CSS.
const ASSET_URL_BASE = '/Static_zy/st/assets';

// Substring -> extension hints for font MIME types. Order matters: `woff2`
// must be tested before `woff`.
const FONT_EXTENSION_HINTS = [
  ['woff2', 'woff2'],
  ['woff', 'woff'],
  ['ttf', 'ttf'],
  ['truetype', 'ttf'],
  ['otf', 'otf'],
  ['opentype', 'otf'],
  ['fontobject', 'eot'],
  ['eot', 'eot'],
];

/**
 * Derive a file extension from the subtype of a `data:image/<subtype>` URL.
 * Any subtype containing "svg" (e.g. `svg+xml`) becomes `svg`; for other
 * subtypes with a `+suffix` only the part before `+` is kept.
 *
 * @param {string} imageType - MIME subtype captured from the data URL.
 * @returns {string} File extension without the leading dot.
 */
function imageExtensionFor(imageType) {
  const subtype = imageType.split('/').pop();
  if (subtype.includes('svg')) {
    return 'svg';
  }
  return subtype.includes('+') ? subtype.split('+')[0] : subtype;
}

/**
 * Derive a file extension from a font MIME type such as `font/woff2`,
 * `application/font-woff` or `application/x-font-ttf`.
 *
 * @param {string} mimeType - MIME type captured from the data URL.
 * @returns {string} File extension without the dot; `font` when unknown.
 */
function fontExtensionFor(mimeType) {
  const mime = mimeType.toLowerCase();
  const hint = FONT_EXTENSION_HINTS.find(([needle]) => mime.includes(needle));
  return hint ? hint[1] : 'font';
}

/**
 * Decode a base64 payload and write it to `targetDir/assetName`.
 * A write failure is logged and reported through the return value, so the
 * caller can leave the original data URL in place.
 *
 * @param {string} targetDir - Directory to write into.
 * @param {string} assetName - File name of the asset.
 * @param {string} base64Data - Base64 payload of the data URL.
 * @param {string} label - Asset kind used in the warning message.
 * @returns {boolean} `true` when the file was written.
 */
function writeBase64Asset(targetDir, assetName, base64Data, label) {
  try {
    fs.writeFileSync(path.join(targetDir, assetName), Buffer.from(base64Data, 'base64'));
    return true;
  } catch (e) {
    console.log(` ⚠️ 无法保存${label} ${assetName}:`, e.message);
    return false;
  }
}

/**
 * Wrap unquoted HTML attribute values in double quotes.
 *
 * Only text inside tags is touched, and values that are already quoted are
 * consumed as a unit, so `name=value` sequences in text nodes or inside a
 * quoted attribute (e.g. a query string in `href`) are left untouched.
 *
 * @param {string} html - Markup to normalise.
 * @returns {string} Markup with every unquoted attribute value quoted.
 */
function quoteUnquotedAttributes(html) {
  const tagPattern = /<[a-zA-Z](?:"[^"]*"|'[^']*'|[^'">])*>/g;
  const attrPattern = /(\s+[\w\-:]+)=("[^"]*"|'[^']*'|[^\s>]+)/g;
  return html.replace(tagPattern, (tag) =>
    tag.replace(attrPattern, (attr, attrName, attrValue) => {
      if (attrValue.startsWith('"') || attrValue.startsWith("'")) {
        return attr;
      }
      // A literal double quote inside the value would end the new quoting early.
      return `${attrName}="${attrValue.replace(/"/g, '&quot;')}"`;
    })
  );
}

/**
 * Process every configured HTML file in place:
 *  1. create a backup, or restore from it unless --keep was given;
 *  2. move base64 images (CSS `url(...)` and `<img src>`) into IMG_DIR;
 *  3. move base64 fonts into FONTS_DIR;
 *  4. move every `<style>` block into its own file in CSS_DIR;
 *  5. strip DOCTYPE, comments, meta/title/non-stylesheet link and script tags;
 *  6. keep only the body content, with the generated stylesheet links on top;
 *  7. quote unquoted attribute values and write the result back.
 *
 * Fonts are extracted BEFORE the style blocks are written out, otherwise
 * fonts embedded in `@font-face` rules would stay inlined in the CSS files.
 */
FILES_TO_PROCESS.forEach(filename => {
  const filePath = path.join(PUBLIC_DIR, filename);
  const backupPath = `${filePath}.backup`;
  if (!fs.existsSync(filePath)) {
    console.log(`⚠️ 文件不存在: ${filename}`);
    return;
  }

  // Backup handling.
  if (!fs.existsSync(backupPath)) {
    fs.copyFileSync(filePath, backupPath);
    console.log(`📄 处理文件: ${filename} (已创建备份)`);
  } else if (!KEEP_CURRENT) {
    // A backup exists and --keep was not given: start again from the backup.
    fs.copyFileSync(backupPath, filePath);
    console.log(`📄 处理文件: ${filename} (从备份恢复)`);
  } else {
    console.log(`📄 处理文件: ${filename} (保持当前版本)`);
  }

  let content = fs.readFileSync(filePath, 'utf8');
  // Sizes are reported in KB, so measure bytes rather than UTF-16 code units.
  const originalSize = Buffer.byteLength(content, 'utf8');
  const baseName = filename.replace('.html', '');
  let imageCount = 0;
  let cssCount = 0;
  let fontCount = 0;

  // Saves one image and returns its public URL, or null if writing failed.
  // The counter advances even on failure so file names stay unique.
  const saveImage = (tag, imageType, base64Data) => {
    imageCount++;
    const imageName = `${baseName}_${tag}_${imageCount}.${imageExtensionFor(imageType)}`;
    return writeBase64Asset(IMG_DIR, imageName, base64Data, '图片')
      ? `${ASSET_URL_BASE}/images/${imageName}`
      : null;
  };

  // 1. Base64 images referenced through CSS url(...). This also covers custom
  //    properties (`--x: url(data:image/...)`), so no separate pass is needed.
  console.log(' 提取 base64 图片...');
  content = content.replace(
    /url\s*\(\s*["']?(data:image\/([^;]+);base64,([^"')]+))["']?\s*\)/gi,
    (match, dataUrl, imageType, base64Data) => {
      const assetUrl = saveImage('img', imageType, base64Data);
      return assetUrl === null ? match : `url("${assetUrl}")`;
    }
  );

  // 2. Base64 images in <img src>, first quoted then unquoted values. The
  //    lookbehind keeps attributes such as `data-src` from being mistaken
  //    for `src`.
  const rewriteImgTag = (match, beforeAttrs, dataUrl, imageType, base64Data, afterAttrs) => {
    const assetUrl = saveImage('inline', imageType, base64Data);
    if (assetUrl === null) {
      return match;
    }
    const before = beforeAttrs.trim();
    const after = afterAttrs.trim();
    return `<img${before ? ` ${before}` : ''} src="${assetUrl}"${after ? ` ${after}` : ''}>`;
  };
  content = content.replace(
    /<img\b([^>]*?)(?<![\w:-])src\s*=\s*["'](data:image\/([^;]+);base64,([^"']+))["']([^>]*)>/gi,
    rewriteImgTag
  );
  content = content.replace(
    /<img\b([^>]*?)(?<![\w:-])src\s*=\s*(data:image\/([^;\s>]+);base64,([^\s>]+))([^>]*)>/gi,
    rewriteImgTag
  );

  // 3. Base64 fonts: `font/*` and `application/*font*` MIME types, with
  //    optional extra parameters (e.g. `;charset=utf-8`) and optional quotes.
  console.log(' 提取字体文件...');
  content = content.replace(
    /url\s*\(\s*["']?data:(font\/[^;,"')\s]+|application\/[^;,"')\s]*font[^;,"')\s]*)(?:;[^;,"')\s]*)*?;base64,([^"')]+)["']?\s*\)/gi,
    (match, mimeType, base64Data) => {
      fontCount++;
      const fontName = `${baseName}_font_${fontCount}.${fontExtensionFor(mimeType)}`;
      return writeBase64Asset(FONTS_DIR, fontName, base64Data, '字体')
        ? `url("${ASSET_URL_BASE}/fonts/${fontName}")`
        : match;
    }
  );

  // 4. Move every <style> block into its own CSS file. The tags themselves
  //    are always removed, including empty ones; links are re-added on top.
  console.log(' 提取内联 CSS...');
  const cssLinks = [];
  content = content.replace(/<style\b[^>]*>([\s\S]*?)<\/style>/gi, (styleTag, rawCss) => {
    const cssContent = rawCss.trim();
    if (cssContent.length > 0) {
      cssCount++;
      const cssName = `${baseName}_styles_${cssCount}.css`;
      fs.writeFileSync(path.join(CSS_DIR, cssName), cssContent);
      cssLinks.push(`<link rel="stylesheet" href="${ASSET_URL_BASE}/css/${cssName}">`);
    }
    return '';
  });

  // Removes every match of `pattern` from the content and returns how many
  // matches were removed.
  const removeAll = (pattern) => {
    let removed = 0;
    content = content.replace(pattern, () => {
      removed += 1;
      return '';
    });
    return removed;
  };

  // 5. DOCTYPE declaration and HTML comments (including multi-line ones).
  console.log(' 去除 DOCTYPE 和 HTML 注释...');
  const doctypeCount = removeAll(/<!DOCTYPE[^>]*>/gi);
  const commentCount = removeAll(/<!--[\s\S]*?-->/g);
  if (doctypeCount > 0 || commentCount > 0) {
    console.log(` - 已删除 DOCTYPE 和 ${commentCount} 个 HTML 注释`);
  }

  // 6. meta, title and link tags (canonical, icon, preload, ...). Links with
  //    a quoted rel="stylesheet" are kept.
  console.log(' 去除 meta、title 和 link 标签...');
  const metaCount = removeAll(/<meta[^>]*>/gi);
  const titleCount = removeAll(/<title[^>]*>[\s\S]*?<\/title>/gi);
  const linkCount = removeAll(/<link(?![^>]*rel=["']stylesheet["'])[^>]*>/gi);
  if (metaCount > 0 || titleCount > 0 || linkCount > 0) {
    console.log(` - 已删除 ${metaCount} 个 meta、${titleCount} 个 title 和 ${linkCount} 个 link 标签`);
  }

  // 7. Inline and external scripts.
  console.log(' 去除 script 标签...');
  const scriptCount = removeAll(/<script[^>]*>[\s\S]*?<\/script>/gi);
  if (scriptCount > 0) {
    console.log(` - 已删除 ${scriptCount} 个 script 标签`);
  }

  // 8. Keep only the body content, preceded by the generated CSS links.
  console.log(' 重组 HTML 结构...');
  const cssLinkBlock = cssLinks.length > 0 ? `${cssLinks.join('\n')}\n\n` : '';
  const bodyMatch = content.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (bodyMatch) {
    content = cssLinkBlock + bodyMatch[1];
    console.log(` - 已提取 body 内容并移除其他标签`);
  } else {
    // No complete <body> element: strip the structural tags individually.
    content = content.replace(/<\/?html[^>]*>/gi, '');
    content = content.replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '');
    content = content.replace(/<\/?body[^>]*>/gi, '');
    content = cssLinkBlock + content;
    console.log(` - 已去除 HTML 结构标签`);
  }

  // Collapse runs of blank lines.
  content = content.replace(/\n\s*\n\s*\n/g, '\n\n');

  // 9. Quote unquoted attribute values (inside tags only).
  console.log(' 给HTML属性加上引号...');
  content = quoteUnquotedAttributes(content);

  // Write the result back over the processed file.
  fs.writeFileSync(filePath, content);

  const newSize = Buffer.byteLength(content, 'utf8');
  // An empty input file must not produce "NaN%".
  const reduction = originalSize === 0
    ? '0.0'
    : ((originalSize - newSize) / originalSize * 100).toFixed(1);
  console.log(` ✅ 完成:`);
  console.log(` - 提取图片: ${imageCount}`);
  console.log(` - 提取 CSS: ${cssCount}`);
  console.log(` - 提取字体: ${fontCount}`);
  console.log(` - 删除 meta: ${metaCount}`);
  console.log(` - 删除 title: ${titleCount}`);
  console.log(` - 删除 link: ${linkCount}`);
  console.log(` - 删除 script: ${scriptCount}`);
  console.log(` - 原始大小: ${(originalSize / 1024).toFixed(2)} KB`);
  console.log(` - 新大小: ${(newSize / 1024).toFixed(2)} KB`);
  console.log(` - 减少: ${reduction}%`);

  // Reminder about serving the extracted assets.
  if (imageCount > 0 || cssCount > 0 || fontCount > 0) {
    console.log(` 提示: 请确保资源路径 /Static_zy/st/assets/ 在服务器上可访问\n`);
  } else {
    console.log(` 未找到可提取的资源\n`);
  }
});
// Final summary: where the extracted assets were written and which clean-up
// steps the script applies. Three of the messages had unbalanced or missing
// parentheses; the closing parentheses are restored here.
for (const summaryLine of [
  '✅ 资源提取完成!',
  '\n📁 资源文件位置:',
  ` - 图片: ${IMG_DIR}`,
  ` - CSS: ${CSS_DIR}`,
  ` - 字体: ${FONTS_DIR}`,
  '\n✨ 自动优化:',
  ' ✅ DOCTYPE 和 HTML 注释已删除',
  ' ✅ 所有 meta、title 和 link 标签已删除(保留生成的 stylesheet)',
  ' ✅ 所有 script 标签已删除',
  ' ✅ 所有 style 已提取到 CSS 文件',
  ' ✅ 只保留 body 内的内容',
  ' ✅ CSS 引用已放到顶部',
  ' ✅ 图片扩展名已修复 (svg+xml → svg)',
  ' ✅ 使用绝对路径 (/Static_zy/st/assets/)',
  ' ✅ HTML 属性值已自动加上双引号',
  '\n💡 提示: 刷新浏览器测试页面,所有资源应该正常加载',
]) {
  console.log(summaryLine);
}