update
This commit is contained in:
396
t_global_post_temp1/new_extract-resources.js
Normal file
396
t_global_post_temp1/new_extract-resources.js
Normal file
@@ -0,0 +1,396 @@
|
||||
/**
|
||||
* 静态资源提取脚本
|
||||
* 从 header.html 和 footer.html 中提取 base64 图片、内联 CSS 和字体
|
||||
*
|
||||
* 功能:
|
||||
* 1. 提取 base64 图片到 assets/images/
|
||||
* 2. 提取所有内联 CSS 到 assets/css/
|
||||
* 3. 提取字体文件到 assets/fonts/
|
||||
* 4. 去除所有 meta 标签
|
||||
* 5. 去除所有 script 标签
|
||||
* 6. 只保留 body 内的内容
|
||||
* 7. 把引入的 style 放到顶部
|
||||
* 8. 给所有 HTML 属性值自动加上双引号
|
||||
*
|
||||
* 使用方法:
|
||||
* - node extract-resources.js # 正常运行,从备份恢复
|
||||
* - node extract-resources.js --keep # 保持当前文件,不从备份恢复
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Command-line switch: with `--keep` the files are processed as they currently
// are instead of first being restored from their `.backup` copies.
const KEEP_CURRENT = process.argv.includes('--keep');

// Directory that contains the HTML files to process.
const PUBLIC_DIR = path.join(__dirname, 'public/static');
// Parent directory of the generated `assets` tree. The rewritten HTML/CSS
// references these files through URLs starting with /static/st/assets/.
const STATIC_DIR = path.join(PUBLIC_DIR, 'st');

// HTML files (relative to PUBLIC_DIR) handled by one run of the script.
// Alternative set used for the page templates:
//   ['home.html', 'page2.html', 'page3.html', 'page4.html', 'page5.html']
const FILES_TO_PROCESS = ['footer.html'];

// Output directories for the extracted resources.
const ASSETS_DIR = path.join(STATIC_DIR, 'assets');
const IMG_DIR = path.join(ASSETS_DIR, 'images');
const CSS_DIR = path.join(ASSETS_DIR, 'css');
const FONTS_DIR = path.join(ASSETS_DIR, 'fonts');
|
||||
|
||||
/**
 * Deletes every regular file located directly inside `dir`.
 *
 * Sub-directories (and their contents) are left untouched, and a missing
 * directory is a no-op. Entries that can no longer be stat'ed — a dangling
 * symlink, or a file removed by someone else between listing and inspection —
 * are skipped instead of aborting the whole run.
 *
 * @param {string} dir - Directory whose files should be removed.
 * @throws {Error} Any file-system error other than `ENOENT` while inspecting
 *   an entry, and any error raised while listing or deleting.
 */
function cleanDirectory(dir) {
  if (!fs.existsSync(dir)) {
    return;
  }

  for (const entry of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entry);

    let stats;
    try {
      // statSync follows symlinks, so a link pointing at a file counts as a file.
      stats = fs.statSync(entryPath);
    } catch (err) {
      if (err.code === 'ENOENT') {
        continue;
      }
      throw err;
    }

    if (stats.isFile()) {
      fs.unlinkSync(entryPath);
    }
  }
}
|
||||
|
||||
// Make sure the whole asset tree exists before anything is written into it.
for (const assetDir of [ASSETS_DIR, IMG_DIR, CSS_DIR, FONTS_DIR]) {
  if (fs.existsSync(assetDir)) {
    continue;
  }
  fs.mkdirSync(assetDir, { recursive: true });
}

// Start every run from empty output directories.
// NOTE(review): this removes every file in the three directories, including
// assets that were generated for HTML files not listed in FILES_TO_PROCESS.
console.log('🧹 清理旧资源文件...');
for (const outputDir of [IMG_DIR, CSS_DIR, FONTS_DIR]) {
  cleanDirectory(outputDir);
}

console.log('🚀 开始提取静态资源...\n');
console.log(`📂 工作目录: ${PUBLIC_DIR}\n`);
|
||||
|
||||
// `url(...)` references to base64 images (CSS declarations, inline styles, CSS variables).
const CSS_IMAGE_URL = /url\s*\(\s*["']?data:image\/([^;]+);base64,([^"')]+)["']?\s*\)/gi;
// `<img ... src="data:image/...">` with a quoted value; group 1 is everything up to the value.
const QUOTED_IMG_SRC = /(<img\b[^>]*?\bsrc\s*=\s*)(["'])data:image\/([^;"']+);base64,([^"']+)\2/gi;
// `<img ... src=data:image/...>` with an unquoted value (ends at whitespace or `>`).
const UNQUOTED_IMG_SRC = /(<img\b[^>]*?\bsrc\s*=\s*)data:image\/([^;\s>]+);base64,([^\s>]+)/gi;
// `url(...)` references to base64 fonts: `font/*`, `application/font-*`,
// `application/x-font-*`, with an optional `;charset=...` parameter.
const FONT_URL = /url\s*\(\s*["']?data:(?:font\/|application\/(?:x-)?font-)([^;,"')]+)(?:;charset=[^;,"')]+)?;base64,([^"')]+)["']?\s*\)/gi;
// A start tag, skipping over quoted attribute values so a `>` inside them does not end the tag.
const HTML_TAG = /<[a-zA-Z](?:"[^"]*"|'[^']*'|[^'">])*>/g;
// One `name=value` attribute inside a tag; group 2 is the value, quoted or not.
const TAG_ATTRIBUTE = /(\s+[^\s"'<>\/=]+\s*=\s*)("[^"]*"|'[^']*'|[^\s"'>]+)/g;

/**
 * Maps the subtype of an `image/*` data URL (e.g. `png`, `svg+xml`) to a file extension.
 *
 * @param {string} imageType - Text captured after `data:image/`.
 * @returns {string} `svg` for any SVG subtype, otherwise the subtype without a `+suffix`.
 */
function imageExtensionFor(imageType) {
  const subtype = imageType.split('/').pop();
  if (subtype.includes('svg')) {
    return 'svg';
  }
  return subtype.split('+')[0];
}

/**
 * Maps the subtype of a font data URL to a file extension.
 *
 * @param {string} fontType - Text captured after `font/` or `font-` in the MIME type.
 * @returns {string} `woff2`, `woff`, `ttf`, `otf`, or `font` when the type is not recognised.
 */
function fontExtensionFor(fontType) {
  const type = fontType.toLowerCase();
  // `woff2` must be tested before `woff`, which is a prefix of it.
  if (type.includes('woff2')) return 'woff2';
  if (type.includes('woff')) return 'woff';
  if (type.includes('ttf') || type.includes('truetype')) return 'ttf';
  if (type.includes('otf') || type.includes('opentype')) return 'otf';
  return 'font';
}

/**
 * Decodes a base64 payload and writes it to `dir/name`.
 *
 * @param {string} dir - Target directory.
 * @param {string} name - File name inside `dir`.
 * @param {string} base64Data - Base64 payload taken from the data URL.
 * @param {string} label - Resource kind used in the warning message.
 * @returns {boolean} `true` when the file was written; `false` after logging a warning.
 */
function writeAsset(dir, name, base64Data, label) {
  try {
    fs.writeFileSync(path.join(dir, name), Buffer.from(base64Data, 'base64'));
    return true;
  } catch (e) {
    console.log(` ⚠️ 无法保存${label} ${name}:`, e.message);
    return false;
  }
}

/**
 * Replaces every base64 image in `content` with a reference to a file in IMG_DIR.
 *
 * Handles CSS `url(...)` values first (this also covers CSS custom properties,
 * so no separate pass is needed for them), then quoted and unquoted `<img>`
 * sources. For `<img>` only the attribute value is replaced, so attributes such
 * as `data-src` keep their name and the rest of the tag is left as written.
 *
 * @param {string} content - HTML source.
 * @param {string} baseName - File-name prefix for the generated images.
 * @param {{images: number}} counts - Counter object; `images` is incremented per match.
 * @returns {string} The rewritten HTML. Matches whose file could not be saved are kept.
 */
function extractImages(content, baseName, counts) {
  const store = (kind, imageType, base64Data) => {
    counts.images += 1;
    const imageName = `${baseName}_${kind}_${counts.images}.${imageExtensionFor(imageType)}`;
    return writeAsset(IMG_DIR, imageName, base64Data, '图片')
      ? `/static/st/assets/images/${imageName}`
      : null;
  };

  return content
    .replace(CSS_IMAGE_URL, (match, imageType, base64Data) => {
      const url = store('img', imageType, base64Data);
      return url === null ? match : `url("${url}")`;
    })
    .replace(QUOTED_IMG_SRC, (match, prefix, quote, imageType, base64Data) => {
      const url = store('inline', imageType, base64Data);
      return url === null ? match : `${prefix}"${url}"`;
    })
    .replace(UNQUOTED_IMG_SRC, (match, prefix, imageType, base64Data) => {
      const url = store('inline', imageType, base64Data);
      return url === null ? match : `${prefix}"${url}"`;
    });
}

/**
 * Replaces every base64 font `url(...)` in `content` with a reference to a file in FONTS_DIR.
 *
 * @param {string} content - HTML source (including any `<style>` blocks).
 * @param {string} baseName - File-name prefix for the generated fonts.
 * @param {{fonts: number}} counts - Counter object; `fonts` is incremented per match.
 * @returns {string} The rewritten HTML. Matches whose file could not be saved are kept.
 */
function extractFonts(content, baseName, counts) {
  return content.replace(FONT_URL, (match, fontType, base64Data) => {
    counts.fonts += 1;
    const fontName = `${baseName}_font_${counts.fonts}.${fontExtensionFor(fontType)}`;
    return writeAsset(FONTS_DIR, fontName, base64Data, '字体')
      ? `url("/static/st/assets/fonts/${fontName}")`
      : match;
  });
}

/**
 * Moves every non-empty `<style>` block into its own file in CSS_DIR.
 *
 * @param {string} content - HTML source.
 * @param {string} baseName - File-name prefix for the generated style sheets.
 * @param {{css: number}} counts - Counter object; `css` is incremented per extracted block.
 * @returns {{content: string, cssLinks: string[]}} HTML without the extracted blocks,
 *   plus one `<link>` tag per generated style sheet, in document order.
 */
function extractStyles(content, baseName, counts) {
  const cssLinks = [];
  const stripped = content.replace(/<style[^>]*>([\s\S]*?)<\/style>/gi, (styleTag, rawCss) => {
    const css = rawCss.trim();
    if (css.length === 0) {
      return styleTag;
    }
    counts.css += 1;
    const cssName = `${baseName}_styles_${counts.css}.css`;
    fs.writeFileSync(path.join(CSS_DIR, cssName), css);
    cssLinks.push(`<link rel="stylesheet" href="/static/st/assets/css/${cssName}">`);
    return '';
  });
  return { content: stripped, cssLinks };
}

/**
 * Removes every match of `pattern` from `content` and reports how many were removed.
 *
 * @param {string} content - Text to strip.
 * @param {RegExp} pattern - Global regular expression.
 * @returns {{content: string, removed: number}}
 */
function removeAll(content, pattern) {
  let removed = 0;
  const stripped = content.replace(pattern, () => {
    removed += 1;
    return '';
  });
  return { content: stripped, removed };
}

/**
 * Wraps unquoted HTML attribute values in double quotes.
 *
 * Only text inside start tags is touched, and attributes are tokenised with
 * quote awareness, so text nodes and `name=value` sequences inside an already
 * quoted value (e.g. `title="x y=z"`) are left exactly as they were.
 *
 * @param {string} html - HTML source.
 * @returns {string} HTML in which every attribute value is quoted.
 */
function quoteAttributeValues(html) {
  return html.replace(HTML_TAG, (tag) =>
    tag.replace(TAG_ATTRIBUTE, (attribute, lead, value) => {
      const isQuoted = value.startsWith('"') || value.startsWith("'");
      return isQuoted ? attribute : `${lead}"${value}"`;
    }));
}

/**
 * Runs the extraction pipeline for one HTML file in PUBLIC_DIR and overwrites it.
 *
 * Steps: back up / restore the file, extract base64 images, extract base64
 * fonts, move `<style>` blocks to CSS files, drop DOCTYPE, comments, meta,
 * title, non-stylesheet link and script tags, keep only the body content with
 * the generated style-sheet links on top, then quote attribute values.
 *
 * Fonts are extracted BEFORE the `<style>` blocks are moved out: `@font-face`
 * rules live inside those blocks, so extracting fonts afterwards would leave
 * the base64 payloads embedded in the generated CSS files.
 *
 * @param {string} filename - File name relative to PUBLIC_DIR.
 */
function processFile(filename) {
  const filePath = path.join(PUBLIC_DIR, filename);
  const backupPath = `${filePath}.backup`;

  if (!fs.existsSync(filePath)) {
    console.log(`⚠️ 文件不存在: ${filename}`);
    return;
  }

  if (!fs.existsSync(backupPath)) {
    // First run: keep a pristine copy to restore from on later runs.
    fs.copyFileSync(filePath, backupPath);
    console.log(`📄 处理文件: ${filename} (已创建备份)`);
  } else if (!KEEP_CURRENT) {
    // A backup exists and --keep was not given: start again from the pristine copy.
    fs.copyFileSync(backupPath, filePath);
    console.log(`📄 处理文件: ${filename} (从备份恢复)`);
  } else {
    console.log(`📄 处理文件: ${filename} (保持当前版本)`);
  }

  let content = fs.readFileSync(filePath, 'utf8');
  const originalSize = content.length;
  const baseName = filename.replace('.html', '');
  const counts = { images: 0, css: 0, fonts: 0 };

  // 1. Base64 images.
  console.log(' 提取 base64 图片...');
  content = extractImages(content, baseName, counts);

  // 2. Base64 fonts (must run while the @font-face rules are still in the document).
  console.log(' 提取字体文件...');
  content = extractFonts(content, baseName, counts);

  // 3. Inline style sheets.
  console.log(' 提取内联 CSS...');
  const styles = extractStyles(content, baseName, counts);
  content = styles.content;
  const { cssLinks } = styles;

  // 4. DOCTYPE and HTML comments (including multi-line ones).
  console.log(' 去除 DOCTYPE 和 HTML 注释...');
  const doctypes = removeAll(content, /<!DOCTYPE[^>]*>/gi);
  const comments = removeAll(doctypes.content, /<!--[\s\S]*?-->/g);
  content = comments.content;
  const commentCount = comments.removed;
  if (doctypes.removed > 0 || commentCount > 0) {
    console.log(` - 已删除 DOCTYPE 和 ${commentCount} 个 HTML 注释`);
  }

  // 5. meta, title and link tags. Stylesheet links are kept by the negative look-ahead.
  console.log(' 去除 meta、title 和 link 标签...');
  const metas = removeAll(content, /<meta[^>]*>/gi);
  const titles = removeAll(metas.content, /<title[^>]*>[\s\S]*?<\/title>/gi);
  const links = removeAll(titles.content, /<link(?![^>]*rel=["']stylesheet["'])[^>]*>/gi);
  content = links.content;
  const metaCount = metas.removed;
  const titleCount = titles.removed;
  const linkCount = links.removed;
  if (metaCount > 0 || titleCount > 0 || linkCount > 0) {
    console.log(` - 已删除 ${metaCount} 个 meta、${titleCount} 个 title 和 ${linkCount} 个 link 标签`);
  }

  // 6. script tags, inline and external.
  console.log(' 去除 script 标签...');
  const scripts = removeAll(content, /<script[^>]*>[\s\S]*?<\/script>/gi);
  content = scripts.content;
  const scriptCount = scripts.removed;
  if (scriptCount > 0) {
    console.log(` - 已删除 ${scriptCount} 个 script 标签`);
  }

  // 7. Keep only the body content, with the generated style-sheet links on top.
  console.log(' 重组 HTML 结构...');
  const cssHeader = cssLinks.length > 0 ? `${cssLinks.join('\n')}\n\n` : '';
  const bodyMatch = content.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (bodyMatch) {
    content = cssHeader + bodyMatch[1];
    console.log(` - 已提取 body 内容并移除其他标签`);
  } else {
    // No <body> element: strip the structural tags individually instead.
    content = content
      .replace(/<\/?html[^>]*>/gi, '')
      .replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '')
      .replace(/<\/?body[^>]*>/gi, '');
    content = cssHeader + content;
    console.log(` - 已去除 HTML 结构标签`);
  }

  // Collapse runs of blank lines left behind by the removals.
  content = content.replace(/\n\s*\n\s*\n/g, '\n\n');

  // 8. Quote attribute values.
  console.log(' 给HTML属性加上引号...');
  content = quoteAttributeValues(content);

  fs.writeFileSync(path.join(PUBLIC_DIR, filename), content);

  const newSize = content.length;
  // Guard against an empty input file, which would otherwise print "NaN%".
  const reduction = originalSize === 0
    ? '0.0'
    : ((originalSize - newSize) / originalSize * 100).toFixed(1);

  console.log(` ✅ 完成:`);
  console.log(` - 提取图片: ${counts.images} 个`);
  console.log(` - 提取 CSS: ${counts.css} 个`);
  console.log(` - 提取字体: ${counts.fonts} 个`);
  console.log(` - 删除 meta: ${metaCount} 个`);
  console.log(` - 删除 title: ${titleCount} 个`);
  console.log(` - 删除 link: ${linkCount} 个`);
  console.log(` - 删除 script: ${scriptCount} 个`);
  console.log(` - 原始大小: ${(originalSize / 1024).toFixed(2)} KB`);
  console.log(` - 新大小: ${(newSize / 1024).toFixed(2)} KB`);
  console.log(` - 减少: ${reduction}%`);

  if (counts.images > 0 || counts.css > 0 || counts.fonts > 0) {
    console.log(` ℹ️ 提示: 请确保资源路径 /static/st/assets/ 在服务器上可访问\n`);
  } else {
    console.log(` ℹ️ 未找到可提取的资源\n`);
  }
}

FILES_TO_PROCESS.forEach((filename) => processFile(filename));
|
||||
|
||||
// Final report: where the extracted resources were written, followed by the
// list of clean-up steps the script applies to each processed file.
[
  '✅ 资源提取完成!',
  '\n📁 资源文件位置:',
  ` - 图片: ${IMG_DIR}`,
  ` - CSS: ${CSS_DIR}`,
  ` - 字体: ${FONTS_DIR}`,
  '\n✨ 自动优化:',
  ' ✅ DOCTYPE 和 HTML 注释已删除',
  ' ✅ 所有 meta、title 和 link 标签已删除(保留生成的 stylesheet)',
  ' ✅ 所有 script 标签已删除',
  ' ✅ 所有 style 已提取到 CSS 文件',
  ' ✅ 只保留 body 内的内容',
  ' ✅ CSS 引用已放到顶部',
  ' ✅ 图片扩展名已修复(svg+xml → svg)',
  ' ✅ 使用绝对路径(/static/st/assets/)',
  ' ✅ HTML 属性值已自动加上双引号',
  '\n💡 提示: 刷新浏览器测试页面,所有资源应该正常加载',
].forEach((line) => {
  console.log(line);
});
|
||||
Reference in New Issue
Block a user