如何使用 ONLYOFFICE 宏分析文档
在当今快节奏的数字环境中，作家、编辑和内容创作者往往难以获取有关文档的深入见解。了解可读性、词频和结构平衡等指标有助于显著提升文档质量，但手动分析既费时又缺乏一致性。在本文中，我们将为您演示如何编写一个强大的 ONLYOFFICE 宏，自动分析文档内容并生成详尽的分析报告。
构建文档分析宏
让我们将宏分解成功能组件，并解释每个部分的工作原理。
设置主函数
宏的核心是 analyzeDocument() 函数，它负责协调整个分析过程：
// Entry point of the macro.
// Reads every paragraph of the current document, runs the three analysis
// helpers on the combined text and appends the resulting report to the
// document. Outcomes (warning, success, error) are written to the console;
// nothing is returned.
function analyzeDocument() {
  try {
    var doc = Api.GetDocument();
    var paragraphs = doc.GetAllParagraphs();

    // Nothing to analyze: say so and stop before computing any statistics.
    if (paragraphs.length === 0) {
      console.log("Warning: Document is empty or no paragraphs found for analysis.");
      return;
    }

    // Concatenate the paragraph texts; each one is followed by a single space
    // so that words from adjacent paragraphs do not run together.
    var combinedText = "";
    for (var index = 0; index < paragraphs.length; index++) {
      combinedText += paragraphs[index].GetText() + " ";
    }

    // Run the analyses and hand all results to the report writer.
    var basic = calculateBasicStats(combinedText, paragraphs);
    var advanced = calculateAdvancedStats(combinedText, basic);
    var topWords = findCommonWords(combinedText, 10);
    createAndAddReport(doc, basic, advanced, topWords);

    console.log("Success: Document analysis completed. Report added to the end of the document.");
  } catch (error) {
    // Any failure above is reported to the console instead of propagating.
    console.log("Error: " + error.message);
  }
}
此函数首先收集文档中的所有文本，然后将其传递给专门的分析函数，最终创建报告。try-catch 代码块可确保宏在出错时将错误信息输出到控制台，而不会意外中断。
计算基本统计数据
calculateBasicStats() 函数处理文本以提取基本指标：
// Derives the basic counts for a piece of text.
//   text       - the full document text as one string
//   paragraphs - the list of paragraphs; only its length is used
// Returns an object with wordCount, sentenceCount, paragraphCount,
// charCountWithSpaces, charCountWithoutSpaces, lineCount, plus the
// underlying `words` and `sentences` arrays.
function calculateBasicStats(text, paragraphs) {
  // A word is any maximal run of non-whitespace characters.
  var tokens = text.match(/\S+/g) || [];

  // A sentence is any run of characters between '.', '!' or '?' that is not
  // purely whitespace. Pieces are kept untrimmed.
  var sentencePieces = (text.match(/[^.!?]+/g) || []).filter(function(piece) {
    return piece.trim() !== "";
  });

  var totalChars = text.length;

  return {
    wordCount: tokens.length,
    sentenceCount: sentencePieces.length,
    paragraphCount: paragraphs.length,
    charCountWithSpaces: totalChars,
    // The tokens are exactly the non-whitespace parts of the text, so their
    // combined length is the character count without whitespace.
    charCountWithoutSpaces: tokens.join("").length,
    // Rough estimate: one line per 70 characters.
    lineCount: Math.ceil(totalChars / 70),
    words: tokens,
    sentences: sentencePieces
  };
}
此函数将文本拆分为单词和句子，统计段落数，并计算字符数（含空格与不含空格）以及按每行约 70 个字符估算的行数。
执行高级分析
为了获得更深入的洞察，calculateAdvancedStats() 函数可以计算更复杂的指标：
// Derives averages, a readability score and a reading-time estimate from the
// basic statistics.
//   text       - the full document text (currently unused; kept so callers
//                do not need to change)
//   basicStats - object providing wordCount, sentenceCount, paragraphCount
//                and the `words` array
// Returns { avgWordsPerSentence, avgWordsPerParagraph, avgWordLength,
//           readabilityScore, readingTimeMinutes }.
function calculateAdvancedStats(text, basicStats) {
  var wordCount = basicStats.wordCount;

  // Every divisor is clamped to at least 1 so that empty input produces 0
  // instead of NaN or Infinity.
  var avgWordsPerSentence = wordCount / Math.max(1, basicStats.sentenceCount);
  var avgWordsPerParagraph = wordCount / Math.max(1, basicStats.paragraphCount);

  // Average word length in characters.
  var totalWordLength = basicStats.words.reduce(function(sum, word) {
    return sum + word.length;
  }, 0);
  var avgWordLength = totalWordLength / Math.max(1, wordCount);

  // Readability score built on the Flesch Reading Ease constants.
  // NOTE(review): the standard formula multiplies 84.6 by syllables per word;
  // this simplified version substitutes characters per word, so the result is
  // not comparable to published Flesch scales.
  // The guarded average is reused here so a document with zero words no
  // longer produces NaN (previously this divided by wordCount directly).
  var readabilityScore = 206.835 - 1.015 * avgWordsPerSentence - 84.6 * avgWordLength;

  // Estimated reading time, assuming 200 words per minute, rounded up.
  var readingTimeMinutes = Math.ceil(wordCount / 200);

  return {
    avgWordsPerSentence: avgWordsPerSentence,
    avgWordsPerParagraph: avgWordsPerParagraph,
    avgWordLength: avgWordLength,
    readabilityScore: readabilityScore,
    readingTimeMinutes: readingTimeMinutes
  };
}
此函数会计算平均句子长度、平均段落长度、平均单词长度、可读性得分以及预计阅读时间。
分析词频
findCommonWords() 函数可识别最常用的单词：
// Finds the most frequently used words in a text.
//   text  - the text to analyze
//   limit - maximum number of entries to return
// Returns an array of { word, frequency } objects ordered from most to least
// frequent. Words are lower-cased, stripped of punctuation, and ignored when
// they are 3 characters or shorter or appear in the stop-word list.
function findCommonWords(text, limit) {
  // Lower-case and strip punctuation. '?' and '"' and square brackets are
  // included so that "what?" and "what" are counted as the same word.
  var cleanText = text.toLowerCase().replace(/[.,\/#!?$%\^&\*;:{}=\-_`~()"\[\]]/g, "");

  // Split on whitespace and drop very short words.
  var words = cleanText.split(/\s+/).filter(function(word) {
    return word.length > 3;
  });

  // Count occurrences. The counter has no prototype, so a word such as
  // "constructor" cannot collide with a property inherited from
  // Object.prototype and corrupt its count.
  var wordFrequency = Object.create(null);
  words.forEach(function(word) {
    wordFrequency[word] = (wordFrequency[word] || 0) + 1;
  });

  // Remove stop words.
  var stopWords = ["this", "that", "with", "from", "have", "been"];
  stopWords.forEach(function(stopWord) {
    delete wordFrequency[stopWord];
  });

  // Order the remaining words by descending frequency.
  var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
    return wordFrequency[b] - wordFrequency[a];
  });

  // Return the top `limit` words together with their counts.
  return sortedWords.slice(0, limit).map(function(word) {
    return { word: word, frequency: wordFrequency[word] };
  });
}
此函数会删除标点符号，忽略长度不超过 3 个字符的短词，过滤常用填充词，并返回文档中最常用的单词。
生成报告
最后，createAndAddReport() 函数会编译并格式化所有分析结果：
// Appends the analysis report to the end of the document (abridged version:
// the sections marked with "..." are omitted here).
//   oDocument     - the document that receives the report
//   basicStats    - result of calculateBasicStats()
//   advancedStats - result of calculateAdvancedStats()
//   commonWords   - result of findCommonWords()
function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
  // Places a paragraph after the current last element of the document.
  function appendToEnd(paragraph) {
    oDocument.AddElement(oDocument.GetElementsCount(), paragraph);
  }

  // Creates a paragraph holding the given text and appends it.
  function appendLine(content) {
    var line = Api.CreateParagraph();
    line.AddText(content);
    appendToEnd(line);
  }

  // Start the report on a new page.
  var pageBreak = Api.CreateParagraph();
  pageBreak.AddPageBreak();
  appendToEnd(pageBreak);

  // Report title and first section heading.
  appendLine("DOCUMENT ANALYSIS REPORT");
  appendLine("BASIC STATISTICS");

  // Add statistics content
  // ... (code that adds individual statistics)
  // Add advanced analysis section
  // ... (code that adds advanced metrics)
  // Add word frequency section
  // ... (code that adds word frequency list)

  // Closing note stamped with the local date and time of generation.
  var footer = Api.CreateParagraph();
  footer.AddText("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " +
    new Date().toLocaleString() + ".");
  appendToEnd(footer);
}
此函数在文档末尾创建一个包含所有分析结果的结构化报告。
完整的宏代码
以下是您可以复制并使用的完整宏代码：
(function() {
// Main function - starts all operations
// Orchestrates the whole run: gathers the document text, computes the
// statistics, writes the report and logs the outcome to the console.
// Returns nothing; failures are logged rather than thrown.
function analyzeDocument() {
  try {
    var targetDocument = Api.GetDocument();
    var paragraphList = targetDocument.GetAllParagraphs();

    // An empty paragraph list means there is nothing to analyze.
    if (paragraphList.length === 0) {
      console.log("Warning: Document is empty or no paragraphs found for analysis.");
      return;
    }

    // Fold all paragraph texts into one string, each followed by a space.
    var fullText = paragraphList.reduce(function(collected, paragraph) {
      return collected + paragraph.GetText() + " ";
    }, "");

    // Basic counts first, because the advanced metrics are derived from them.
    var basicResult = calculateBasicStats(fullText, paragraphList);
    var advancedResult = calculateAdvancedStats(fullText, basicResult);

    // Ten most frequent words.
    var frequentWords = findCommonWords(fullText, 10);

    // Write everything to the end of the document and tell the user.
    createAndAddReport(targetDocument, basicResult, advancedResult, frequentWords);
    console.log("Success: Document analysis completed. Report added to the end of the document.");
  } catch (error) {
    console.log("Error: An error occurred during processing: " + error.message);
  }
}
// Calculate basic statistics
// Computes the basic counts for the given text.
//   text       - the full document text as one string
//   paragraphs - the list of paragraphs; only its length is used
// Returns { wordCount, sentenceCount, paragraphCount, charCountWithSpaces,
//           charCountWithoutSpaces, lineCount, words, sentences }.
function calculateBasicStats(text, paragraphs) {
  // Words: whitespace-separated pieces, with empty pieces discarded.
  var wordList = text.split(/\s+/).filter(Boolean);

  // Sentences: pieces between runs of '.', '!' or '?' that contain at least
  // one non-whitespace character.
  var sentenceList = text.split(/[.!?]+/).filter(function(candidate) {
    return /\S/.test(candidate);
  });

  // Character totals, with and without whitespace.
  var lengthWithSpaces = text.length;
  var lengthWithoutSpaces = text.replace(/\s/g, "").length;

  // Approximate line count, assuming about 70 characters per line.
  var estimatedLines = Math.ceil(lengthWithSpaces / 70);

  var result = {};
  result.wordCount = wordList.length;
  result.sentenceCount = sentenceList.length;
  result.paragraphCount = paragraphs.length;
  result.charCountWithSpaces = lengthWithSpaces;
  result.charCountWithoutSpaces = lengthWithoutSpaces;
  result.lineCount = estimatedLines;
  result.words = wordList;
  result.sentences = sentenceList;
  return result;
}
// Calculate advanced statistics
// Derives averages, a readability score and a reading-time estimate.
//   text       - the full document text (not used by the calculation)
//   basicStats - object providing wordCount, sentenceCount, paragraphCount
//                and the `words` array
// Returns { avgWordsPerSentence, avgWordsPerParagraph, avgWordLength,
//           readabilityScore, readingTimeMinutes }.
function calculateAdvancedStats(text, basicStats) {
  var totalWords = basicStats.wordCount;

  // Divisors are clamped to at least 1 so empty input yields 0, not NaN.
  var sentenceDivisor = Math.max(1, basicStats.sentenceCount);
  var paragraphDivisor = Math.max(1, basicStats.paragraphCount);
  var wordDivisor = Math.max(1, totalWords);

  // Total number of characters across all words.
  var letterTotal = 0;
  for (var i = 0; i < basicStats.words.length; i++) {
    letterTotal += basicStats.words[i].length;
  }

  var wordsPerSentence = totalWords / sentenceDivisor;
  var lettersPerWord = letterTotal / wordDivisor;

  return {
    avgWordsPerSentence: wordsPerSentence,
    avgWordsPerParagraph: totalWords / paragraphDivisor,
    avgWordLength: lettersPerWord,
    // Uses the Flesch Reading Ease constants.
    // NOTE(review): the standard formula uses syllables per word; characters
    // per word is substituted here, so scores are not on the standard scale.
    readabilityScore: 206.835 - 1.015 * wordsPerSentence - 84.6 * lettersPerWord,
    // Reading time in whole minutes at 200 words per minute, rounded up.
    readingTimeMinutes: Math.ceil(totalWords / 200)
  };
}
// Find most common words
// Finds the most frequently used words in a text.
//   text  - the text to analyze
//   limit - maximum number of entries to return
// Returns an array of { word, frequency } objects ordered from most to least
// frequent. Words are lower-cased, stripped of punctuation, and ignored when
// they are 3 characters or shorter or appear in the stop-word list.
function findCommonWords(text, limit) {
  // Lower-case and strip punctuation. '?' and '"' and square brackets are
  // included so that "what?" and "what" are counted as the same word.
  var cleanText = text.toLowerCase().replace(/[.,\/#!?$%\^&\*;:{}=\-_`~()"\[\]]/g, "");

  // Split into words and filter out very short ones.
  var words = cleanText.split(/\s+/).filter(function(word) {
    return word.length > 3;
  });

  // Count occurrences. The counter has no prototype, so a word such as
  // "constructor" is not mistaken for an existing entry through a property
  // inherited from Object.prototype (which used to produce a NaN count).
  var wordFrequency = Object.create(null);
  words.forEach(function(word) {
    wordFrequency[word] = (wordFrequency[word] || 0) + 1;
  });

  // Filter stop words (common English words).
  var stopWords = ["this", "that", "these", "those", "with", "from", "have", "been", "were", "they", "their", "what", "when", "where", "which", "there", "will", "would", "could", "should", "about", "also"];
  stopWords.forEach(function(stopWord) {
    delete wordFrequency[stopWord];
  });

  // Sort by descending frequency.
  var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
    return wordFrequency[b] - wordFrequency[a];
  });

  // Keep the top N words and return them as word-frequency pairs.
  return sortedWords.slice(0, limit).map(function(word) {
    return {
      word: word,
      frequency: wordFrequency[word]
    };
  });
}
// Create and add report to document
// Appends the full analysis report to the end of the document.
//   oDocument     - the document that receives the report
//   basicStats    - counts produced by calculateBasicStats()
//   advancedStats - metrics produced by calculateAdvancedStats()
//   commonWords   - array of { word, frequency } from findCommonWords()
// Every report line is its own paragraph, added after the current last
// element of the document.
function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
  // Places a paragraph after the current last element of the document.
  function appendToEnd(paragraph) {
    oDocument.AddElement(oDocument.GetElementsCount(), paragraph);
  }

  // Creates a paragraph holding the given text and appends it.
  function appendLine(content) {
    var line = Api.CreateParagraph();
    line.AddText(content);
    appendToEnd(line);
  }

  // Start the report on a new page.
  var pageBreak = Api.CreateParagraph();
  pageBreak.AddPageBreak();
  appendToEnd(pageBreak);

  // Main title, written in capital letters.
  appendLine("DOCUMENT ANALYSIS REPORT");

  // Section 1: basic counts.
  appendLine("BASIC STATISTICS");
  appendLine("• Word Count: " + basicStats.wordCount);
  appendLine("• Sentence Count: " + basicStats.sentenceCount);
  appendLine("• Paragraph Count: " + basicStats.paragraphCount);
  appendLine("• Character Count (with spaces): " + basicStats.charCountWithSpaces);
  appendLine("• Character Count (without spaces): " + basicStats.charCountWithoutSpaces);
  appendLine("• Estimated Line Count: " + basicStats.lineCount);

  // Section 2: derived metrics, shown with two decimals where fractional.
  appendLine("ADVANCED ANALYSIS");
  appendLine("• Average Sentence Length: " + advancedStats.avgWordsPerSentence.toFixed(2) + " words");
  appendLine("• Average Paragraph Length: " + advancedStats.avgWordsPerParagraph.toFixed(2) + " words");
  appendLine("• Average Word Length: " + advancedStats.avgWordLength.toFixed(2) + " characters");
  appendLine("• Readability Score: " + advancedStats.readabilityScore.toFixed(2));
  appendLine("• Estimated Reading Time: " + advancedStats.readingTimeMinutes + " minutes");

  // Section 3: numbered list of the most frequent words (a simple list, not
  // a table), or a placeholder line when there are none.
  appendLine("MOST FREQUENTLY USED WORDS");
  if (commonWords.length > 0) {
    commonWords.forEach(function(entry, position) {
      appendLine((position + 1) + ". " + entry.word + " (" + entry.frequency + " times)");
    });
  } else {
    appendLine("No frequently used words found.");
  }

  // Footer note stamped with the local date and time of generation.
  var footer = Api.CreateParagraph();
  footer.AddText("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " +
    new Date().toLocaleString() + ".");
  appendToEnd(footer);
}
// Run the macro
analyzeDocument();
})();
要在 ONLYOFFICE 中使用此宏
- 在 ONLYOFFICE 中打开您的文档
- 导航到“视图”选项卡并选择“宏”
- 创建新的宏并粘贴代码
- 运行宏
- 详细的分析报告将添加到文档末尾
对于希望在现代办公环境中自动化文本分析和文档流程的专业人士来说，此宏是一个非常好的工具。
我们鼓励您探索 ONLYOFFICE API 文档，以创建自定义宏或增强现有宏。若有任何改进意见或新宏的建议，欢迎随时与我们联系。您的反馈对我们持续开发更高效的文档创建和编辑工具至关重要。
关于作者
Firat Demir：我是一名 JavaScript 开发者和前端开发者。我运用现代 Web 技术开发用户友好的界面，并通过我的博客文章分享我的经验和行业知识。我热爱编写代码、学习并分享我所学到的知识。
创建免费的 ONLYOFFICE 账户
在线查看并协作编辑文本文档、电子表格、幻灯片、表单和 PDF 文件。