如何使用ONLYOFFICE宏分析PDF表单

2025年04月30日作者:Krystal

在当今快节奏的数字环境中,作家、编辑和内容创作者往往难以获取有关文档的深入见解。了解可读性、词频和结构平衡等指标有助于显著提升文档质量,但手动分析既费时又缺乏一致性。在本文中,我们将为您演示如何编写一个强大的 ONLYOFFICE 宏,自动分析文档内容并生成详尽的分析报告。

如何使用 ONLYOFFICE 宏分析 PDF 格式文档

构建文档分析宏

让我们将宏分解成功能组件,并解释每个部分的工作原理。

设置主函数

宏的核心是 analyzeDocument() 函数,它负责协调整个分析过程:

function analyzeDocument() {
    try {
        // Get document and all text
        var oDocument = Api.GetDocument();
        var allText = "";
        var paragraphs = oDocument.GetAllParagraphs();
        
        // Check if document is empty
        if (paragraphs.length === 0) {
            console.log("Warning: Document is empty or no paragraphs found for analysis.");
            return;
        }
        
        // Collect all text
        paragraphs.forEach(function(paragraph) {
            allText += paragraph.GetText() + " ";
        });
        
        // Perform analyses
        var stats = calculateBasicStats(allText, paragraphs);
        var advancedStats = calculateAdvancedStats(allText, stats);
        var commonWords = findCommonWords(allText, 10);
        
        // Create report
        createAndAddReport(oDocument, stats, advancedStats, commonWords);
        
        // Log success
        console.log("Success: Document analysis completed. Report added to the end of the document.");
    } catch (error) {
        console.log("Error: " + error.message);
    }
}

此函数首先收集文档中的所有文本,然后将其传递给专门的分析函数,最终创建报告。try-catch 代码块确保宏能够正常处理任何错误。

计算基本统计数据

calculateBasicStats() 函数处理文本以提取基本指标:

function calculateBasicStats(text, paragraphs) {
    // Word count
    var words = text.split(/\s+/).filter(function(word) { 
        return word.length > 0; 
    });
    var wordCount = words.length;
    
    // Sentence count
    var sentences = text.split(/[.!?]+/).filter(function(sentence) { 
        return sentence.trim().length > 0; 
    });
    var sentenceCount = sentences.length;
    
    // Paragraph count
    var paragraphCount = paragraphs.length;
    
    // Character count
    var charCountWithSpaces = text.length;
    var charCountWithoutSpaces = text.replace(/\s+/g, "").length;
    
    // Line count (approximate)
    var lineCount = Math.ceil(charCountWithSpaces / 70);
    
    return {
        wordCount: wordCount,
        sentenceCount: sentenceCount,
        paragraphCount: paragraphCount,
        charCountWithSpaces: charCountWithSpaces,
        charCountWithoutSpaces: charCountWithoutSpaces,
        lineCount: lineCount,
        words: words,
        sentences: sentences
    };
}

此函数将文本拆分为单词和句子,统计段落数,并计算字符数和行数。

执行高级分析

为了获得更深入的洞察,calculateAdvancedStats() 函数可以计算更复杂的指标:

function calculateAdvancedStats(text, basicStats) {
    // Average sentence length
    var avgWordsPerSentence = basicStats.wordCount / Math.max(1, basicStats.sentenceCount);
    
    // Average paragraph length
    var avgWordsPerParagraph = basicStats.wordCount / Math.max(1, basicStats.paragraphCount);
    
    // Average word length
    var totalWordLength = basicStats.words.reduce(function(sum, word) {
        return sum + word.length;
    }, 0);
    var avgWordLength = totalWordLength / Math.max(1, basicStats.wordCount);
    
    // Readability score (simplified Flesch-Kincaid)
    var readabilityScore = 206.835 - 1.015 * avgWordsPerSentence - 84.6 * (totalWordLength / basicStats.wordCount);
    
    // Estimated reading time
    var readingTimeMinutes = Math.ceil(basicStats.wordCount / 200);
    
    return {
        avgWordsPerSentence: avgWordsPerSentence,
        avgWordsPerParagraph: avgWordsPerParagraph,
        avgWordLength: avgWordLength,
        readabilityScore: readabilityScore,
        readingTimeMinutes: readingTimeMinutes
    };
}

这会计算平均句子和段落长度、可读性得分以及预计阅读时间。

分析词频

findCommonWords() 函数可识别最常用的单词:

function findCommonWords(text, limit) {
    // Clean text and convert to lowercase
    var cleanText = text.toLowerCase().replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
    
    // Split into words
    var words = cleanText.split(/\s+/).filter(function(word) { 
        return word.length > 3; 
    });
    
    // Calculate word frequencies
    var wordFrequency = {};
    words.forEach(function(word) {
        wordFrequency[word] = (wordFrequency[word] || 0) + 1;
    });
    
    // Filter stop words
    var stopWords = ["this", "that", "with", "from", "have", "been"];
    stopWords.forEach(function(stopWord) {
        delete wordFrequency[stopWord];
    });
    
    // Sort by frequency
    var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
        return wordFrequency[b] - wordFrequency[a];
    });
    
    // Return top N words
    return sortedWords.slice(0, limit).map(function(word) {
        return { word: word, frequency: wordFrequency[word] };
    });
}
function findCommonWords(text, limit) {
    // Clean text and convert to lowercase
    var cleanText = text.toLowerCase().replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
    
    // Split into words
    var words = cleanText.split(/\s+/).filter(function(word) { 
        return word.length > 3; 
    });
    
    // Calculate word frequencies
    var wordFrequency = {};
    words.forEach(function(word) {
        wordFrequency[word] = (wordFrequency[word] || 0) + 1;
    });
    
    // Filter stop words
    var stopWords = ["this", "that", "with", "from", "have", "been"];
    stopWords.forEach(function(stopWord) {
        delete wordFrequency[stopWord];
    });
    
    // Sort by frequency
    var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
        return wordFrequency[b] - wordFrequency[a];
    });
    
    // Return top N words
    return sortedWords.slice(0, limit).map(function(word) {
        return { word: word, frequency: wordFrequency[word] };
    });
}

此函数会删除标点符号,过滤常用填充词,并返回文档中最常用的单词。

生成报告

最后,createAndAddReport() 函数会编译并格式化所有分析结果:

function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
    // Add new page
    var oParagraph = Api.CreateParagraph();
    oParagraph.AddPageBreak();
    oDocument.AddElement(oDocument.GetElementsCount(), oParagraph);
    
    // Add title
    var oHeading = Api.CreateParagraph();
    oHeading.AddText("DOCUMENT ANALYSIS REPORT");
    oDocument.AddElement(oDocument.GetElementsCount(), oHeading);
    
    // Add basic statistics section
    var oSubHeading = Api.CreateParagraph();
    oSubHeading.AddText("BASIC STATISTICS");
    oDocument.AddElement(oDocument.GetElementsCount(), oSubHeading);
    
    // Add statistics content
    // ... (code that adds individual statistics)
    
    // Add advanced analysis section
    // ... (code that adds advanced metrics)
    
    // Add word frequency section
    // ... (code that adds word frequency list)
    
    // Add footer
    var oFootnotePara = Api.CreateParagraph();
    oFootnotePara.AddText("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " + 
                        new Date().toLocaleString() + ".");
    oDocument.AddElement(oDocument.GetElementsCount(), oFootnotePara);
}

此功能在文档末尾创建一个包含所有分析结果的结构化报告。

How to analyze PDF form documents with ONLYOFFICE macro

完整的宏代码

以下是您可以复制并使用的完整宏代码:

(function() {
    // Main function - starts all operations
    function analyzeDocument() {
        try {
            // Get document and all text
            var oDocument = Api.GetDocument();
            var allText = "";
            var paragraphs = oDocument.GetAllParagraphs();
            
            // Check if document is empty
            if (paragraphs.length === 0) {
                console.log("Warning: Document is empty or no paragraphs found for analysis.");
                return;
            }
            
            // Collect all text
            paragraphs.forEach(function(paragraph) {
                allText += paragraph.GetText() + " ";
            });
            
            // Calculate basic statistics
            var stats = calculateBasicStats(allText, paragraphs);
            
            // Perform advanced analysis
            var advancedStats = calculateAdvancedStats(allText, stats);
            
            // Find most common words
            var commonWords = findCommonWords(allText, 10);
            
            // Create and add report to the document
            createAndAddReport(oDocument, stats, advancedStats, commonWords);
            
            // Inform user
            console.log("Success: Document analysis completed. Report added to the end of the document.");
        } catch (error) {
           console.log("Error: An error occurred during processing: " +         error.message);
        }
    }
    
    // Calculate basic statistics
    function calculateBasicStats(text, paragraphs) {
        // Word count
        var words = text.split(/\s+/).filter(function(word) { 
            return word.length > 0; 
        });
        var wordCount = words.length;
        
        // Sentence count
        var sentences = text.split(/[.!?]+/).filter(function(sentence) { 
            return sentence.trim().length > 0; 
        });
 var sentenceCount = sentences.length;
        
        // Paragraph count
        var paragraphCount = paragraphs.length;
        
        // Character count (with and without spaces)
        var charCountWithSpaces = text.length;
        var charCountWithoutSpaces = text.replace(/\s+/g, "").length;
        
        // Line count (approximate)
        var lineCount = Math.ceil(charCountWithSpaces / 70); // Approximately 70 characters/line
   return {
            wordCount: wordCount,
            sentenceCount: sentenceCount,
            paragraphCount: paragraphCount,
            charCountWithSpaces: charCountWithSpaces,
            charCountWithoutSpaces: charCountWithoutSpaces,
            lineCount: lineCount,
            words: words,
            sentences: sentences
        };
    }
    
    // Calculate advanced statistics
    function calculateAdvancedStats(text, basicStats) {
        // Average sentence length (in words)
        var avgWordsPerSentence = basicStats.wordCount / Math.max(1, basicStats.sentenceCount);
        
        // Average paragraph length (in words)
        var avgWordsPerParagraph = basicStats.wordCount / Math.max(1, basicStats.paragraphCount);
        
        // Average word length (in characters)
        var totalWordLength = basicStats.words.reduce(function(sum, word) {
            return sum + word.length;
        }, 0);
        var avgWordLength = totalWordLength / Math.max(1, basicStats.wordCount);
        
        // Readability score (simplified Flesch-Kincaid)
        var readabilityScore = 206.835 - 1.015 * (basicStats.wordCount / Math.max(1, basicStats.sentenceCount)) - 84.6 * (totalWordLength / Math.max(1, basicStats.wordCount));
        
        // Estimated reading time (minutes)
        var readingTimeMinutes = Math.ceil(basicStats.wordCount / 200); // Average reading speed 200 words/minute
        
        return {
            avgWordsPerSentence: avgWordsPerSentence,
            avgWordsPerParagraph: avgWordsPerParagraph,
            avgWordLength: avgWordLength,
            readabilityScore: readabilityScore,
            readingTimeMinutes: readingTimeMinutes
        };
    }
    
    // Find most common words
    function findCommonWords(text, limit) {
        // Clean text and convert to lowercase
        var cleanText = text.toLowerCase().replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
        
   // Split into words
        var words = cleanText.split(/\s+/).filter(function(word) { 
            return word.length > 3; // Filter out very short words
        });
   // Calculate word frequencies
        var wordFrequency = {};
        words.forEach(function(word) {
            if (wordFrequency[word]) {
                wordFrequency[word]++;
            } else {
                wordFrequency[word] = 1;
            }
        });

   // Filter stop words (common English words)
        var stopWords = ["this", "that", "these", "those", "with", "from", "have", "been", "were", "they", "their", "what", "when", "where", "which", "there", "will", "would", "could", "should", "about", "also"];
        stopWords.forEach(function(stopWord) {
            if (wordFrequency[stopWord]) {
                delete wordFrequency[stopWord];
            }
        });
        
        // Sort by frequency
        var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
            return wordFrequency[b] - wordFrequency[a];
        });
        
        // Take top N words
        var topWords = sortedWords.slice(0, limit);
        
        // Return results as word-frequency pairs
        return topWords.map(function(word) {
            return {
                word: word,
                frequency: wordFrequency[word]
            };
        });
    }
   
    // Create and add report to document
    function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
        // Add new page
        var oParagraph = Api.CreateParagraph();
        oParagraph.AddPageBreak();
        oDocument.AddElement(oDocument.GetElementsCount(), oParagraph);
        
        // Main title - highlighting in capital letters
        var oHeading = Api.CreateParagraph();
        oHeading.AddText("DOCUMENT ANALYSIS REPORT");
        oDocument.AddElement(oDocument.GetElementsCount(), oHeading);
        
        // Subheading - in capital letters
        var oSubHeading = Api.CreateParagraph();
        oSubHeading.AddText("BASIC STATISTICS");
        oDocument.AddElement(oDocument.GetElementsCount(), oSubHeading);

        // Add basic statistics
        var oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Word Count: " + basicStats.wordCount);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Sentence Count: " + basicStats.sentenceCount);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Paragraph Count: " +      basicStats.paragraphCount);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Character Count (with spaces): " +  basicStats.charCountWithSpaces);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Character Count (without spaces): " +  basicStats.charCountWithoutSpaces);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        

        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Estimated Line Count: " + basicStats.lineCount);
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        // Advanced analysis title
        oSubHeading = Api.CreateParagraph();
        oSubHeading.AddText("ADVANCED ANALYSIS");
        oDocument.AddElement(oDocument.GetElementsCount(), oSubHeading);
        
        // Add advanced analysis results
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Average Sentence Length: " + advancedStats.avgWordsPerSentence.toFixed(2) + " words");
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Average Paragraph Length: " + advancedStats.avgWordsPerParagraph.toFixed(2) + " words");
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Average Word Length: " + advancedStats.avgWordLength.toFixed(2) + " characters");
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Readability Score: " + advancedStats.readabilityScore.toFixed(2));
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        oStatsPara = Api.CreateParagraph();
        oStatsPara.AddText("• Estimated Reading Time: " + advancedStats.readingTimeMinutes + " minutes");
        oDocument.AddElement(oDocument.GetElementsCount(), oStatsPara);
        
        // Common words title
        oSubHeading = Api.CreateParagraph();
        oSubHeading.AddText("MOST FREQUENTLY USED WORDS");
        oDocument.AddElement(oDocument.GetElementsCount(), oSubHeading);
        
        // We'll create a simple list instead of a table
        if (commonWords.length > 0) {
            for (var i = 0; i < commonWords.length; i++) {
                var oWordPara = Api.CreateParagraph();
                oWordPara.AddText((i + 1) + ". " + commonWords[i].word + " (" + commonWords[i].frequency + " times)");
                oDocument.AddElement(oDocument.GetElementsCount(), oWordPara);
            }
        } else {
            var oNoneFoundPara = Api.CreateParagraph();
            oNoneFoundPara.AddText("No frequently used words found.");
            oDocument.AddElement(oDocument.GetElementsCount(), oNoneFoundPara);
        }
        
        // Footer note
        var oFootnotePara = Api.CreateParagraph();
        oFootnotePara.AddText("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " + 
                            new Date().toLocaleString() + ".");
        oDocument.AddElement(oDocument.GetElementsCount(), oFootnotePara);
    }
    
    // Run the macro
    analyzeDocument();
})();

     

要在 ONLYOFFICE 中使用此宏

  1. 在 ONLYOFFICE 中打开您的文档
  2. 导航到“视图”选项卡并选择“宏”
  3. 创建新的宏并粘贴代码
  4. 运行宏
  5. 详细的分析报告将添加到文档末尾

对于希望在现代办公环境中自动化文本分析和文档流程的专业人士来说,此宏是一个非常好的工具。

我们鼓励您探索 ONLYOFFICE API 文档,以创建自定义宏或增强现有宏。若有任何改进意见或新宏的建议,欢迎随时与我们联系。您的反馈对我们持续开发更高效文档创建和编辑工具至关重要。

关于作者

How to analyze PDF form documents with ONLYOFFICE macro

Firat Demir:我是一名 JavaScript 开发者和前端开发者。我运用现代 Web 技术开发用户友好的界面,并通过我的博客文章分享我的经验和行业知识。我热爱编写代码、学习并分享我所学到的知识。

创建免费的 ONLYOFFICE 账户

在线查看并协作编辑文本文档、电子表格、幻灯片、表单和 PDF 文件。