So analysieren Sie Dokumente mit einem ONLYOFFICE-Makro
In der heutigen schnelllebigen digitalen Welt fällt es Autoren, Redakteuren und Content-Erstellern oft schwer, aussagekräftige Erkenntnisse über ihre Dokumente zu gewinnen. Das Verständnis von Kennzahlen wie Lesbarkeit, Worthäufigkeit und struktureller Ausgewogenheit kann die Dokumentqualität deutlich verbessern, doch die manuelle Analyse ist zeitaufwändig und inkonsistent. In diesem Blogbeitrag zeigen wir Ihnen, wie Sie ein leistungsstarkes ONLYOFFICE-Makro erstellen, das Ihre Dokumente automatisch analysiert und umfassende Berichte erstellt.
Erstellen des Dokumentanalyse-Makros
Wir zerlegen unser Makro in funktionale Komponenten und erklären deren Funktionsweise.
Einrichten der Hauptfunktion
Der Kern unseres Makros ist die Funktion analyzeDocument(), die den gesamten Analyseprozess orchestriert:
/**
 * Entry point of the macro: collects the text of every paragraph in the
 * current document, runs the analysis helpers on it and appends the
 * resulting report to the document.
 *
 * Takes no arguments and returns nothing. Progress is reported through
 * console.log; any exception raised along the way is caught and logged
 * instead of being propagated.
 */
function analyzeDocument() {
  try {
    var doc = Api.GetDocument();
    var docParagraphs = doc.GetAllParagraphs();

    // Nothing to analyse: warn and stop before any helper is called
    if (docParagraphs.length === 0) {
      console.log("Warning: Document is empty or no paragraphs found for analysis.");
      return;
    }

    // Concatenate the paragraph texts, each followed by a single space
    var fullText = docParagraphs.reduce(function(collected, para) {
      return collected + para.GetText() + " ";
    }, "");

    // Run the analyses on the collected text
    var basic = calculateBasicStats(fullText, docParagraphs);
    var advanced = calculateAdvancedStats(fullText, basic);
    var topWords = findCommonWords(fullText, 10);

    // Write the report into the document
    createAndAddReport(doc, basic, advanced, topWords);

    console.log("Success: Document analysis completed. Report added to the end of the document.");
  } catch (err) {
    console.log("Error: " + err.message);
  }
}
Diese Funktion sammelt zunächst den gesamten Text aus dem Dokument, übergibt ihn dann an spezialisierte Analysefunktionen und erstellt schließlich einen Bericht. Der Try-Catch-Block stellt sicher, dass das Makro alle Fehler ordnungsgemäß verarbeitet.
Berechnung grundlegender Statistiken
Die Funktion calculateBasicStats() verarbeitet den Text, um grundlegende Metriken zu extrahieren:
/**
 * Derives the basic size metrics of a text.
 *
 * @param {string} text - Full text to measure.
 * @param {Array} paragraphs - Paragraph list; only its length is read.
 * @returns {Object} Counts (wordCount, sentenceCount, paragraphCount,
 *   charCountWithSpaces, charCountWithoutSpaces, lineCount) plus the
 *   extracted `words` and `sentences` arrays.
 */
function calculateBasicStats(text, paragraphs) {
  // Words are the maximal runs of non-whitespace characters
  var wordList = text.match(/\S+/g) || [];

  // Sentences are the non-blank pieces between runs of '.', '!' or '?'
  var sentenceList = [];
  var pieces = text.split(/[.!?]+/);
  for (var i = 0; i < pieces.length; i++) {
    if (pieces[i].trim().length > 0) {
      sentenceList.push(pieces[i]);
    }
  }

  var totalChars = text.length;
  var nonSpaceChars = text.replace(/\s/g, "").length;

  return {
    wordCount: wordList.length,
    sentenceCount: sentenceList.length,
    paragraphCount: paragraphs.length,
    charCountWithSpaces: totalChars,
    charCountWithoutSpaces: nonSpaceChars,
    // Rough estimate that assumes 70 characters per line
    lineCount: Math.ceil(totalChars / 70),
    words: wordList,
    sentences: sentenceList
  };
}
Diese Funktion zerlegt den Text in Wörter und Sätze, zählt Absätze und Zeichen (mit und ohne Leerzeichen) und schätzt die Zeilenzahl anhand von etwa 70 Zeichen pro Zeile.
Erweiterte Analysen durchführen
Für tiefere Einblicke berechnet die Funktion calculateAdvancedStats() komplexere Kennzahlen:
/**
 * Computes derived metrics from the basic statistics.
 *
 * Every divisor is clamped to at least 1, so an input without words,
 * sentences or paragraphs yields finite numbers instead of NaN.
 *
 * @param {string} text - Full document text; not read by this function.
 * @param {Object} basicStats - Reads `wordCount`, `sentenceCount`,
 *   `paragraphCount` and the `words` array.
 * @returns {Object} avgWordsPerSentence, avgWordsPerParagraph,
 *   avgWordLength, readabilityScore and readingTimeMinutes.
 */
function calculateAdvancedStats(text, basicStats) {
  var wordDivisor = Math.max(1, basicStats.wordCount);

  // Average sentence length (in words)
  var avgWordsPerSentence = basicStats.wordCount / Math.max(1, basicStats.sentenceCount);
  // Average paragraph length (in words)
  var avgWordsPerParagraph = basicStats.wordCount / Math.max(1, basicStats.paragraphCount);

  // Average word length (in characters)
  var totalWordLength = basicStats.words.reduce(function(sum, word) {
    return sum + word.length;
  }, 0);
  var avgWordLength = totalWordLength / wordDivisor;

  // Readability score using the Flesch Reading Ease coefficients, with the
  // average characters per word standing in for syllables per word, so the
  // values are not comparable to standard Flesch scores. The guarded
  // average is used here so that zero words no longer produces 0/0 = NaN.
  var readabilityScore = 206.835 - 1.015 * avgWordsPerSentence - 84.6 * avgWordLength;

  // Estimated reading time in whole minutes at 200 words per minute
  var readingTimeMinutes = Math.ceil(basicStats.wordCount / 200);

  return {
    avgWordsPerSentence: avgWordsPerSentence,
    avgWordsPerParagraph: avgWordsPerParagraph,
    avgWordLength: avgWordLength,
    readabilityScore: readabilityScore,
    readingTimeMinutes: readingTimeMinutes
  };
}
Diese Funktion berechnet die durchschnittliche Satz-, Absatz- und Wortlänge, einen Lesbarkeitswert und die geschätzte Lesezeit.
Analyse der Worthäufigkeit
Die Funktion findCommonWords() ermittelt die am häufigsten verwendeten Wörter:
/**
 * Returns the most frequently used words of a text.
 *
 * The text is lower-cased and stripped of punctuation; words with three or
 * fewer characters and a small set of English stop words are ignored.
 *
 * @param {string} text - Text to analyse.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<{word: string, frequency: number}>} Entries ordered by
 *   descending frequency.
 */
function findCommonWords(text, limit) {
  // Clean text and convert to lowercase. '?', '"' and square brackets are
  // stripped as well, so that "really?" and "really" count as the same word.
  var cleanText = text.toLowerCase().replace(/[.,\/#!?$%\^&\*;:{}=\-_`~()"\[\]]/g, "");

  // Split into words, dropping very short ones
  var words = cleanText.split(/\s+/).filter(function(word) {
    return word.length > 3;
  });

  // Count occurrences in a prototype-less object: with a plain {} a word
  // such as "constructor" would read the inherited Object.prototype member
  // and corrupt its count.
  var wordFrequency = Object.create(null);
  words.forEach(function(word) {
    wordFrequency[word] = (wordFrequency[word] || 0) + 1;
  });

  // Filter stop words
  var stopWords = ["this", "that", "with", "from", "have", "been"];
  stopWords.forEach(function(stopWord) {
    delete wordFrequency[stopWord];
  });

  // Sort by frequency, highest first
  var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
    return wordFrequency[b] - wordFrequency[a];
  });

  // Return top N words as word-frequency pairs
  return sortedWords.slice(0, limit).map(function(word) {
    return { word: word, frequency: wordFrequency[word] };
  });
}
Diese Funktion entfernt Satzzeichen, ignoriert Wörter mit weniger als vier Zeichen, filtert häufige englische Stoppwörter heraus und gibt die am häufigsten verwendeten Wörter im Dokument zurück.
Bericht erstellen
Abschließend kompiliert und formatiert die Funktion createAndAddReport() alle Analyseergebnisse:
/**
 * Appends the analysis report to the end of the document (abridged
 * version: the statistics sections are only indicated by placeholders).
 *
 * @param {Object} oDocument - Target document; `GetElementsCount` and
 *   `AddElement` are called on it.
 * @param {Object} basicStats - Basic statistics (not read in this abridged version).
 * @param {Object} advancedStats - Advanced statistics (not read in this abridged version).
 * @param {Array} commonWords - Word-frequency list (not read in this abridged version).
 */
function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
  // Puts an already-built paragraph after the current last element
  function appendToEnd(paragraph) {
    oDocument.AddElement(oDocument.GetElementsCount(), paragraph);
  }

  // Creates a paragraph holding `text` and appends it to the document
  function appendLine(text) {
    var line = Api.CreateParagraph();
    line.AddText(text);
    appendToEnd(line);
  }

  // Start the report on a new page
  var pageBreak = Api.CreateParagraph();
  pageBreak.AddPageBreak();
  appendToEnd(pageBreak);

  // Report title
  appendLine("DOCUMENT ANALYSIS REPORT");

  // Basic statistics section
  appendLine("BASIC STATISTICS");
  // Add statistics content
  // ... (code that adds individual statistics)
  // Add advanced analysis section
  // ... (code that adds advanced metrics)
  // Add word frequency section
  // ... (code that adds word frequency list)

  // Footer with the generation timestamp
  appendLine("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " +
    new Date().toLocaleString() + ".");
}
Diese Funktion erstellt am Ende des Dokuments einen strukturierten Bericht mit allen Analyseergebnissen.
Vollständiger Makrocode
Hier ist der vollständige Makrocode, den Sie kopieren und verwenden können:
(function() {
// Main function - starts all operations
/**
 * Entry point of the macro: collects the text of every paragraph in the
 * current document, runs the analysis helpers on it and appends the
 * resulting report to the document.
 *
 * Takes no arguments and returns nothing. Progress is reported through
 * console.log; any exception raised along the way is caught and logged
 * instead of being propagated.
 */
function analyzeDocument() {
  try {
    var doc = Api.GetDocument();
    var docParagraphs = doc.GetAllParagraphs();

    // Nothing to analyse: warn and stop before any helper is called
    if (docParagraphs.length === 0) {
      console.log("Warning: Document is empty or no paragraphs found for analysis.");
      return;
    }

    // Concatenate the paragraph texts, each followed by a single space
    var fullText = docParagraphs.reduce(function(collected, para) {
      return collected + para.GetText() + " ";
    }, "");

    // Basic statistics feed the advanced analysis
    var basic = calculateBasicStats(fullText, docParagraphs);
    var advanced = calculateAdvancedStats(fullText, basic);

    // Ten most common words
    var topWords = findCommonWords(fullText, 10);

    // Write the report into the document and inform the user
    createAndAddReport(doc, basic, advanced, topWords);
    console.log("Success: Document analysis completed. Report added to the end of the document.");
  } catch (err) {
    console.log("Error: An error occurred during processing: " + err.message);
  }
}
// Calculate basic statistics
/**
 * Derives the basic size metrics of a text.
 *
 * @param {string} text - Full text to measure.
 * @param {Array} paragraphs - Paragraph list; only its length is read.
 * @returns {Object} Counts (wordCount, sentenceCount, paragraphCount,
 *   charCountWithSpaces, charCountWithoutSpaces, lineCount) plus the
 *   extracted `words` and `sentences` arrays.
 */
function calculateBasicStats(text, paragraphs) {
  // Words are the maximal runs of non-whitespace characters
  var wordList = text.match(/\S+/g) || [];

  // Sentences are the non-blank pieces between runs of '.', '!' or '?'
  var sentenceList = [];
  var pieces = text.split(/[.!?]+/);
  for (var i = 0; i < pieces.length; i++) {
    if (pieces[i].trim().length > 0) {
      sentenceList.push(pieces[i]);
    }
  }

  // Character totals with and without whitespace
  var totalChars = text.length;
  var nonSpaceChars = text.replace(/\s/g, "").length;

  return {
    wordCount: wordList.length,
    sentenceCount: sentenceList.length,
    paragraphCount: paragraphs.length,
    charCountWithSpaces: totalChars,
    charCountWithoutSpaces: nonSpaceChars,
    // Rough estimate that assumes approximately 70 characters per line
    lineCount: Math.ceil(totalChars / 70),
    words: wordList,
    sentences: sentenceList
  };
}
// Calculate advanced statistics
/**
 * Computes derived metrics from the basic statistics.
 *
 * Every divisor is clamped to at least 1, so empty input yields finite
 * numbers.
 *
 * @param {string} text - Full document text; not read by this function.
 * @param {Object} basicStats - Reads `wordCount`, `sentenceCount`,
 *   `paragraphCount` and the `words` array.
 * @returns {Object} avgWordsPerSentence, avgWordsPerParagraph,
 *   avgWordLength, readabilityScore and readingTimeMinutes.
 */
function calculateAdvancedStats(text, basicStats) {
  var totalWords = basicStats.wordCount;

  // Words per sentence and per paragraph
  var perSentence = totalWords / Math.max(1, basicStats.sentenceCount);
  var perParagraph = totalWords / Math.max(1, basicStats.paragraphCount);

  // Characters per word
  var charsInWords = 0;
  for (var i = 0; i < basicStats.words.length; i++) {
    charsInWords += basicStats.words[i].length;
  }
  var perWord = charsInWords / Math.max(1, totalWords);

  // Readability score using the Flesch Reading Ease coefficients, with the
  // average characters per word standing in for syllables per word
  var readability = 206.835 - 1.015 * perSentence - 84.6 * perWord;

  return {
    avgWordsPerSentence: perSentence,
    avgWordsPerParagraph: perParagraph,
    avgWordLength: perWord,
    readabilityScore: readability,
    // Whole minutes at an average reading speed of 200 words/minute
    readingTimeMinutes: Math.ceil(totalWords / 200)
  };
}
// Find most common words
/**
 * Returns the most frequently used words of a text.
 *
 * The text is lower-cased and stripped of punctuation; words with three or
 * fewer characters and a list of common English stop words are ignored.
 *
 * @param {string} text - Text to analyse.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<{word: string, frequency: number}>} Entries ordered by
 *   descending frequency.
 */
function findCommonWords(text, limit) {
  // Clean text and convert to lowercase. '?', '"' and square brackets are
  // stripped as well, so that "really?" and "really" count as the same word.
  var cleanText = text.toLowerCase().replace(/[.,\/#!?$%\^&\*;:{}=\-_`~()"\[\]]/g, "");

  // Split into words
  var words = cleanText.split(/\s+/).filter(function(word) {
    return word.length > 3; // Filter out very short words
  });

  // Count occurrences in a prototype-less object: with a plain {} a word
  // such as "constructor" would hit the inherited Object.prototype member,
  // and incrementing that value produces NaN instead of a count.
  var wordFrequency = Object.create(null);
  words.forEach(function(word) {
    wordFrequency[word] = (wordFrequency[word] || 0) + 1;
  });

  // Filter stop words (common English words)
  var stopWords = ["this", "that", "these", "those", "with", "from", "have", "been", "were", "they", "their", "what", "when", "where", "which", "there", "will", "would", "could", "should", "about", "also"];
  stopWords.forEach(function(stopWord) {
    delete wordFrequency[stopWord];
  });

  // Sort by frequency, highest first
  var sortedWords = Object.keys(wordFrequency).sort(function(a, b) {
    return wordFrequency[b] - wordFrequency[a];
  });

  // Take the top N words and return them as word-frequency pairs
  return sortedWords.slice(0, limit).map(function(word) {
    return {
      word: word,
      frequency: wordFrequency[word]
    };
  });
}
// Create and add report to document
/**
 * Appends the complete analysis report to the end of the document: a page
 * break, the title, the basic statistics, the advanced analysis, the list
 * of most frequent words and a footer with the generation timestamp.
 *
 * @param {Object} oDocument - Target document; `GetElementsCount` and
 *   `AddElement` are called on it.
 * @param {Object} basicStats - Reads wordCount, sentenceCount,
 *   paragraphCount, charCountWithSpaces, charCountWithoutSpaces, lineCount.
 * @param {Object} advancedStats - Reads avgWordsPerSentence,
 *   avgWordsPerParagraph, avgWordLength, readabilityScore (formatted with
 *   two decimals) and readingTimeMinutes.
 * @param {Array<{word: string, frequency: number}>} commonWords - Entries
 *   listed in the given order.
 */
function createAndAddReport(oDocument, basicStats, advancedStats, commonWords) {
  // Puts an already-built paragraph after the current last element
  function appendToEnd(paragraph) {
    oDocument.AddElement(oDocument.GetElementsCount(), paragraph);
  }

  // Creates a paragraph holding `text` and appends it to the document
  function appendLine(text) {
    var line = Api.CreateParagraph();
    line.AddText(text);
    appendToEnd(line);
  }

  // Start the report on a new page
  var pageBreak = Api.CreateParagraph();
  pageBreak.AddPageBreak();
  appendToEnd(pageBreak);

  // Main title, emphasised by capital letters
  appendLine("DOCUMENT ANALYSIS REPORT");

  // Basic statistics
  appendLine("BASIC STATISTICS");
  appendLine("• Word Count: " + basicStats.wordCount);
  appendLine("• Sentence Count: " + basicStats.sentenceCount);
  appendLine("• Paragraph Count: " + basicStats.paragraphCount);
  appendLine("• Character Count (with spaces): " + basicStats.charCountWithSpaces);
  appendLine("• Character Count (without spaces): " + basicStats.charCountWithoutSpaces);
  appendLine("• Estimated Line Count: " + basicStats.lineCount);

  // Advanced analysis
  appendLine("ADVANCED ANALYSIS");
  appendLine("• Average Sentence Length: " + advancedStats.avgWordsPerSentence.toFixed(2) + " words");
  appendLine("• Average Paragraph Length: " + advancedStats.avgWordsPerParagraph.toFixed(2) + " words");
  appendLine("• Average Word Length: " + advancedStats.avgWordLength.toFixed(2) + " characters");
  appendLine("• Readability Score: " + advancedStats.readabilityScore.toFixed(2));
  appendLine("• Estimated Reading Time: " + advancedStats.readingTimeMinutes + " minutes");

  // Most frequent words as a simple numbered list (not a table)
  appendLine("MOST FREQUENTLY USED WORDS");
  if (commonWords.length === 0) {
    appendLine("No frequently used words found.");
  } else {
    commonWords.forEach(function(entry, index) {
      appendLine((index + 1) + ". " + entry.word + " (" + entry.frequency + " times)");
    });
  }

  // Footer note with the generation timestamp
  appendLine("This report was generated by OnlyOffice Document Statistics and Analysis Tool on " +
    new Date().toLocaleString() + ".");
}
// Run the macro
analyzeDocument();
})();
So verwenden Sie dieses Makro in ONLYOFFICE:
- Öffnen Sie Ihr Dokument in ONLYOFFICE.
- Navigieren Sie zur Registerkarte „Ansicht“ und wählen Sie „Makros“.
- Erstellen Sie ein neues Makro und fügen Sie den Code ein.
- Führen Sie das Makro aus.
- Ein detaillierter Analysebericht wird am Ende Ihres Dokuments hinzugefügt.
Lassen Sie uns nun unser Makro ausführen und sehen, wie es funktioniert!
Dieses Makro ist ein wertvolles Tool für Fachleute, die Textanalyse- und Dokumentationsprozesse in einer modernen Büroumgebung automatisieren möchten. Wir hoffen, es ist eine nützliche Ergänzung für Ihre Arbeit.
Wir empfehlen Ihnen, die ONLYOFFICE API-Dokumentation zu erkunden, um eigene Makros zu erstellen oder dieses Makro zu erweitern. Wenn Sie Verbesserungsvorschläge oder Ideen für neue Makros haben, kontaktieren Sie uns gerne. Ihr Feedback hilft uns, weiterhin Tools zu entwickeln, die die Dokumenterstellung und -bearbeitung effizienter machen.
Über den Autor
Erstellen Sie Ihr kostenloses ONLYOFFICE-Konto
Öffnen und bearbeiten Sie gemeinsam Dokumente, Tabellen, Folien, Formulare und PDF-Dateien online.