diff --git a/CHANGELOG.md b/CHANGELOG.md
index 08b5f89..5234770 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.12.0] - 2025-01-09
+### Added
+* Added document minification as a feature before document translation, to
+ allow translation of large docx or pptx files. For more info check the README.
## [1.11.0] - 2024-11-15
### Added
@@ -169,7 +173,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [0.1.0] - 2021-11-05
Initial release.
-
+[1.12.0]: https://github.com/DeepLcom/deepl-dotnet/compare/v1.11.0...v1.12.0
[1.11.0]: https://github.com/DeepLcom/deepl-dotnet/compare/v1.10.0...v1.11.0
[1.10.0]: https://github.com/DeepLcom/deepl-dotnet/compare/v1.9.0...v1.10.0
[1.9.0]: https://github.com/DeepLcom/deepl-dotnet/compare/v1.8.0...v1.9.0
diff --git a/DeepL/DeepL.csproj b/DeepL/DeepL.csproj
index 14534dc..a4d2d3a 100644
--- a/DeepL/DeepL.csproj
+++ b/DeepL/DeepL.csproj
@@ -3,9 +3,9 @@
DeepL.net is the official DeepL .NET client library.
DeepL.net
- 1.11.0
- 1.11.0
- 1.11.0.0
+ 1.12.0
+ 1.12.0
+ 1.12.0.0
1.0.0.0
net5.0;netstandard2.0
8
@@ -32,14 +32,14 @@
-
-
+
+
-
-
+
+
diff --git a/DeepL/DeepLException.cs b/DeepL/DeepLException.cs
index 760334f..8c185e1 100644
--- a/DeepL/DeepLException.cs
+++ b/DeepL/DeepLException.cs
@@ -103,4 +103,30 @@ public DocumentTranslationException(string message, Exception innerException, Do
/// The handle can be used to later retrieve the document or to contact DeepL support.
public DocumentHandle? DocumentHandle { get; }
}
+
+ /// <summary>
+ /// Exception thrown if an error occurs during the minification phase of document minification.
+ /// See <see cref="DocumentMinifier" />.
+ /// </summary>
+ public sealed class DocumentMinificationException : DeepLException {
+ /// <summary>Initializes a new instance of the <see cref="DocumentMinificationException" /> class.</summary>
+ /// <param name="message">The message that describes the error.</param>
+ /// <param name="innerException">The underlying exception that caused the minification step to fail.</param>
+ public DocumentMinificationException(string message, Exception innerException) :
+ base(message, innerException) {
+ }
+ }
+
+ /// <summary>
+ /// Exception thrown if an error occurs during the deminification phase of document minification.
+ /// See <see cref="DocumentMinifier" />.
+ /// </summary>
+ public sealed class DocumentDeminificationException : DeepLException {
+ /// <summary>Initializes a new instance of the <see cref="DocumentDeminificationException" /> class.</summary>
+ /// <param name="message">The message that describes the error.</param>
+ /// <param name="innerException">The underlying exception that caused the deminification step to fail.</param>
+ public DocumentDeminificationException(string message, Exception innerException) :
+ base(message, innerException) {
+ }
+ }
}
diff --git a/DeepL/DocumentMinifier.cs b/DeepL/DocumentMinifier.cs
new file mode 100644
index 0000000..f9eff14
--- /dev/null
+++ b/DeepL/DocumentMinifier.cs
@@ -0,0 +1,388 @@
+// Copyright 2022 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+using System;
+using System.IO;
+using System.IO.Compression;
+using System.Linq;
+using System.Threading;
+
+namespace DeepL {
+ /// <summary>
+ /// Contract for minifying a document before translation and for reinserting its media (deminifying)
+ /// afterwards.
+ /// </summary>
+ public interface IDocumentMinifier {
+ /// <summary>
+ /// Minifies a given document using the given tempDir, by extracting it as a ZIP file and
+ /// replacing all supported media files with a small placeholder.
+ /// Created file will be inside the tempDir, the filename can be retrieved by calling
+ /// <see cref="DocumentMinifier.GetMinifiedDocFile" /> with the input file path as a parameter.
+ /// Note that this method will minify the file without any checks, you should first call
+ /// <see cref="DocumentMinifier.CanMinifyFile" /> on the input file.
+ /// If <paramref name="cleanup" /> is set to <c>true</c>, the extracted document will be deleted afterwards, and only
+ /// the original media and the minified file will remain in the tempDir.
+ /// </summary>
+ /// <param name="inputFilePath">Path to the file to be minified.</param>
+ /// <param name="cleanup">
+ /// If true, will delete the extracted document files from the temporary directory.
+ /// Otherwise, the files will remain (useful for debugging).
+ /// </param>
+ /// <returns>
+ /// The path of the minified document. Can also be retrieved by calling
+ /// <see cref="DocumentMinifier.GetMinifiedDocFile" />.
+ /// </returns>
+ /// <exception cref="DocumentMinificationException">
+ /// If an exception occurred during the minification process
+ /// </exception>
+ public string MinifyDocument(string inputFilePath, bool cleanup = false);
+
+ /// <summary>
+ /// Deminifies a given file at inputFilePath by reinserting its original media in tempDir and stores
+ /// the resulting document in outputFilePath. If cleanup is set to true, it will delete the
+ /// tempDir afterwards, otherwise nothing will happen after the deminification.
+ /// </summary>
+ /// <param name="inputFilePath">Path to document to be deminified with its media.</param>
+ /// <param name="outputFilePath">Where the final (deminified) document will be stored.</param>
+ /// <param name="cleanup">Determines if the tempDir is deleted at the end of this method.</param>
+ /// <exception cref="DocumentDeminificationException">
+ /// If an exception occurred during the deminification process
+ /// </exception>
+ public void DeminifyDocument(string inputFilePath, string outputFilePath, bool cleanup = false);
+ }
+
+ ///
+ /// Class that implements document minification: Stripping supported files like pptx and docx
+ /// of their media (images, videos, etc) before uploading them to the DeepL API to be translated.
+ /// This allows users to translate files that would usually hit the size limit for files.
+ /// Please note the following:
+ ///
+ /// -
+ ///
+ /// To use this class, you first need to check by calling <see cref="CanMinifyFile" />
+ /// if the file type is supported. This class performs no further checks.
+ ///
+ ///
+ /// -
+ ///
+ /// The DocumentMinifier is stateful, so you cannot use it to minify multiple documents at once.
+ /// You need to create a new DocumentMinifier object per document.
+ ///
+ ///
+ /// -
+ ///
+ /// Be very careful when providing a custom tempDir when instantiating the class. For example,
+ /// <see cref="DeminifyDocument" /> will delete the entire tempDir with
+ /// cleanup set to true (disabled by default). In order not to lose any data, ideally always
+ /// call new DocumentMinifier() in order to get a fresh temporary directory.
+ ///
+ ///
+ /// -
+ ///
+ /// If an error occurs during minification, either a <see cref="DocumentMinificationException" /> or a
+ /// <see cref="DocumentDeminificationException" /> will be thrown, depending on which phase the error
+ /// occurred in.
+ ///
+ ///
+ ///
+ /// The document minification process works in 2 phases:
+ ///
+ /// -
+ ///
+ /// Minification: The document is extracted into a temporary directory, the media files are backed up,
+ /// the media in the document is replaced with placeholders and a minified document is created.
+ ///
+ ///
+ /// -
+ ///
+ /// Deminification: The minified document is extracted into a temporary directory, the media backups are
+ /// reinserted into the extracted document, and the document is deminified into the output path.
+ ///
+ ///
+ ///
+ /// If cleanup is enabled, the minification phase will delete the folder with the extracted document
+ /// and the deminification phase will delete the entire temporary directory.
+ /// Note that by default, the input file will be kept on disk, and as such no further backups of media etc.
+ /// are made (as they are all available from the input file).
+ /// Example usage:
+ ///
+ /// var inputFile = "/home/exampleUser/document.pptx";
+ /// var outputFile = "/home/exampleUser/document_ES.pptx";
+ /// var minifier = new DocumentMinifier();
+ /// if (DocumentMinifier.CanMinifyFile(inputFile)) {
+ /// try {
+ /// minifier.MinifyDocument(inputFile, true);
+ /// var minifiedFile = minifier.GetMinifiedDocFile(inputFile);
+ /// // process file minifiedFile, e.g. translate it with DeepL
+ /// minifier.DeminifyDocument(minifiedFile, outputFile, true);
+ /// // process file outputFile
+ /// } catch (DocumentMinificationException e) {
+ /// // handle exception during minification, e.g. print list of media, clean up temporary directory, etc
+ /// } catch (DocumentDeminificationException e) {
+ /// // handle exception during deminification, e.g. save minified document, clean up temporary directory, etc
+ /// } catch (DocumentTranslationException e) {
+ /// // handle general DocTrans exception (mostly useful if document is translated between minification
+ /// // and deminification)
+ /// }
+ /// }
+ ///
+ ///
+ public class DocumentMinifier : IDocumentMinifier {
+ /// Which input document types are supported for minification.
+ private static readonly string[] SupportedDocumentTypes = { ".pptx", ".docx" };
+
+ /// Which media formats in the documents are supported for minification.
+ private static readonly string[] SupportedMediaFormats = {
+ // Image formats
+ ".png", ".jpg", ".jpeg", ".emf", ".bmp", ".tiff", ".wdp", ".svg", ".gif",
+ // Video formats
+ // Taken from https://support.microsoft.com/en-gb/office/video-and-audio-file-formats-supported-in-powerpoint-d8b12450-26db-4c7b-a5c1-593d3418fb59
+ ".mp4", ".asf", ".avi", ".m4v", ".mpg", ".mpeg", ".wmv", ".mov",
+ // Audio formats, taken from the same URL as video
+ ".aiff", ".au", ".mid", ".midi", ".mp3", ".m4a", ".wav", ".wma"
+ };
+
+ private const string ExtractedDocDirName = "extracted_doc";
+ private const string OriginalMediaDirName = "original_media";
+ private const string MinifiedDocFileBaseName = "minifiedDoc";
+ private const int MinifiedDocSizeLimitWarning = 5000000;
+
+ private readonly string _tempDir;
+
+ /// <summary>
+ /// Initializes a new <see cref="DocumentMinifier" /> object either with a specified or newly created
+ /// temporary directory.
+ /// </summary>
+ /// <param name="tempDir">
+ /// The temporary directory used for media extraction during minification. When <c>null</c>, a new
+ /// directory is created via <c>CreateTemporaryDirectory</c>.
+ /// </param>
+ public DocumentMinifier(string? tempDir = null) {
+ _tempDir = tempDir ?? CreateTemporaryDirectory();
+ }
+
+        /// <summary>Determines whether the given file is of a document type that supports minification.</summary>
+        /// <param name="inputFilePath">The path to the file</param>
+        /// <returns>
+        /// <c>true</c> if the path is not blank and its extension (compared in lower case) is one of the
+        /// supported document types, otherwise <c>false</c>
+        /// </returns>
+        /// <exception cref="ArgumentException">
+        /// if the inputFilePath contains characters not allowed in a path name
+        /// </exception>
+        public static bool CanMinifyFile(string inputFilePath) {
+            if (string.IsNullOrWhiteSpace(inputFilePath)) {
+                return false;
+            }
+
+            var extension = Path.GetExtension(inputFilePath).ToLowerInvariant();
+            return SupportedDocumentTypes.Contains(extension);
+        }
+
+        /// <summary>
+        /// Builds the path of the minified copy of the input file: a fixed base name inside the temporary
+        /// directory, carrying the same extension as the input file.
+        /// </summary>
+        /// <param name="inputFilePath">The path to the file</param>
+        /// <returns>The path to the minified version of the file</returns>
+        /// <exception cref="ArgumentException">
+        /// if the inputFilePath contains characters not allowed in a path name
+        /// </exception>
+        public string GetMinifiedDocFile(string inputFilePath) {
+            var inputExtension = Path.GetExtension(inputFilePath);
+            var fileName = Path.ChangeExtension(MinifiedDocFileBaseName, inputExtension);
+            return Path.Combine(_tempDir, fileName);
+        }
+
+        /// <summary>Returns the directory, inside the temporary directory, that the input file is extracted to.</summary>
+        /// <returns>The path to the directory where the input file will be extracted to</returns>
+        public string GetExtractedDocDirectory() => Path.Combine(_tempDir, ExtractedDocDirName);
+
+        /// <summary>Returns the directory, inside the temporary directory, that holds the original media files.</summary>
+        /// <returns>The path to the media directory containing the original media</returns>
+        public string GetOriginalMediaDirectory() => Path.Combine(_tempDir, OriginalMediaDirName);
+
+        /// <inheritdoc />
+        public string MinifyDocument(string inputFilePath, bool cleanup = false) {
+            var unpackedDir = GetExtractedDocDirectory();
+            var mediaBackupDir = GetOriginalMediaDirectory();
+            var minifiedPath = GetMinifiedDocFile(inputFilePath);
+
+            // Step 1: unpack the document (a zip container) into the extraction directory.
+            try {
+                ExtractZipTo(inputFilePath, unpackedDir);
+            } catch (Exception ex) {
+                var message =
+                    $"Exception when extracting document: Failed to extract {inputFilePath} to {unpackedDir}";
+                throw new DocumentMinificationException(message, ex);
+            }
+
+            // Step 2: move supported media aside and leave placeholders in the unpacked tree.
+            ExportMediaToMediaDirAndReplace(unpackedDir, mediaBackupDir);
+
+            // Step 3: re-zip the placeholder-only tree as the minified document.
+            try {
+                ZipFile.CreateFromDirectory(unpackedDir, minifiedPath);
+            } catch (Exception ex) {
+                throw new DocumentMinificationException($"Failed creating a zip file at {minifiedPath}", ex);
+            }
+
+            // Step 4 (optional): drop the unpacked tree; media backups and the minified file stay.
+            if (cleanup) {
+                try {
+                    Directory.Delete(unpackedDir, true);
+                } catch (Exception ex) {
+                    throw new DocumentMinificationException($"Failed to delete directory {unpackedDir}", ex);
+                }
+            }
+
+            // Warn (on stderr) when the result is still larger than the configured warning threshold.
+            var minifiedSize = new FileInfo(minifiedPath).Length;
+            var exceedsWarningLimit = minifiedSize > MinifiedDocSizeLimitWarning;
+            if (exceedsWarningLimit) {
+                Console.Error.WriteLine(
+                    "The input file could not be minified below 5 MB, likely a media type is missing. "
+                    + "This might cause the translation to fail.");
+            }
+
+            return minifiedPath;
+        }
+
+        /// <inheritdoc />
+        public void DeminifyDocument(string inputFilePath, string outputFilePath, bool cleanup = false) {
+            var extractedDocDirectory = GetExtractedDocDirectory();
+            var mediaDir = GetOriginalMediaDirectory();
+
+            // Create the extraction directory up front so that a failure here is reported with its own message.
+            if (!Directory.Exists(extractedDocDirectory)) {
+                try {
+                    Directory.CreateDirectory(extractedDocDirectory);
+                } catch (Exception ex) {
+                    throw new DocumentDeminificationException(
+                        $"Exception when deminifying, could not create directory at {extractedDocDirectory}.",
+                        ex);
+                }
+            }
+
+            try {
+                ExtractZipTo(inputFilePath, extractedDocDirectory);
+            } catch (Exception ex) {
+                throw new DocumentDeminificationException(
+                    $"Exception when extracting document: Failed to extract {inputFilePath} to {extractedDocDirectory}",
+                    ex);
+            }
+
+            // Put the backed-up media back in place of the placeholders.
+            ReplaceMediaInDir(extractedDocDirectory, mediaDir);
+
+            try {
+                // The zip cannot be created over an existing file. The input has already been extracted
+                // above, so this is safe even when inputFilePath and outputFilePath are the same file.
+                if (File.Exists(outputFilePath)) {
+                    File.Delete(outputFilePath);
+                }
+
+                ZipFile.CreateFromDirectory(extractedDocDirectory, outputFilePath);
+            } catch (Exception ex) {
+                throw new DocumentDeminificationException($"Failed creating a zip file at {outputFilePath}", ex);
+            }
+
+            if (cleanup) {
+                try {
+                    Directory.Delete(_tempDir, true);
+                } catch (Exception ex) {
+                    // This failure happens in the deminification phase and concerns the whole temporary
+                    // directory, so report it with the deminification exception and the path actually deleted.
+                    throw new DocumentDeminificationException($"Failed to delete directory {_tempDir}", ex);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Creates a uniquely named temporary directory for use in the <see cref="DocumentMinifier" />.
+        /// Uses the system's temporary directory.
+        /// </summary>
+        /// <returns>The path of the created temporary directory</returns>
+        /// <exception cref="DocumentMinificationException">if the temporary directory could not be created</exception>
+        private static string CreateTemporaryDirectory() {
+            string tempDir;
+            do {
+                // Path.Combine is used instead of concatenating with "/": Path.GetTempPath() already ends
+                // with a directory separator, so concatenation produced doubled or mixed separators.
+                // Each iteration draws a fresh GUID, so no delay is needed before retrying.
+                tempDir = Path.Combine(
+                    Path.GetTempPath(),
+                    "document_minification_" + Guid.NewGuid().ToString("N"));
+            } while (Directory.Exists(tempDir));
+
+            try {
+                Directory.CreateDirectory(tempDir);
+            } catch (Exception ex) {
+                throw new DocumentMinificationException($"Failed creating temporary directory at {tempDir}", ex);
+            }
+
+            return tempDir;
+        }
+
+        /// <summary>Unpacks a zip archive into a directory, creating that directory first if it is missing.</summary>
+        /// <param name="zippedDocumentPath">The path to the zip file</param>
+        /// <param name="extractionDir">
+        /// The path to the directory where the contents of the zip file will be extracted to
+        /// </param>
+        private void ExtractZipTo(string zippedDocumentPath, string extractionDir) {
+            var targetDirExists = Directory.Exists(extractionDir);
+            if (!targetDirExists) {
+                Directory.CreateDirectory(extractionDir);
+            }
+
+            ZipFile.ExtractToDirectory(zippedDocumentPath, extractionDir);
+        }
+
+        /// <summary>
+        /// Walks every file below inputDirectory; each file whose extension is a supported media format is
+        /// moved to the same relative location below mediaDirectory and replaced in inputDirectory by a
+        /// small text placeholder (e.g. "/inputDirectory/foo/bar.png" is exported to
+        /// "/mediaDirectory/foo/bar.png").
+        /// </summary>
+        /// <param name="inputDirectory">The path to the input directory</param>
+        /// <param name="mediaDirectory">
+        /// The path to the directory where the supported media from inputDirectory will be exported to
+        /// </param>
+        /// <exception cref="DocumentMinificationException">
+        /// If a problem occurred when exporting the original media from inputDirectory to mediaDirectory
+        /// </exception>
+        private void ExportMediaToMediaDirAndReplace(string inputDirectory, string mediaDirectory) {
+            var allFiles = Directory.GetFiles(inputDirectory, "*.*", SearchOption.AllDirectories);
+            foreach (var sourcePath in allFiles) {
+                var extension = Path.GetExtension(sourcePath).ToLowerInvariant();
+                if (!SupportedMediaFormats.Contains(extension)) {
+                    continue;
+                }
+
+                // Drop the "<inputDirectory><separator>" prefix to obtain the path relative to the input root.
+                var relativePath = sourcePath.Substring(inputDirectory.Length + 1);
+                var backupPath = Path.Combine(mediaDirectory, relativePath);
+
+                // Not expected to be null, since backupPath always starts with mediaDirectory.
+                var backupDir = Path.GetDirectoryName(backupPath);
+
+                try {
+                    if (!string.IsNullOrWhiteSpace(backupDir) && !Directory.Exists(backupDir)) {
+                        Directory.CreateDirectory(backupDir);
+                    }
+
+                    File.Move(sourcePath, backupPath);
+                    File.WriteAllText(sourcePath, "DeepL Media Placeholder");
+                } catch (Exception ex) {
+                    throw new DocumentMinificationException($"Exception when exporting and replacing media files", ex);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Moves every file below mediaDirectory to the same relative location below inputDirectory
+        /// (e.g. /mediaDirectory/foo/bar.png is moved to /inputDirectory/foo/bar.png), replacing any file
+        /// already present there and creating missing subdirectories on the way.
+        /// </summary>
+        /// <param name="inputDirectory">The path to the input directory</param>
+        /// <param name="mediaDirectory">
+        /// The path to the directory where the original media lives. This media will be reinserted back and
+        /// replace any placeholder media.
+        /// </param>
+        /// <exception cref="DocumentDeminificationException">
+        /// If a problem occurred when trying to reinsert the media
+        /// </exception>
+        private void ReplaceMediaInDir(string inputDirectory, string mediaDirectory) {
+            var backedUpMedia = Directory.GetFiles(mediaDirectory, "*.*", SearchOption.AllDirectories);
+            foreach (var backupPath in backedUpMedia) {
+                // Drop the "<mediaDirectory><separator>" prefix to obtain the path relative to the media root.
+                var relativePath = backupPath.Substring(mediaDirectory.Length + 1);
+                var restoreTarget = Path.Combine(inputDirectory, relativePath);
+                var restoreDir = Path.GetDirectoryName(restoreTarget);
+
+                if (!string.IsNullOrWhiteSpace(restoreDir) && !Directory.Exists(restoreDir)) {
+                    try {
+                        Directory.CreateDirectory(restoreDir);
+                    } catch (Exception ex) {
+                        throw new DocumentDeminificationException(
+                            $"Exception when reinserting media. Failed to create directory at {restoreDir}.",
+                            ex);
+                    }
+                }
+
+                try {
+                    // Remove whatever currently sits at the target (normally the placeholder) before the move.
+                    if (File.Exists(restoreTarget)) {
+                        File.Delete(restoreTarget);
+                    }
+
+                    File.Move(backupPath, restoreTarget);
+                } catch (Exception ex) {
+                    throw new DocumentDeminificationException(
+                        $"Exception when reinserting media. Failed to move media back to {restoreTarget}.",
+                        ex);
+                }
+            }
+        }
+ }
+}
diff --git a/DeepL/DocumentTranslateOptions.cs b/DeepL/DocumentTranslateOptions.cs
index d24101e..a2fe740 100644
--- a/DeepL/DocumentTranslateOptions.cs
+++ b/DeepL/DocumentTranslateOptions.cs
@@ -26,5 +26,8 @@ public DocumentTranslateOptions(GlossaryInfo glossary) : this() {
/// Specifies the ID of a glossary to use with the translation.
public string? GlossaryId { get; set; }
+
+ /// Controls whether to use Document Minification for translation, if available.
+ public bool EnableDocumentMinification { get; set; }
}
}
diff --git a/DeepL/Translator.cs b/DeepL/Translator.cs
index 357575c..854ab67 100644
--- a/DeepL/Translator.cs
+++ b/DeepL/Translator.cs
@@ -546,12 +546,19 @@ public async Task TranslateDocumentAsync(
string targetLanguageCode,
DocumentTranslateOptions? options = null,
CancellationToken cancellationToken = default) {
- using var inputFile = inputFileInfo.OpenRead();
+ var willMinify = (options?.EnableDocumentMinification ?? false) && DocumentMinifier.CanMinifyFile(inputFileInfo.Name);
+ var fileToUpload = inputFileInfo;
+ var minifier = new DocumentMinifier();
+ if (willMinify) {
+ minifier.MinifyDocument(inputFileInfo.FullName, true);
+ fileToUpload = new FileInfo(minifier.GetMinifiedDocFile(inputFileInfo.FullName));
+ }
+ using var inputFile = fileToUpload.OpenRead();
using var outputFile = outputFileInfo.Open(FileMode.CreateNew, FileAccess.Write);
try {
await TranslateDocumentAsync(
inputFile,
- inputFileInfo.Name,
+ fileToUpload.Name,
outputFile,
sourceLanguageCode,
targetLanguageCode,
@@ -565,6 +572,10 @@ await TranslateDocumentAsync(
}
throw;
+ } if (willMinify) {
+ outputFile.Dispose();
+ // Translated minified file is at `outputFileName`. Reinsert media (deminify) before returning
+ minifier.DeminifyDocument(outputFileInfo.FullName, outputFileInfo.FullName, true);
}
}
diff --git a/DeepLTests/BaseDeepLTest.cs b/DeepLTests/BaseDeepLTest.cs
index 433eb36..c3f68fb 100644
--- a/DeepLTests/BaseDeepLTest.cs
+++ b/DeepLTests/BaseDeepLTest.cs
@@ -5,7 +5,11 @@
using System;
using System.Collections.Generic;
using System.IO;
+using System.IO.Compression;
+using System.Linq;
+using System.Net;
using System.Net.Http;
+using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using DeepL;
@@ -17,6 +21,8 @@ public class BaseDeepLTest {
protected static readonly string AuthKey;
protected static readonly string? ServerUrl;
protected static readonly string? ProxyUrl;
+ protected static readonly Dictionary DocMinificationTestFilesMapping;
+ private static Random _random = new Random();
static BaseDeepLTest() {
if (IsMockServer) {
@@ -28,7 +34,13 @@ static BaseDeepLTest() {
"DEEPL_AUTH_KEY environment variable must be set unless using mock server.");
ServerUrl = Environment.GetEnvironmentVariable("DEEPL_SERVER_URL");
}
+
ProxyUrl = Environment.GetEnvironmentVariable("DEEPL_PROXY_URL");
+ DocMinificationTestFilesMapping = new Dictionary() {
+ { ".docx", "example_document_template.docx" },
+ { ".pptx", "example_presentation_template.pptx" },
+ { ".zip", "example_zip_template.zip" }
+ };
}
protected static Translator CreateTestTranslator(bool randomAuthKey = false) {
@@ -56,7 +68,7 @@ protected static Translator CreateTestTranslatorWithMockSession(
}
protected static MockHttpMessageHandler getMockHandler(String responseMessage) {
- var response = new HttpResponseMessage(System.Net.HttpStatusCode.OK);
+ var response = new HttpResponseMessage(HttpStatusCode.OK);
response.Content = new StringContent(responseMessage);
return new MockHttpMessageHandler(response);
}
@@ -214,6 +226,47 @@ protected static string TempDir() {
return path;
}
+        /// <summary>Resolves a test file name to its path inside the "resources" folder of the current directory.</summary>
+        /// <param name="testFileName">File name of the test resource.</param>
+        /// <returns>The combined path: current directory, "resources", then the file name.</returns>
+        protected static string GetFullPathForTestFile(string testFileName) {
+            var resourcesDir = Path.Combine(Directory.GetCurrentDirectory(), "resources");
+            return Path.Combine(resourcesDir, testFileName);
+        }
+
+        /// <summary>
+        /// Builds a large test document: extracts the template registered for the given extension, adds a
+        /// "placeholder_image.png" file filled with 90,000,000 random characters, and zips the result into
+        /// outputDirectory as "test_document" plus the extension.
+        /// </summary>
+        /// <param name="extension">Key into the test-file mapping; also used as the output file extension.</param>
+        /// <param name="outputDirectory">Directory in which the generated document is written.</param>
+        /// <returns>The path of the generated document.</returns>
+        protected static string CreateMinifiedTestDocument(string extension, string outputDirectory) {
+            var extractionDir = TempDir();
+            var templatePath = GetFullPathForTestFile(DocMinificationTestFilesMapping[extension]);
+            var outputFilePath = Path.Combine(outputDirectory, "test_document" + extension);
+            ZipFile.ExtractToDirectory(templatePath, extractionDir);
+
+            // Fill a buffer with random characters so the fake image inflates the document size.
+            var characters = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ~!@#$%^&*()_+=-<,>.?:";
+            var length = 90000000;
+            var buffer = new char[length];
+            for (var i = 0; i < buffer.Length; i++) {
+                buffer[i] = characters[_random.Next(characters.Length)];
+            }
+
+            var fakeImagePath = Path.Combine(extractionDir, "placeholder_image.png");
+            File.WriteAllText(fakeImagePath, new string(buffer));
+
+            ZipFile.CreateFromDirectory(extractionDir, outputFilePath);
+            Directory.Delete(extractionDir, true);
+            return outputFilePath;
+        }
+
+        /// <summary>
+        /// Compares two directory trees by content: they are equal when both contain the same set of
+        /// relative file paths and every pair of corresponding files has the same hash.
+        /// </summary>
+        /// <param name="dir1">Path of the first directory.</param>
+        /// <param name="dir2">Path of the second directory.</param>
+        /// <returns><c>true</c> if both trees hold the same files with identical contents.</returns>
+        protected bool AreDirectoriesEqual(string dir1, string dir2) {
+            var dir1Info = new DirectoryInfo(dir1);
+            var dir2Info = new DirectoryInfo(dir2);
+
+            var dir1Files = dir1Info.GetFiles("*.*", SearchOption.AllDirectories);
+            var dir2Files = dir2Info.GetFiles("*.*", SearchOption.AllDirectories);
+
+            // Key by path relative to the directory root rather than by bare file name: the listing is
+            // recursive, so equal names in different subfolders would otherwise collide in ToDictionary.
+            // Leading separators are trimmed so the key does not depend on a trailing separator in the root.
+            var separators = new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
+            var dir1Hashes = dir1Files.ToDictionary(
+                f => f.FullName.Substring(dir1Info.FullName.Length).TrimStart(separators),
+                GetHashForFile);
+            var dir2Hashes = dir2Files.ToDictionary(
+                f => f.FullName.Substring(dir2Info.FullName.Length).TrimStart(separators),
+                GetHashForFile);
+
+            return dir1Hashes.Count == dir2Hashes.Count &&
+                   dir1Hashes.All(
+                       kvp => dir2Hashes.TryGetValue(kvp.Key, out var otherHash) &&
+                              otherHash.SequenceEqual(kvp.Value));
+        }
+
+        /// <summary>Computes the MD5 hash of a file's contents.</summary>
+        /// <param name="file">The file to hash.</param>
+        /// <returns>The MD5 digest bytes of the file contents.</returns>
+        private byte[] GetHashForFile(FileInfo file) {
+            using (var contents = file.OpenRead())
+            using (var hasher = MD5.Create()) {
+                return hasher.ComputeHash(contents);
+            }
+        }
+
protected struct SessionOptions {
public int? NoResponse;
public int? RespondWith429;
@@ -250,17 +303,17 @@ public RealServerOnlyFact() {
}
}
-
///
/// Class to mock HTTP requests the library makes. Supports returning a constant response to every request
/// through .
/// If we ever need more complex mocking functionality, we should drop this and use a mocking library.
///
- protected class MockHttpMessageHandler : System.Net.Http.HttpMessageHandler {
+ protected class MockHttpMessageHandler : HttpMessageHandler {
///
/// List of requests made through this mock. Use to make assertions in your tests after the code has run.
///
public List requests;
+
///
/// Default response returned on every HTTP request. If we need more complex functionality,
/// we should use a proper mocking library, for example Moq
@@ -271,7 +324,10 @@ public MockHttpMessageHandler(HttpResponseMessage response) : base() {
defaultResponse = response;
requests = new List();
}
- protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) {
+
+ protected override async Task SendAsync(
+ HttpRequestMessage request,
+ CancellationToken cancellationToken) {
this.requests.Add(request);
await Task.Delay(0);
return defaultResponse;
diff --git a/DeepLTests/DeepLTests.csproj b/DeepLTests/DeepLTests.csproj
index aaeb57a..9586d00 100644
--- a/DeepLTests/DeepLTests.csproj
+++ b/DeepLTests/DeepLTests.csproj
@@ -24,4 +24,8 @@
+
+ PreserveNewest
+
+
diff --git a/DeepLTests/DocumentMinificationTest.cs b/DeepLTests/DocumentMinificationTest.cs
new file mode 100644
index 0000000..e942be1
--- /dev/null
+++ b/DeepLTests/DocumentMinificationTest.cs
@@ -0,0 +1,127 @@
+// Copyright 2022 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Collections.Generic;
+using System.IO;
+using System.IO.Compression;
+using System.Threading.Tasks;
+using DeepL;
+using Xunit;
+
+namespace DeepLTests {
+ public sealed class DocumentMinificationTest : BaseDeepLTest {
+ private readonly string _tempDir = TempDir();
+
+ /// <summary>
+ /// Returns the path "output/example_document" (with the given extension) inside the test temp
+ /// directory, creating the "output" folder and deleting any file already at that path.
+ /// </summary>
+ private string OutputDocumentPath(string extension) {
+ var path = Path.Combine(_tempDir, "output", Path.ChangeExtension("example_document", extension));
+ Directory.CreateDirectory(Path.Combine(_tempDir, "output"));
+ File.Delete(path);
+ return path;
+ }
+
+ /// <summary>
+ /// Minifies a generated large document and checks that the minified file is smaller than the
+ /// original and between 100 and 50000 bytes in size.
+ /// </summary>
+ [Theory]
+ [InlineData(".pptx")]
+ [InlineData(".docx")]
+ public void TestMinifyDocumentHappyPath(string extension) {
+ var minifiedTestDocument = CreateMinifiedTestDocument(extension, _tempDir);
+ var originalFileSize = new FileInfo(minifiedTestDocument).Length;
+ var minifier = new DocumentMinifier(_tempDir);
+ // cleanup is false here, so the extracted document directory remains and is removed manually below
+ var minifiedDocumentPath = minifier.MinifyDocument(minifiedTestDocument, false);
+ var minifiedFileSize = new FileInfo(minifiedDocumentPath).Length;
+
+ Assert.True(minifiedFileSize < originalFileSize);
+ Assert.InRange(minifiedFileSize, 100, 50000);
+
+ // Cleanup
+ Directory.Delete(minifier.GetExtractedDocDirectory(), true);
+ Directory.Delete(minifier.GetOriginalMediaDirectory(), true);
+ File.Delete(minifiedTestDocument);
+ File.Delete(minifiedDocumentPath);
+ }
+
+ /// <summary>
+ /// Checks that the extracted document directory is gone after minification exactly when the
+ /// cleanup flag was set.
+ /// </summary>
+ [Theory]
+ [InlineData(true)]
+ [InlineData(false)]
+ public void TestDocumentMinificationCleansUpProperly(bool shouldCleanUp) {
+ var minifiedTestDocument = CreateMinifiedTestDocument(".pptx", _tempDir);
+ var minifier = new DocumentMinifier(_tempDir);
+ var minifiedDocumentPath = minifier.MinifyDocument(minifiedTestDocument, shouldCleanUp);
+
+ Assert.Equal(shouldCleanUp, !Directory.Exists(minifier.GetExtractedDocDirectory()));
+
+ // Cleanup
+ // The extracted directory only still exists when the minifier was told not to clean up
+ if (!shouldCleanUp) Directory.Delete(minifier.GetExtractedDocDirectory(), true);
+ Directory.Delete(minifier.GetOriginalMediaDirectory(), true);
+ File.Delete(minifiedTestDocument);
+ File.Delete(minifiedDocumentPath);
+ }
+
+ /// <summary>
+ /// Round-trip check: minifies and then deminifies a generated zip document, extracts both the
+ /// original and the deminified result, and asserts that the two extracted trees are equal.
+ /// </summary>
+ [Fact]
+ public void TestDeminifyDocumentHappyPath() {
+ var inputFile = CreateMinifiedTestDocument(".zip", _tempDir);
+ var outputFile = Path.Combine(_tempDir, "example_zip_transformed.zip");
+ var minifier = new DocumentMinifier(_tempDir);
+ var minifiedFile = minifier.MinifyDocument(inputFile, true);
+ // cleanup is false because the minifier shares _tempDir with the input and output files used below
+ minifier.DeminifyDocument(minifiedFile, outputFile, false);
+
+ var inputExtractionDir = Path.Combine(_tempDir, "input_dir");
+ var outputExtractionDir = Path.Combine(_tempDir, "output_dir");
+ ZipFile.ExtractToDirectory(inputFile, inputExtractionDir);
+ ZipFile.ExtractToDirectory(outputFile, outputExtractionDir);
+
+ Assert.True(AreDirectoriesEqual(inputExtractionDir, outputExtractionDir));
+
+ // Cleanup
+ Directory.Delete(_tempDir, true);
+ }
+
+ /// <summary>
+ /// Checks that the extracted document directory is gone after deminification exactly when the
+ /// cleanup flag was set.
+ /// </summary>
+ [Theory]
+ [InlineData(true)]
+ [InlineData(false)]
+ public void TestDocumentDeminificationCleansUpProperly(bool shouldCleanUp) {
+ var minifiedTestDocument = CreateMinifiedTestDocument(".zip", _tempDir);
+ var outputFile = Path.Combine(_tempDir, "example_zip_transformed.zip");
+ // The minifier gets its own temporary directory here, separate from _tempDir
+ var minifier = new DocumentMinifier();
+ var minifiedFile = minifier.MinifyDocument(minifiedTestDocument, true);
+ minifier.DeminifyDocument(minifiedFile, outputFile, shouldCleanUp);
+
+ Assert.Equal(shouldCleanUp, !Directory.Exists(minifier.GetExtractedDocDirectory()));
+
+ // Cleanup
+ if (!shouldCleanUp) {
+ Directory.Delete(minifier.GetExtractedDocDirectory(), true);
+ Directory.Delete(minifier.GetOriginalMediaDirectory(), true);
+ File.Delete(minifiedFile);
+ }
+
+ File.Delete(minifiedTestDocument);
+ File.Delete(outputFile);
+ }
+
+ /// <summary>
+ /// Translates generated large .docx and .pptx documents with document minification enabled and
+ /// asserts that an output file is produced while the input file is larger than 30000000 bytes.
+ /// </summary>
+ [RealServerOnlyFact]
+ public async Task TestMinifyAndTranslateDocuments() {
+ var translator = CreateTestTranslator();
+ var extensions = new List() { ".docx", ".pptx" };
+ foreach (var extension in extensions) {
+ var exampleDocumentPath = CreateMinifiedTestDocument(extension, _tempDir);
+ var outputDocumentPath = OutputDocumentPath(extension);
+
+ await translator.TranslateDocumentAsync(
+ new FileInfo(exampleDocumentPath),
+ new FileInfo(outputDocumentPath),
+ "EN",
+ "DE",
+ new DocumentTranslateOptions { EnableDocumentMinification = true });
+
+ // If the output exists, the input document must have been minified as TranslateDocumentAsync
+ // will not succeed for files over 30 MB
+ Assert.True(File.Exists(outputDocumentPath));
+ Assert.NotInRange(new FileInfo(exampleDocumentPath).Length, 0, 30000000);
+ }
+
+ // Cleanup
+ Directory.Delete(_tempDir, true);
+ }
+ }
+}
diff --git a/DeepLTests/GeneralTest.cs b/DeepLTests/GeneralTest.cs
index ea1d88f..98ff54f 100644
--- a/DeepLTests/GeneralTest.cs
+++ b/DeepLTests/GeneralTest.cs
@@ -5,11 +5,11 @@
using System;
using System.Collections.Generic;
using System.IO;
+using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using DeepL;
using Xunit;
-using Xunit.Abstractions;
namespace DeepLTests {
public sealed class GeneralTest : BaseDeepLTest {
@@ -18,7 +18,7 @@ public sealed class GeneralTest : BaseDeepLTest {
///
[Fact]
public void TestVersion() {
- Assert.Equal("1.11.0", Translator.Version());
+ Assert.Equal("1.12.0", Translator.Version());
// Note the assembly version must remain unchanged for binary compatibility, excepting the major version.
Assert.Equal("1.0.0.0", typeof(Translator).Assembly.GetName().Version?.ToString());
@@ -43,9 +43,12 @@ public async Task TestExampleTranslation() {
[Fact]
public async Task TestDefaultUserAgentHeader() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions { ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- } });
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -57,9 +60,13 @@ public async Task TestDefaultUserAgentHeader() {
[Fact]
public async Task TestOptInUserAgentHeader() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions { sendPlatformInfo = true, ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- } });
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ sendPlatformInfo = true,
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -68,13 +75,16 @@ public async Task TestOptInUserAgentHeader() {
Assert.Contains("dotnet-clr/", userAgentHeader.ToString());
}
-
[Fact]
public async Task TestOptOutUserAgentHeader() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions { sendPlatformInfo = false, ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- } });
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ sendPlatformInfo = false,
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -86,9 +96,14 @@ public async Task TestOptOutUserAgentHeader() {
[Fact]
public async Task TestDefaultUserAgentHeaderWithAppInfo() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions {sendPlatformInfo = true, appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3"}, ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- }});
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ sendPlatformInfo = true,
+ appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3" },
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -101,9 +116,14 @@ public async Task TestDefaultUserAgentHeaderWithAppInfo() {
[Fact]
public async Task TestOptInUserAgentHeaderWithAppInfo() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions { sendPlatformInfo = true, appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3" }, ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- } });
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ sendPlatformInfo = true,
+ appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3" },
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -113,13 +133,17 @@ public async Task TestOptInUserAgentHeaderWithAppInfo() {
Assert.Contains("my-dotnet-test-app/1.2.3", userAgentHeader.ToString());
}
-
[Fact]
public async Task TestOptOutUserAgentHeaderWithAppInfo() {
var mockHandler = getMockHandler("{\"character_count\": 180118,\"character_limit\": 1250000}");
- var translator = new Translator(AuthKey, new TranslatorOptions { sendPlatformInfo = false, appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3" }, ClientFactory = () => new HttpClientAndDisposeFlag {
- HttpClient = new HttpClient(mockHandler), DisposeClient = true,
- } });
+ var translator = new Translator(
+ AuthKey,
+ new TranslatorOptions {
+ sendPlatformInfo = false,
+ appInfo = new AppInfo { AppName = "my-dotnet-test-app", AppVersion = "1.2.3" },
+ ClientFactory = () =>
+ new HttpClientAndDisposeFlag { HttpClient = new HttpClient(mockHandler), DisposeClient = true, }
+ });
var usage = await translator.GetUsageAsync();
Assert.Single(mockHandler.requests);
var userAgentHeader = mockHandler.requests[0].Headers.UserAgent;
@@ -227,9 +251,7 @@ public async Task TestProxyUsage() {
ServerUrl = ServerUrl,
ClientFactory =
() => {
- var handler = new System.Net.Http.HttpClientHandler() {
- Proxy = new System.Net.WebProxy(ProxyUrl), UseProxy = true,
- };
+ var handler = new HttpClientHandler() { Proxy = new WebProxy(ProxyUrl), UseProxy = true, };
return new HttpClientAndDisposeFlag {
HttpClient = new HttpClient(handler), DisposeClient = true,
diff --git a/DeepLTests/resources/example_document_template.docx b/DeepLTests/resources/example_document_template.docx
new file mode 100644
index 0000000..7ddbf3d
Binary files /dev/null and b/DeepLTests/resources/example_document_template.docx differ
diff --git a/DeepLTests/resources/example_presentation_template.pptx b/DeepLTests/resources/example_presentation_template.pptx
new file mode 100644
index 0000000..1e5f5f7
Binary files /dev/null and b/DeepLTests/resources/example_presentation_template.pptx differ
diff --git a/DeepLTests/resources/example_zip_template.zip b/DeepLTests/resources/example_zip_template.zip
new file mode 100644
index 0000000..f1ca638
Binary files /dev/null and b/DeepLTests/resources/example_zip_template.zip differ
diff --git a/README.md b/README.md
index 5daa66b..94c7256 100644
--- a/README.md
+++ b/README.md
@@ -216,6 +216,63 @@ application needs to execute these steps individually, you can instead use the f
- `Formality`: same as in [Text translation options](#text-translation-options).
- `GlossaryId`: same as in [Text translation options](#text-translation-options).
+- `EnableDocumentMinification`: A `bool` value. If set to `true`, the library will try to minify a document
+before translating it through the API, sending a smaller document if the file contains a lot of media. This is
+currently only supported for `pptx` and `docx` files. See also [Document minification](#document-minification).
+Note that this only works in the high-level `TranslateDocumentAsync` method, not
+`TranslateDocumentUploadAsync`. However, the behavior can be emulated by creating a new `DocumentMinifier`
+object and calling the minifier's methods in between.
+
+#### Document minification
+In some contexts, one can end up with large document files (e.g. PowerPoint presentations
+or Word files with many contributors, especially in a larger organization). However, the
+DeepL API enforces a limit of 30 MB for most of these files (see Usage Limits in the docs).
+In the case that most of this size comes from media included in the documents (e.g. images,
+videos, animations), document minification can help.
+In this case, the library will create a temporary directory to extract the document into,
+replace the large media with tiny placeholders, create a minified document, translate that
+via the API, and re-insert the original media into the original file. Please note that this
+requires a bit of additional (temporary) disk space; we recommend at least 2x the file size
+of the document to be translated.
+To use document minification, simply pass the option to the `TranslateDocumentAsync` function:
+```c#
+await translator.TranslateDocumentAsync(
+ inFile, outFile, "EN", "DE", new DocumentTranslateOptions { EnableDocumentMinification = true }
+);
+```
+In order to use document minification with the lower-level `TranslateDocumentUploadAsync`,
+`TranslateDocumentWaitUntilDoneAsync` and `TranslateDocumentDownloadAsync` methods as well as other details,
+see the `DocumentMinifier` class.
+Currently supported document types for minification:
+1. `pptx`
+2. `docx`
+ Currently supported media types for minification:
+1. `png`
+2. `jpg`
+3. `jpeg`
+4. `emf`
+5. `bmp`
+6. `tiff`
+7. `wdp`
+8. `svg`
+9. `gif`
+10. `mp4`
+11. `asf`
+12. `avi`
+13. `m4v`
+14. `mpg`
+15. `mpeg`
+16. `wmv`
+17. `mov`
+18. `aiff`
+19. `au`
+20. `mid`
+21. `midi`
+22. `mp3`
+23. `m4a`
+24. `wav`
+25. `wma`
+
### Glossaries