lessCode.net
Links
AFFILIATES
« No State Machines in Windows Workflow 4.0? | Main | Project Vector? You mean like “attack” vector? »
Friday
Jun192009

Convert Word Documents to PDF with an MSBuild Task

I needed to convert a lot of Microsoft Word documents to PDF format, as one step in a continuous integration build of a Visual Studio project. To save a lot of build time, an incremental build of this project should spend time converting only the Word documents that have actually changed since the last build. Rather than write a PowerShell script, or use some other out-of-band mechanism, a simple custom MSBuild task does the trick nicely.

using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.Build.Framework;
using Microsoft.Office.Interop.Word;

namespace MSBuild.Tasks {
  /// <summary>
  /// MSBuild task that converts Word documents to PDF by automating Microsoft Word
  /// through the Office interop assemblies. <see cref="Inputs"/> and
  /// <see cref="Outputs"/> are parallel lists: the document at index <c>i</c> of
  /// <see cref="Inputs"/> is saved as the PDF at index <c>i</c> of <see cref="Outputs"/>.
  /// </summary>
  public class Doc2Pdf : Microsoft.Build.Utilities.Task {

    // Placeholder passed (by ref) for every optional COM argument we do not set.
    private static object missing = Type.Missing;

    // Numeric value of WdSaveFormat.wdFormatPDF. Kept as a number so the task does
    // not depend on an interop assembly version that declares the enum member.
    private const int WdFormatPdf = 17;

    /// <summary>Word documents to convert.</summary>
    [Required]
    public ITaskItem[] Inputs { get; set; }

    /// <summary>PDF files to produce, one per entry in <see cref="Inputs"/>.</summary>
    [Required]
    public ITaskItem[] Outputs { get; set; }

    /// <summary>
    /// The items from <see cref="Outputs"/> that were actually written during this
    /// execution. Set even when the task fails part-way through.
    /// </summary>
    [Output]
    public ITaskItem[] ConvertedFiles { get; set; }

    /// <summary>
    /// Converts every input document to its corresponding output PDF.
    /// </summary>
    /// <returns>
    /// <c>true</c> if all documents were converted; <c>false</c> if the item lists
    /// do not match or any error occurred (the error is logged to the build log).
    /// </returns>
    public override bool Execute() {
      List<ITaskItem> convertedFiles = new List<ITaskItem>();
      _Application word = null;

      try {
        // The two lists are indexed in parallel, so they must be the same length.
        if (Inputs.Length != Outputs.Length) {
          Log.LogError(
            "DOC2PDF: Inputs ({0} items) and Outputs ({1} items) must contain the same number of items",
            Inputs.Length, Outputs.Length);
          return false;
        }

        // Created inside the try block so that a failure to start Word is
        // reported as a build error instead of an unhandled task exception.
        word = (_Application) new Application();
        word.Visible = false;

        for (int i = 0; i < Inputs.Length; i++) {
          ConvertDocument(word, Inputs[i], Outputs[i]);
          convertedFiles.Add(Outputs[i]);
        }

        return true;
      }
      catch (Exception ex) {
        Log.LogErrorFromException(ex);
        return false;
      }
      finally {
        // Publish the output before touching Word again, so a failure while
        // shutting Word down cannot lose the list of converted files.
        ConvertedFiles = convertedFiles.ToArray();

        if (word != null) {
          object saveChanges = false;
          word.Quit(ref saveChanges, ref missing, ref missing);
        }
      }
    }

    // Opens one Word document, saves it as PDF, and always closes it again so
    // that documents do not accumulate in the Word instance across iterations.
    private void ConvertDocument(_Application word, ITaskItem docFile, ITaskItem pdfFile) {
      Log.LogMessage(MessageImportance.Normal,
                     string.Format(
                       "DOC2PDF: Converting {0} -> {1}",
                       docFile.ItemSpec, pdfFile.ItemSpec));

      FileInfo docFileInfo = new FileInfo(docFile.ItemSpec);
      if (!docFileInfo.Exists) {
        throw new FileNotFoundException("No such file", docFile.ItemSpec);
      }

      // The COM calls take their arguments as "ref object"; full paths are
      // passed rather than the (possibly relative) item specs.
      object docFileName = docFileInfo.FullName;
      object pdfFileName = new FileInfo(pdfFile.ItemSpec).FullName;

      _Document doc = null;
      try {
        doc = word.Documents.Open(ref docFileName,
                  ref missing, ref missing, ref missing, ref missing, ref missing,
                  ref missing, ref missing, ref missing, ref missing, ref missing,
                  ref missing, ref missing, ref missing, ref missing, ref missing);

        object fileFormat = WdFormatPdf;
        doc.SaveAs(ref pdfFileName, ref fileFormat,
                   ref missing, ref missing, ref missing, ref missing, ref missing,
                   ref missing, ref missing, ref missing, ref missing, ref missing,
                   ref missing, ref missing, ref missing, ref missing);
      }
      finally {
        if (doc != null) {
          object saveChanges = false;
          doc.Close(ref saveChanges, ref missing, ref missing);
        }
      }
    }
  }
}

The Office interop APIs make this code messier than it needs to be, with all the optional parameters that we don’t care about in this case. Fortunately, this story gets better in C# 4.0 with the new optional parameters feature.

Assuming you build the above into an assembly called Doc2PdfTask.dll, and put the assembly in the MSBuild extensions directory, you can then set up your project file to do something like this:

<!-- Sample project: converts the Word documents listed in @(WordDocuments) to PDF
     using the custom Doc2Pdf task. -->
<Project DefaultTargets="Build"
         xmlns="http://schemas.microsoft.com/developer/msbuild/2003"
         ToolsVersion="3.5">
  <!-- Load the Doc2Pdf task from the assembly in the MSBuild extensions directory. -->
  <UsingTask AssemblyFile="$(MSBuildExtensionsPath)\Doc2PdfTask.dll"
             TaskName="MSBuild.Tasks.Doc2Pdf" />
  <!-- The default target does no work itself; it only triggers Convert. -->
  <Target Name="Build"
          DependsOnTargets="Convert" />
  <!-- Inputs/Outputs on the target pair each Word document with a .pdf of the same
       name in the same directory; MSBuild compares them to decide which documents
       are out of date and need converting. -->
  <Target Name="Convert"
          Inputs="@(WordDocuments)"
          Outputs="@(WordDocuments->'%(RelativeDir)%(FileName).pdf')">
    <!-- The %(...) metadata references batch the task over the WordDocuments items.
         Files reported in ConvertedFiles are collected into @(PdfDocuments). -->
    <Doc2Pdf Inputs="%(WordDocuments.Identity)"
              Outputs="%(RelativeDir)%(FileName).pdf">
      <Output TaskParameter="ConvertedFiles"
              ItemName="PdfDocuments" />
    </Doc2Pdf>
  </Target>
  <!-- Every .doc and .docx file under the project directory, searched recursively. -->
  <ItemGroup>
    <WordDocuments Include="**\*.doc" />
    <WordDocuments Include="**\*.docx" />
  </ItemGroup>
</Project>

All Word documents under the project directory (searched recursively) will be converted if necessary. Obviously, you’ll need Microsoft Word installed for this to work (I have Office 2007).

PrintView Printer Friendly Version

EmailEmail Article to Friend

Reader Comments

There are no comments for this journal entry. To create a new comment, use the form below.

PostPost a New Comment

Enter your information below to add a new comment.

My response is on my own website »
Author Email (optional):
Author URL (optional):
Post:
 
All HTML will be escaped. Hyperlinks will be created for URLs automatically.