Image auto-orient with Tesseract and ImageMagick

deceptikon 0 Tallied Votes 2K Views Share

Warning: This code is tested, but not as-is. I combined multiple files to simplify the snippet.

One of my specialties is document imaging and the custom tools that handle it. A common request, for both scanner sources and file imports, is correcting pages that are not properly oriented. To avoid forcing users to correct this in their scanning utility of choice, automatic detection and rotation of these images is a nice feature.

This snippet is a command-oriented solution using the Tesseract OCR engine and a .NET ImageMagick library (both available on NuGet). It might be used like this:

// Open the image file; the stream is handed to the command for processing
using (var fs = File.Open(file, FileMode.Open))
{
    try
    {           
        // Run hands back the processed image as a new in-memory stream
        using (var ms = new AutoOrient().Run(fs, new AutoOrientOptions()))
        {
            // Close the original file so we can manage it as necessary
            fs.Close();

            // Overwrite the original file
            File.WriteAllBytes(file, ms.ToArray());
        }
    }
    catch (Exception ex)
    {
        // Failures are passed to Log instead of being rethrown
        Log("Error running command" + ex.ToString());
    }
}

There's nothing unusual going down, but I'm offering this to save others time in developing something similar. The curious may find the simple command-oriented design interesting as well. Questions and comments are welcome. :)

Note that Tesseract OCR requires external training files, which can be downloaded from https://code.google.com/p/tesseract-ocr/downloads/list. The files I'm using are tesseract-ocr-3.02.eng.tar.gz and tesseract-ocr-3.01.osd.tar.gz. The latter is critical for orientation information.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using ImageMagick;
using Tesseract;

namespace JRD.Imaging
{
    #region Imaging Command Interfaces
    /// <summary>
    /// Contract for the option objects that configure an imaging command.
    /// </summary>
    public interface IPreprocessOptions
    {
        /// <summary>
        /// Gets the option data in its serialized string form.
        /// </summary>
        string Settings { get; }

        /// <summary>
        /// Populates the options from serialized option data.
        /// </summary>
        /// <param name="serializedValue">The serialized option data to extract.</param>
        void Load(string serializedValue);
    }

    /// <summary>
    /// Defines an interface for image preprocessing commands.
    /// </summary>
    /// <remarks>
    /// A command receives the image as a stream together with an options object
    /// and hands the result back as a <see cref="MemoryStream"/>.
    /// </remarks>
    public interface IPreprocess
    {
        /// <summary>
        /// Runs the command on an image stream with the provided options.
        /// </summary>
        /// <param name="image">The image stream being processed.</param>
        /// <param name="options">Command-specific options for the command.</param>
        /// <returns>The processed image in a new stream.</returns>
        MemoryStream Run(Stream image, IPreprocessOptions options);
    }
    #endregion

    #region Imaging Command Options
    /// <summary>
    /// Base class for command options. Supplies default implementations of the
    /// <see cref="IPreprocessOptions"/> members, property change notification, and a
    /// helper for reading individual values out of a parsed settings list.
    /// </summary>
    public abstract class CommandOptions : IPreprocessOptions, INotifyPropertyChanged
    {
        /// <summary>
        /// Gets a serialized string representation of the options object.
        /// </summary>
        /// <remarks>
        /// Unless a derived class overrides this property, the value is the result of ToString().
        /// </remarks>
        public virtual string Settings
        {
            get { return ToString(); }
        }

        /// <summary>
        /// Represents an event that is raised when a property in the object changes.
        /// </summary>
        /// <remarks>
        /// Initialized with an empty handler so it can be raised without a null check.
        /// </remarks>
        public event PropertyChangedEventHandler PropertyChanged = delegate { };

        /// <summary>
        /// Fires the PropertyChanged event.
        /// </summary>
        /// <param name="propertyName">Name of the property being changed.</param>
        protected void OnPropertyChanged(string propertyName)
        {
            PropertyChanged(this, new PropertyChangedEventArgs(propertyName));
        }

        /// <summary>
        /// Deserializes the provided string into the current options object.
        /// </summary>
        /// <param name="serializedValue">A serialized string corresponding to the result of ToString().</param>
        public virtual void Load(string serializedValue)
        {
            // No work to do
        }

        #region Miscellaneous Helpers
        /// <summary>
        /// Convenience method for extracting a configuration setting from the settings collection.
        /// </summary>
        /// <typeparam name="T">Desired type after parsing the setting.</typeparam>
        /// <param name="settings">The settings collection. A null collection is treated as empty.</param>
        /// <param name="key">The key name for the desired setting; matched case-insensitively.</param>
        /// <param name="defaultOnEmpty">Value to return if the setting is not present.</param>
        /// <param name="parseSetting">Function for parsing a setting value string to the desired type.</param>
        /// <returns>The parsed value of the first matching setting, or defaultOnEmpty if there is no match.</returns>
        protected T GetSetting<T>(List<KeyValuePair<string, string>> settings, string key, T defaultOnEmpty, Func<string, T> parseSetting)
        {
            if (settings == null || key == null)
            {
                return defaultOnEmpty;
            }

            // Setting names are identifiers, not linguistic text, so compare ordinally.
            // The static Equals also tolerates entries whose key is null.
            var index = settings.FindIndex(s => string.Equals(s.Key, key, StringComparison.OrdinalIgnoreCase));

            return index < 0 ? defaultOnEmpty : parseSetting(settings[index].Value);
        }
        #endregion
    }

    /// <summary>
    /// Represents rotation options for the Rotate command.
    /// </summary>
    /// <remarks>
    /// The angle is serialized as "Angle=&lt;value&gt;;" using the invariant culture so the
    /// string can be loaded again regardless of the culture of the thread that reads it.
    /// </remarks>
    public class RotateOptions : CommandOptions
    {
        #region Private Fields
        /// <summary>
        /// Angle used for a new instance and when a loaded string has no Angle setting.
        /// </summary>
        private const double DefaultAngle = 180.0;

        private double _angle = DefaultAngle;
        #endregion

        #region Public Properties
        /// <summary>
        /// Gets or sets the rotation angle in degrees.
        /// </summary>
        /// <remarks>
        /// Setting the value raises PropertyChanged for both "Angle" and "Settings".
        /// </remarks>
        public double Angle
        {
            get { return _angle; }
            set
            {
                _angle = value;
                OnPropertyChanged("Angle");
                OnPropertyChanged("Settings");
            }
        }
        #endregion

        #region Overrides
        /// <summary>
        /// Returns a string representation of the object.
        /// </summary>
        /// <returns>The angle as "Angle=&lt;value&gt;;" followed by the base class representation.</returns>
        public override string ToString()
        {
            // Invariant culture keeps the decimal separator stable across machines
            return "Angle=" + Angle.ToString(System.Globalization.CultureInfo.InvariantCulture) + ";" + base.ToString();
        }
        #endregion

        /// <summary>
        /// Deserializes the provided string into the current options object.
        /// </summary>
        /// <param name="serializedValue">A serialized string corresponding to the result of ToString().</param>
        /// <exception cref="FormatException">The Angle setting is present but is not a valid number.</exception>
        public override void Load(string serializedValue)
        {
            base.Load(serializedValue);

            var settings = NameValueSettingsParser.SplitString(serializedValue);

            Angle = GetSetting<double>(settings, "Angle", DefaultAngle, ParseAngle);
        }

        /// <summary>
        /// Parses a serialized angle, preferring the invariant culture.
        /// </summary>
        /// <param name="text">The serialized angle value.</param>
        /// <returns>The parsed angle in degrees.</returns>
        private static double ParseAngle(string text)
        {
            double parsed;

            // NumberStyles.Float rejects group separators, so a value such as "90,5" written
            // under a comma-decimal culture is not misread as 905 here.
            if (double.TryParse(text, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out parsed))
            {
                return parsed;
            }

            // Fall back to the current culture for strings serialized before the
            // invariant format was used; throws FormatException when that fails too.
            return double.Parse(text);
        }
    }

    /// <summary>
    /// Represents options for the AutoOrient command.
    /// </summary>
    /// <remarks>
    /// Declares no members of its own; everything comes from <see cref="CommandOptions"/>.
    /// </remarks>
    public class AutoOrientOptions : CommandOptions
    {
        // All work done in the base class
    }
    #endregion

    #region Imaging Commands
    /// <summary>
    /// Performs multi page rotation on supported images.
    /// </summary>
    public class Rotate : IPreprocess
    {
        /// <summary>
        /// Runs the command on the provided image, rotating every frame by the configured angle.
        /// </summary>
        /// <param name="image">A stream representing the image bytes. It is rewound before being read.</param>
        /// <param name="options">Corresponding options for the command; must be a <see cref="RotateOptions"/>.</param>
        /// <returns>
        /// A stream of bytes representing the result image after processing, positioned at its start.
        /// </returns>
        /// <exception cref="ArgumentException">options is not a RotateOptions instance.</exception>
        /// <exception cref="ArgumentNullException">image is null.</exception>
        public MemoryStream Run(Stream image, IPreprocessOptions options)
        {
            var opt = options as RotateOptions;

            if (opt == null)
            {
                throw new ArgumentException("RotateOptions expected");
            }

            if (image == null)
            {
                throw new ArgumentNullException("image");
            }

            image.Seek(0, SeekOrigin.Begin);

            var ms = new MemoryStream();

            try
            {
                using (var frames = new MagickImageCollection(image))
                {
                    foreach (var frame in frames)
                    {
                        frame.Rotate(opt.Angle);
                    }

                    frames.RePage();
                    frames.Write(ms);
                }
            }
            catch
            {
                // The caller never receives the stream on failure, so release it here
                ms.Dispose();
                throw;
            }

            // Rewind so callers can read the result directly; ToArray() is unaffected
            ms.Position = 0;

            return ms;
        }
    }

    /// <summary>
    /// Detects the orientation of an image and rotates it accordingly.
    /// </summary>
    public class AutoOrient : IPreprocess
    {
        /// <summary>
        /// Runs the command on the provided image.
        /// </summary>
        /// <param name="image">A stream representing the image bytes; it is loaded as a TIFF for detection.</param>
        /// <param name="options">Corresponding options for the command; must be an <see cref="AutoOrientOptions"/>.</param>
        /// <returns>The stream produced by the Rotate command for the detected correction angle.</returns>
        public MemoryStream Run(Stream image, IPreprocessOptions options)
        {
            var accepted = options is AutoOrientOptions;

            if (!accepted)
            {
                throw new ArgumentException("AutoOrientOptions expected");
            }

            image.Seek(0, SeekOrigin.Begin);

            int correction;

            using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                // Orientation is only reported when this segmentation mode is active
                ocr.DefaultPageSegMode = PageSegMode.AutoOsd;

                using (var pix = Pix.LoadTiffFromMemory(ImageTools.GetBytes(image)))
                using (var page = ocr.Process(pix))
                using (var layout = page.AnalyseLayout())
                {
                    correction = CorrectionFor(layout.GetProperties().Orientation);
                }
            }

            // Delegate the actual pixel work to the Rotate command
            var rotation = new RotateOptions();
            rotation.Angle = correction;

            return new Rotate().Run(image, rotation);
        }

        /// <summary>
        /// Maps a detected page orientation to the rotation angle passed to the Rotate command.
        /// </summary>
        /// <param name="detected">The orientation reported by the layout analysis.</param>
        /// <returns>The angle in degrees, or 0 for any orientation without a specific mapping.</returns>
        private static int CorrectionFor(Orientation detected)
        {
            if (detected == Orientation.PageDown)
            {
                return -180;
            }

            if (detected == Orientation.PageLeft)
            {
                return 90;
            }

            if (detected == Orientation.PageRight)
            {
                return -90;
            }

            return 0;
        }
    }
    #endregion

    #region Helpers
    /// <summary>
    /// Provides non-command image tools.
    /// </summary>
    public static class ImageTools
    {
        /// <summary>
        /// Converts a stream to its corresponding byte array.
        /// </summary>
        /// <param name="source">The source stream. It is rewound to its start before reading.</param>
        /// <returns>The extracted content byte array.</returns>
        /// <exception cref="ArgumentNullException">source is null.</exception>
        public static byte[] GetBytes(Stream source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            source.Seek(0, SeekOrigin.Begin);

            var bytes = new byte[source.Length];
            var filled = 0;

            // A single Read call may return fewer bytes than requested,
            // so keep reading until the buffer is full or the stream ends.
            while (filled < bytes.Length)
            {
                var read = source.Read(bytes, filled, bytes.Length - filled);

                if (read == 0)
                {
                    break;
                }

                filled += read;
            }

            // The stream ended before its reported length; return only what was read
            if (filled < bytes.Length)
            {
                Array.Resize(ref bytes, filled);
            }

            return bytes;
        }
    }
    /// <summary>
    /// Provides utilities for working with serialized name-value setting pairs.
    /// </summary>
    public class NameValueSettingsParser
    {
        /// <summary>
        /// Takes a settings serialized string and returns a list of key-value pairs with the settings.
        /// </summary>
        /// <param name="settings">String that follows the "name1=value1;name2=value2" format.</param>
        /// <param name="settingSplitChar">Default of a semicolon separating individual settings.</param>
        /// <param name="valueSplitChar">Default of an equal sign separating the name from the value of a setting.</param>
        /// <returns>
        /// A list of extracted key-value pairs, in input order. Items without a separator, without
        /// a name, or without a value are skipped. A null or empty input gives an empty list.
        /// </returns>
        /// <remarks>
        /// Only the first separator in an item splits the name from the value, so a value may
        /// itself contain the separator character (for example "token=YWJj=").
        /// </remarks>
        public static List<KeyValuePair<string, string>> SplitString(string settings, char settingSplitChar = ';', char valueSplitChar = '=')
        {
            var ret = new List<KeyValuePair<string, string>>();

            if (string.IsNullOrEmpty(settings))
            {
                return ret;
            }

            var itemSep = new char[] { settingSplitChar };

            foreach (var item in settings.Split(itemSep, StringSplitOptions.RemoveEmptyEntries))
            {
                var split = item.IndexOf(valueSplitChar);

                // Skip items with no separator (-1), an empty name (0), or an empty value (last char)
                if (split <= 0 || split == item.Length - 1)
                {
                    continue;
                }

                ret.Add(new KeyValuePair<string, string>(item.Substring(0, split), item.Substring(split + 1)));
            }

            return ret;
        }
    }
    #endregion
}