Attribute VB_Name = "Word2Moin"
'Microsoft Word 2000 to MoinMoin converter.
' by John Whitlock (John-Whitlock@ieee.org), 2003
' This code is ugly, slow, and might not work the way you want it to.
' It is also public domain, so have fun.
' Feel free to contact me, but don't expect much help...
'
'To install:
' Start Word
' Start Visual Basic Editor (from Tools menu or Alt-F11)
' Import the module (File -> Import) into the Normal template
'
'To Run:
' Select Tools -> Macro -> Macros
' Select Word2Moin
' Select "Run"
'
'To Cancel: Hit Ctrl-Break
'
'What it does:
' Converts the Word field "TOC" into a Moin table with inter-document links
' Converts Word Headings into Moin Headlines 
'  Inserts Anchor() macro and section number, if TOC was found
' Converts Bold, Italic, Underlined, Superscript, and Subscript to Moin equivalents
' Converts Lists to Moin lists (does not handle multi-level lists well)
' Converts Tabs to Moin tables
' Converts Tables to Moin Tables (does not handle merged and empty cells well)
' Replaces page breaks with Moin line rules
' Separates paragraphs with extra line breaks
' Copies the results to the clipboard
'
'What doesn't work well:
' Section numbers - sometimes, the algorithm misses a section
' Multi-level lists - converted to flat lists
' Letter lists (a, b, c) - converted to numbered lists
' Empty Table Cells - Sometimes converted, sometimes not
' Merged Table Cells - No support for cell spanning
' Character conversion - dashes, left/right quote marks not converted to plain ASCII equivalents
'
'The character conversion issue might keep your page from validating as good HTML.
' To have Word convert for you, save the page as plain text.
'

Type TOC_Entry
    Number As String
    Name As String
    Found As Boolean
End Type

Enum eFormatType
    eftBold
    eftItalic
    eftUnder
    eftSuper
    eftSub
End Enum

Type TableCellFormat
    FirstCell As Boolean    'Cell is first cell in row
    LastCell As Boolean     'Cell is last cell in row
    Color As String         'Cell's background color
    HorizAlign As String    'L, C, R
    VertAlign As String     'T, C, B
    RowSpan As Integer      '0 or 1, for now
    ColSpan As Integer      '0 or 1, for now
End Type

Option Base 1
Option Explicit
Dim TOC_Entries() As TOC_Entry
Dim IsTOC As Boolean

Sub Word2Moin()
    
    Application.ScreenUpdating = False
    
    'Find the table of contents, if there is one
    IsTOC = False
    ConvertTableOfContents
    
    'Convert Headings
    Call ConvertHeading(wdStyleHeading1, "= ", " =")
    Call ConvertHeading(wdStyleHeading2, "== ", " ==")
    Call ConvertHeading(wdStyleHeading3, "=== ", " ===")
    Call ConvertHeading(wdStyleHeading4, "==== ", " ====")
    Call ConvertHeading(wdStyleHeading5, "===== ", " =====")
    
    ConvertFormat eftBold
    ConvertFormat eftItalic
    ConvertFormat eftUnder
    ConvertFormat eftSuper
    ConvertFormat eftSub
    
    ConvertLists
    ConvertTabs
    ConvertTables
    ReplacePageBreaks
    ExpandLineBreaks
    
ExitWord2Wiki:
    ' Copy to clipboard
    ActiveDocument.Content.Copy
    
    Application.ScreenUpdating = True
End Sub

Private Sub ExpandLineBreaks()
    Dim para As Paragraph
    For Each para In ActiveDocument.Paragraphs
        'Debug.Print para.Range.Text
        If _
           Not Len(para.Range.Text) = 1 And _
           InStr(para.Range.Text, "||") = 0 And _
           Not (Left(para.Range.Text, 1)) = "=" And _
           Not (Left(para.Range.Text, 2)) = "[[" And _
           Not (Left(para.Range.Text, 4)) = "----" And _
           True Then
            'Debug.Print para.Range.Text
            para.Range.InsertAfter vbCr
        End If
    Next para
End Sub

Private Sub ConvertTableOfContents()
    With ActiveDocument
        If .Fields.Count >= 1 Then
            Dim C As Integer
            Dim Max As Integer
                
            'Search Fields for a Table Of Contents
            For C = 1 To .Fields.Count
                'If we find a Table of Contents, process it
                If InStr(LTrim(.Fields(C).Code), "TOC") = 1 Then
                    .Fields(C).Update
                    
                    Dim TOC As String
                    Dim Entry As String
                    Dim LastPos As Long
                    Dim Pos As Long
                    Dim FirstTime As Boolean
                    
                    TOC = .Fields(C).Result
                    
                    'Get each entry in the table, insert into array
                    LastPos = 1
                    Pos = InStr(TOC, vbCr)
                    FirstTime = True
                    Do While (Pos > 0)
                        Dim F1 As Long
                        Dim F2 As Long
                        Dim EntryNum As Long
                        
                        Entry = Trim(Mid(TOC, LastPos, Pos - LastPos))
                        Entry = Replace(Entry, "^l", "")
                        
                        If (Len(Entry) > 0) Then
                            If (FirstTime) Then
                                EntryNum = 1
                                ReDim TOC_Entries(1)
                                FirstTime = False
                            Else
                                EntryNum = UBound(TOC_Entries) + 1
                                ReDim Preserve TOC_Entries(EntryNum)
                            End If
                                            
                            F1 = InStr(Entry, vbTab)
                        
                            If (F1 > 0) Then
                                F2 = InStr(F1 + 1, Entry, vbTab)
                                If (F2 = 0) Then
                                    'Handle Appendix sections'
                                    F1 = InStr(Entry, "-")
                                    F2 = InStr(F1 + 1, Entry, vbTab)
                                    If (F2 = 0) Then
                                        F1 = InStr(Entry, "  ")
                                        F2 = InStr(F1 + 1, Entry, vbTab)
                                    End If
                                    
                                    If (F1 = 0) Then
                                        'Give up on finding a number
                                        F1 = InStr(1, Entry, vbTab)
                                        TOC_Entries(EntryNum).Number = ""
                                        TOC_Entries(EntryNum).Name = Left(Entry, F1)
                                    Else
                                        TOC_Entries(EntryNum).Number = Replace(Mid(Entry, 1, F1 - 1), " ", "")
                                        TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1)
                                    End If
                                Else
                                    TOC_Entries(EntryNum).Number = Trim(Mid(Entry, 1, F1 - 2))
                                    TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1)
                                End If
                            End If
                            
                            'Check for null entries
                            If (Len(TOC_Entries(EntryNum).Number) = 0 And _
                               (Len(TOC_Entries(EntryNum).Name) = 0)) Then
                                ReDim Preserve TOC_Entries(EntryNum - 1)
                            Else
                                TOC_Entries(EntryNum).Found = False
                            End If
                                                    
                        End If
                        LastPos = Pos + 1
                        Pos = InStr(LastPos, TOC, vbCr)
                    Loop
                    
                    .Fields(C).Select
                    
                    'Delete Word version, insert MoinMoin version
                    With Selection
                        .Delete
                        .InsertAfter ("'''Table Of Contents'''" & vbCr & vbCr)
                        
                        For Pos = 1 To UBound(TOC_Entries)
                        'Create a table, with a slight indent for entries that are not top-level
                            .InsertAfter ("||")
                            If (Len(TOC_Entries(Pos).Number) = 1) Then
                                .InsertAfter ("||<(>")
                            Else
                                .InsertAfter (" ||")
                            End If
                            .InsertAfter ("'''" & TOC_Entries(Pos).Number & "'''||" & _
                                "[#s" & TOC_Entries(Pos).Number & " " & TOC_Entries(Pos).Name & "]||" & vbCr)
                        Next
                    End With
                    
                    'Stop looking for Table Of Contents
                    IsTOC = True
                    Exit For
                End If
            Next
        End If
    End With
End Sub

Private Sub ConvertHeading(headingStyle As Long, Optional preString As String = "", Optional postString As String = "")
    Dim normalStyle As Style
    Set normalStyle = ActiveDocument.Styles(wdStyleNormal)
    
    ActiveDocument.Select
    
    With Selection.find
    
        .ClearFormatting
        .Style = ActiveDocument.Styles(headingStyle)
        .Text = ""
        
        .format = True
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
        
        .Forward = True
        .Wrap = wdFindContinue
        
        Do While .Execute
            With Selection
                Dim Heading As String
                
                Heading = .Text
                .Style = normalStyle
                .Collapse
                .MoveEndUntil vbCr
                .Delete
                
                'Eliminate any manual form feeds
                Heading = Replace(Heading, vbFormFeed, "")
                
                'Replace any newlines with spaces
                Heading = Replace(Heading, vbCr, " ")
                
                'Removed leading / training spaces
                Heading = Trim(Heading)
                                                       
                'Search the TOC entries for this section, insert bookmark etc.
                If (IsTOC) Then
                    Dim E As Long
                    For E = 1 To UBound(TOC_Entries)
                        If (Not TOC_Entries(E).Found) Then
                            If (StrComp(Heading, TOC_Entries(E).Name) = 0) Then
                                .InsertBefore "[[Anchor(s" & TOC_Entries(E).Number & ")]]" & vbCr
                                Heading = TOC_Entries(E).Number & " " & Heading
                                TOC_Entries(E).Found = True
                                Exit For
                            End If
                        End If
                    Next E
                End If
                                                       
                'Print the Heading
                .InsertAfter preString & Heading & postString
            End With
        Loop
    End With
End Sub

Private Sub ReplacePageBreaks()
    
    Selection.find.ClearFormatting
    Selection.find.Replacement.ClearFormatting
    With Selection.find
        .Text = "^m"
        .Replacement.Text = "----" & vbCr
        .Forward = True
        .Wrap = wdFindContinue
        .format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
    End With
    Selection.find.Execute Replace:=wdReplaceAll
    
    Selection.find.ClearFormatting
    Selection.find.Replacement.ClearFormatting
    With Selection.find
        .Text = "^b"
        .Replacement.Text = "----" & vbCr
        .Forward = True
        .Wrap = wdFindContinue
        .format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
    End With
    Selection.find.Execute Replace:=wdReplaceAll
    
End Sub

Private Sub ConvertFormat(format As eFormatType)
    ActiveDocument.Select
    With Selection.find
        Dim pre As String
        Dim post As String
    
        .ClearFormatting
        Select Case format
            Case eftBold:
                .Font.Bold = True
                pre = "'''"
                post = "'''"
            Case eftItalic:
                .Font.Italic = True
                pre = "''"
                post = "''"
            Case eftUnder:
                .Font.Underline = wdUnderlineSingle
                pre = "__"
                post = "__"
            Case eftSuper:
                .Font.Superscript = True
                pre = "^"
                post = "^"
            Case eftSub:
                .Font.Subscript = True
                pre = ",,"
                post = ",,"
        End Select
        
        .Text = ""
        
        .format = True
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
        
        .Forward = True
        .Wrap = wdFindContinue
        
        Do While .Execute
            With Selection
                If InStr(1, .Text, vbCr) Then
                    ' Just process the chunk before any newline characters
                    ' We'll pick-up the rest with the next search
                    .Collapse
                    .MoveEndUntil vbCr
                End If

                ' Don't bother to markup whitespace (prevents a loop, as well)
                If (.Text = vbCr Or .Text = " " Or .Text = "") Then
                    .MoveRight
                Else
                    .InsertBefore pre
                    .InsertAfter post
                End If

                Select Case format
                    Case eftBold:       .Font.Bold = False
                    Case eftItalic:     .Font.Italic = False
                    Case eftUnder:      .Font.Underline = wdUnderlineNone
                    Case eftSuper:      .Font.Superscript = False
                    Case eftSub:        .Font.Subscript = False
                End Select
            End With
        Loop
    End With


End Sub

Private Function OutlineLevelToNumber(level As WdOutlineLevel) As Integer
    Select Case level
    Case wdOutlineLevel1: OutlineLevelToNumber = 2
    Case wdOutlineLevel2: OutlineLevelToNumber = 3
    Case wdOutlineLevel3: OutlineLevelToNumber = 4
    Case wdOutlineLevel4: OutlineLevelToNumber = 5
    Case wdOutlineLevel5: OutlineLevelToNumber = 6
    Case wdOutlineLevel6: OutlineLevelToNumber = 7
    Case wdOutlineLevel7: OutlineLevelToNumber = 8
    Case wdOutlineLevel8: OutlineLevelToNumber = 9
    Case wdOutlineLevel9: OutlineLevelToNumber = 10
    Case Else: OutlineLevelToNumber = 1
    End Select
End Function

'Does not handle nested lists
Private Sub ConvertLists()
    Dim para As Paragraph
    For Each para In ActiveDocument.ListParagraphs
        With para.Range
            If .ListFormat.ListType = wdListBullet Then
                '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "* "
                .InsertBefore " * "
            Else
                '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "1. "
                .InsertBefore " 1. "
            End If
            
            .ListFormat.RemoveNumbers
        End With
    Next para
End Sub

Private Function ColorToWiki(Color As Long) As String
    Dim raw As String
    raw = Hex(Color)
    
    If (raw = "FF000000") Then
        'Plain White
        ColorToWiki = "FFFFFF"
        Exit Function
    End If
    
    'Trim down long ones
    If (Len(raw) > 6) Then raw = Right(raw, 6)
    
    'Lengthen short ones
    Do While (Len(raw) < 6)
        raw = "0" & raw
    Loop
    
    'Swap Order
    Dim C2W As String
    C2W = Mid(raw, 5, 2) & Mid(raw, 3, 2) & Mid(raw, 1, 2)
    
    ColorToWiki = C2W
End Function

Private Sub ConvertTabs()
    Dim para As Paragraph
    For Each para In ActiveDocument.Paragraphs
        If (InStr(1, para.Range.Text, vbTab)) Then
            para.Range.InsertBefore "||"
            para.Range.Select
            Selection.Collapse
            Selection.MoveEndUntil vbCr
            Selection.InsertAfter "||"
        End If
    Next para
End Sub

'This method doesn't handle merged cells at all
'It will convert all tabs to ||, though
Private Sub ConvertTables()
    Dim thisTable As table
    For Each thisTable In ActiveDocument.Tables
        'Determine how many rows and columns there are
        Dim tableRow, tableCol As Long
        Dim tableMaxRow, tableMaxCol As Long
        thisTable.Select
        tableMaxRow = Selection.Information(wdMaximumNumberOfRows)
        tableMaxCol = Selection.Information(wdMaximumNumberOfColumns)
        
        'Create format arrays for mapping
        Dim tableFormats() As TableCellFormat
        ReDim tableFormats(tableMaxRow, tableMaxCol)
        Dim R, C As Long
        For R = 1 To tableMaxRow
            For C = 1 To tableMaxCol
                tableFormats(R, C).FirstCell = False
                tableFormats(R, C).LastCell = False
                tableFormats(R, C).Color = "FFFFFF"
                tableFormats(R, C).HorizAlign = "C"
                tableFormats(R, C).VertAlign = "C"
                tableFormats(R, C).RowSpan = 0
                tableFormats(R, C).ColSpan = 0
            Next C
        Next R
        
        ''Check format of each cell
        thisTable.Select
        Dim thisCell As Cell
        For Each thisCell In thisTable.Range.Cells
            With thisCell
                C = .ColumnIndex
                R = .RowIndex
                If (C = 1) Then tableFormats(R, C).FirstCell = True
                If .Range.Information(wdMaximumNumberOfColumns) = C Then tableFormats(R, C).LastCell = True
                'Information(wdAtEndOfRowMarker) Then tableFormats(R, C).LastCell = True
                tableFormats(R, C).Color = ColorToWiki(.Range.Shading.BackgroundPatternColor)
                If .Range.Paragraphs(1).Alignment = wdAlignParagraphLeft Then tableFormats(R, C).HorizAlign = "L"
                If .Range.Paragraphs(1).Alignment = wdAlignParagraphRight Then tableFormats(R, C).HorizAlign = "R"
                If .VerticalAlignment = wdCellAlignVerticalTop Then tableFormats(R, C).VertAlign = "T"
                If .VerticalAlignment = wdCellAlignVerticalBottom Then tableFormats(R, C).VertAlign = "B"
                'For now, I can't think of a way of making this accurate
                tableFormats(R, C).RowSpan = 1
                tableFormats(R, C).ColSpan = 1
            End With
        Next thisCell
        
        'You may be asking why this is a seperate step.  It's a good question.
        'It's mostly because determining the RowSpan and ColSpan might require a seperate step
        For Each thisCell In thisTable.Range.Cells
            If (Len(thisCell.Range.Text) > 2) Then
                With thisCell
                    'Convert cell contents
                    Dim rawText As String
                    Dim endText As String
                    'Toss out the carriage return
                    rawText = Left(.Range.Text, Len(.Range.Text) - 2)
                    endText = Right(.Range.Text, 1)
                    Dim newText As String
                    newText = ""
                    newText = Replace(rawText, vbCr, "[[BR]]")
                    'If (InStr(1, rawText, vbCr)) Then
                    '    Do While (Len(rawText) > 0)
                    '        Select Case Left(rawText, 1)
                    '        Case vbCr: newText = newText & "[[BR]]"
                    '        'Case vbLf: newText = newText & "[[BR]]"
                    '        Case Else: newText = newText & Left(rawText, 1)
                    '        End Select
                    '        rawText = Mid(rawText, 2)
                    '    Loop
                    'Else
                    '    newText = rawText
                    'End If
                    newText = newText & endText
                    
                    C = .ColumnIndex
                    R = .RowIndex
                    Dim format As String
                    Dim formatStarted As Boolean
                    format = ""
                    formatStarted = False
                    If tableFormats(R, C).FirstCell Then format = format & "||"
                                   
                    If tableFormats(R, C).ColSpan = 1 Then
                        If tableFormats(R, C).HorizAlign <> "L" Then
                            If (Not formatStarted) Then
                                formatStarted = True
                                format = format & "<"
                            End If
                            If tableFormats(R, C).HorizAlign = "C" Then
                                format = format & ":"
                            Else
                                format = format & ")"
                            End If
                        End If
                    Else
                        If tableFormats(R, C).HorizAlign <> "C" Then
                            If (Not formatStarted) Then
                                formatStarted = True
                                format = format & "<"
                            End If
                            If tableFormats(R, C).HorizAlign = "L" Then
                                format = format & "("
                            Else
                                format = format & ")"
                            End If
                        End If
                    End If
                    
                    If tableFormats(R, C).VertAlign <> "C" Then
                        If (Not formatStarted) Then
                            formatStarted = True
                            format = format & "<"
                        End If
                        If tableFormats(R, C).VertAlign = "T" Then
                            format = format & "^"
                        Else
                            format = format & "v"
                        End If
                    End If
                    
                    'Row Span - always 1, no action
                    'Col Span - always 1, no action
                    
                    'Color must be last
                    If tableFormats(R, C).Color <> "FFFFFF" Then
                        If (Not formatStarted) Then
                            formatStarted = True
                            format = format & "<"
                        End If
                        format = format & "#" & tableFormats(R, C).Color
                    End If
                    
                    If (formatStarted) Then format = format & ">"
                    
                    .Range.Text = format & newText
                    
                    If (tableFormats(R, C).LastCell) Then
                        .Range.InsertAfter "||"
                    End If
                End With
            End If
        Next thisCell
        
        'Exit Sub
        
        'Convert the table to text, convert tabs to "||"
        Dim aRange As Range
        Set aRange = thisTable.ConvertToText(wdSeparateByTabs)
        aRange.Select
        With Selection.find
            .ClearFormatting
            .Replacement.ClearFormatting
            .Text = "^t"
            .Replacement.Text = " ||"
            .Forward = True
            .Wrap = wdFindContinue
            .format = False
            .MatchCase = False
            .MatchWholeWord = False
            .MatchWildcards = False
            .MatchSoundsLike = False
            .MatchAllWordForms = False
            .Execute Replace:=wdReplaceAll
        End With
                    
    Next thisTable
End Sub
