Attribute VB_Name = "Word2Moin" 'Microsoft Word 2000 to MoinMoin converter. ' by John Whitlock (John-Whitlock@ieee.org), 2003 ' This code is ugly, slow, and might not work the way you want it to. ' It is also public domain, so have fun. ' Feel free to contact me, but don't expect much help... ' 'To install: ' Start Word ' Start Visual Basic Editor (from Tools menu or Alt-F11) ' Import the module (File -> Import) into the Normal template ' 'To Run: ' Select Tools -> Macro -> Macros ' Select Word2Moin ' Select "Run" ' 'To Cancel: Hit Ctrl-Break ' 'What it does: ' Converts the Word field "TOC" into a Moin table with inter-document links ' Converts Word Headings into Moin Headlines ' Inserts Anchor() macro and section number, if TOC was found ' Converts Bold, Italic, Underlined, Superscript, and Subscript to Moin equivalents ' Converts Lists to Moin lists (does not handle multi-level lists well) ' Converts Tabs to Moin tables ' Converts Tables to Moin Tables (does not handle merged and empty cells well) ' Replaces page breaks with Moin line rules ' Separates paragraphs with extra line breaks ' Copies the results to the clipboard ' 'What doesn't work well: ' Section numbers - sometimes, the algorithm misses a section ' Multi-level lists - converted to flat lists ' Letter lists (a, b, c) - converted to numbered lists ' Empty Table Cells - Sometimes converted, sometimes not ' Merged Table Cells - No support for cell spanning ' Character conversion - dashes, left/right quote marks not converted to plain ASCII equivalents ' 'The character conversion issue might keep your page from validating as good HTML. ' To have Word convert for you, save the page as plain text. ' Type TOC_Entry Number As String Name As String Found As Boolean End Type Enum eFormatType eftBold eftItalic eftUnder eftSuper eftSub End Enum Type TableCellFormat FirstCell As Boolean 'Cell is first cell in row LastCell As Boolean 'Cell is last cell in row Color As String 'Cell's background color HorizAlign As String 'L, C, R VertAlign As String 'T, C, B RowSpan As Integer '0 or 1, for now ColSpan As Integer '0 or 1, for now End Type Option Base 1 Option Explicit Dim TOC_Entries() As TOC_Entry Dim IsTOC As Boolean Sub Word2Moin() Application.ScreenUpdating = False 'Find the table of contents, if there is one IsTOC = False ConvertTableOfContents 'Convert Headings Call ConvertHeading(wdStyleHeading1, "= ", " =") Call ConvertHeading(wdStyleHeading2, "== ", " ==") Call ConvertHeading(wdStyleHeading3, "=== ", " ===") Call ConvertHeading(wdStyleHeading4, "==== ", " ====") Call ConvertHeading(wdStyleHeading5, "===== ", " =====") ConvertFormat eftBold ConvertFormat eftItalic ConvertFormat eftUnder ConvertFormat eftSuper ConvertFormat eftSub ConvertLists ConvertTabs ConvertTables ReplacePageBreaks ExpandLineBreaks ExitWord2Wiki: ' Copy to clipboard ActiveDocument.Content.Copy Application.ScreenUpdating = True End Sub Private Sub ExpandLineBreaks() Dim para As Paragraph For Each para In ActiveDocument.Paragraphs 'Debug.Print para.Range.Text If _ Not Len(para.Range.Text) = 1 And _ InStr(para.Range.Text, "||") = 0 And _ Not (Left(para.Range.Text, 1)) = "=" And _ Not (Left(para.Range.Text, 2)) = "[[" And _ Not (Left(para.Range.Text, 4)) = "----" And _ True Then 'Debug.Print para.Range.Text para.Range.InsertAfter vbCr End If Next para End Sub Private Sub ConvertTableOfContents() With ActiveDocument If .Fields.Count >= 1 Then Dim C As Integer Dim Max As Integer 'Search Fields for a Table Of Contents For C = 1 To .Fields.Count 'If we find a Table of Contents, process it If InStr(LTrim(.Fields(C).Code), "TOC") = 1 Then .Fields(C).Update Dim TOC As String Dim Entry As String Dim LastPos As Long Dim Pos As Long Dim FirstTime As Boolean TOC = .Fields(C).Result 'Get each entry in the table, insert into array LastPos = 1 Pos = InStr(TOC, vbCr) FirstTime = True Do While (Pos > 0) Dim F1 As Long Dim F2 As Long Dim EntryNum As Long Entry = Trim(Mid(TOC, LastPos, Pos - LastPos)) Entry = Replace(Entry, "^l", "") If (Len(Entry) > 0) Then If (FirstTime) Then EntryNum = 1 ReDim TOC_Entries(1) FirstTime = False Else EntryNum = UBound(TOC_Entries) + 1 ReDim Preserve TOC_Entries(EntryNum) End If F1 = InStr(Entry, vbTab) If (F1 > 0) Then F2 = InStr(F1 + 1, Entry, vbTab) If (F2 = 0) Then 'Handle Appendix sections' F1 = InStr(Entry, "-") F2 = InStr(F1 + 1, Entry, vbTab) If (F2 = 0) Then F1 = InStr(Entry, " ") F2 = InStr(F1 + 1, Entry, vbTab) End If If (F1 = 0) Then 'Give up on finding a number F1 = InStr(1, Entry, vbTab) TOC_Entries(EntryNum).Number = "" TOC_Entries(EntryNum).Name = Left(Entry, F1) Else TOC_Entries(EntryNum).Number = Replace(Mid(Entry, 1, F1 - 1), " ", "") TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1) End If Else TOC_Entries(EntryNum).Number = Trim(Mid(Entry, 1, F1 - 2)) TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1) End If End If 'Check for null entries If (Len(TOC_Entries(EntryNum).Number) = 0 And _ (Len(TOC_Entries(EntryNum).Name) = 0)) Then ReDim Preserve TOC_Entries(EntryNum - 1) Else TOC_Entries(EntryNum).Found = False End If End If LastPos = Pos + 1 Pos = InStr(LastPos, TOC, vbCr) Loop .Fields(C).Select 'Delete Word version, insert MoinMoin version With Selection .Delete .InsertAfter ("'''Table Of Contents'''" & vbCr & vbCr) For Pos = 1 To UBound(TOC_Entries) 'Create a table, with a slight indent for entries that are not top-level .InsertAfter ("||") If (Len(TOC_Entries(Pos).Number) = 1) Then .InsertAfter ("||<(>") Else .InsertAfter (" ||") End If .InsertAfter ("'''" & TOC_Entries(Pos).Number & "'''||" & _ "[#s" & TOC_Entries(Pos).Number & " " & TOC_Entries(Pos).Name & "]||" & vbCr) Next End With 'Stop looking for Table Of Contents IsTOC = True Exit For End If Next End If End With End Sub Private Sub ConvertHeading(headingStyle As Long, Optional preString As String = "", Optional postString As String = "") Dim normalStyle As Style Set normalStyle = ActiveDocument.Styles(wdStyleNormal) ActiveDocument.Select With Selection.find .ClearFormatting .Style = ActiveDocument.Styles(headingStyle) .Text = "" .format = True .MatchCase = False .MatchWholeWord = False .MatchWildcards = False .MatchSoundsLike = False .MatchAllWordForms = False .Forward = True .Wrap = wdFindContinue Do While .Execute With Selection Dim Heading As String Heading = .Text .Style = normalStyle .Collapse .MoveEndUntil vbCr .Delete 'Eliminate any manual form feeds Heading = Replace(Heading, vbFormFeed, "") 'Replace any newlines with spaces Heading = Replace(Heading, vbCr, " ") 'Removed leading / training spaces Heading = Trim(Heading) 'Search the TOC entries for this section, insert bookmark etc. If (IsTOC) Then Dim E As Long For E = 1 To UBound(TOC_Entries) If (Not TOC_Entries(E).Found) Then If (StrComp(Heading, TOC_Entries(E).Name) = 0) Then .InsertBefore "[[Anchor(s" & TOC_Entries(E).Number & ")]]" & vbCr Heading = TOC_Entries(E).Number & " " & Heading TOC_Entries(E).Found = True Exit For End If End If Next E End If 'Print the Heading .InsertAfter preString & Heading & postString End With Loop End With End Sub Private Sub ReplacePageBreaks() Selection.find.ClearFormatting Selection.find.Replacement.ClearFormatting With Selection.find .Text = "^m" .Replacement.Text = "----" & vbCr .Forward = True .Wrap = wdFindContinue .format = False .MatchCase = False .MatchWholeWord = False .MatchWildcards = False .MatchSoundsLike = False .MatchAllWordForms = False End With Selection.find.Execute Replace:=wdReplaceAll Selection.find.ClearFormatting Selection.find.Replacement.ClearFormatting With Selection.find .Text = "^b" .Replacement.Text = "----" & vbCr .Forward = True .Wrap = wdFindContinue .format = False .MatchCase = False .MatchWholeWord = False .MatchWildcards = False .MatchSoundsLike = False .MatchAllWordForms = False End With Selection.find.Execute Replace:=wdReplaceAll End Sub Private Sub ConvertFormat(format As eFormatType) ActiveDocument.Select With Selection.find Dim pre As String Dim post As String .ClearFormatting Select Case format Case eftBold: .Font.Bold = True pre = "'''" post = "'''" Case eftItalic: .Font.Italic = True pre = "''" post = "''" Case eftUnder: .Font.Underline = wdUnderlineSingle pre = "__" post = "__" Case eftSuper: .Font.Superscript = True pre = "^" post = "^" Case eftSub: .Font.Subscript = True pre = ",," post = ",," End Select .Text = "" .format = True .MatchCase = False .MatchWholeWord = False .MatchWildcards = False .MatchSoundsLike = False .MatchAllWordForms = False .Forward = True .Wrap = wdFindContinue Do While .Execute With Selection If InStr(1, .Text, vbCr) Then ' Just process the chunk before any newline characters ' We'll pick-up the rest with the next search .Collapse .MoveEndUntil vbCr End If ' Don't bother to markup whitespace (prevents a loop, as well) If (.Text = vbCr Or .Text = " " Or .Text = "") Then .MoveRight Else .InsertBefore pre .InsertAfter post End If Select Case format Case eftBold: .Font.Bold = False Case eftItalic: .Font.Italic = False Case eftUnder: .Font.Underline = wdUnderlineNone Case eftSuper: .Font.Superscript = False Case eftSub: .Font.Subscript = False End Select End With Loop End With End Sub Private Function OutlineLevelToNumber(level As WdOutlineLevel) As Integer Select Case level Case wdOutlineLevel1: OutlineLevelToNumber = 2 Case wdOutlineLevel2: OutlineLevelToNumber = 3 Case wdOutlineLevel3: OutlineLevelToNumber = 4 Case wdOutlineLevel4: OutlineLevelToNumber = 5 Case wdOutlineLevel5: OutlineLevelToNumber = 6 Case wdOutlineLevel6: OutlineLevelToNumber = 7 Case wdOutlineLevel7: OutlineLevelToNumber = 8 Case wdOutlineLevel8: OutlineLevelToNumber = 9 Case wdOutlineLevel9: OutlineLevelToNumber = 10 Case Else: OutlineLevelToNumber = 1 End Select End Function 'Does not handle nested lists Private Sub ConvertLists() Dim para As Paragraph For Each para In ActiveDocument.ListParagraphs With para.Range If .ListFormat.ListType = wdListBullet Then '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "* " .InsertBefore " * " Else '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "1. " .InsertBefore " 1. " End If .ListFormat.RemoveNumbers End With Next para End Sub Private Function ColorToWiki(Color As Long) As String Dim raw As String raw = Hex(Color) If (raw = "FF000000") Then 'Plain White ColorToWiki = "FFFFFF" Exit Function End If 'Trim down long ones If (Len(raw) > 6) Then raw = Right(raw, 6) 'Lengthen short ones Do While (Len(raw) < 6) raw = "0" & raw Loop 'Swap Order Dim C2W As String C2W = Mid(raw, 5, 2) & Mid(raw, 3, 2) & Mid(raw, 1, 2) ColorToWiki = C2W End Function Private Sub ConvertTabs() Dim para As Paragraph For Each para In ActiveDocument.Paragraphs If (InStr(1, para.Range.Text, vbTab)) Then para.Range.InsertBefore "||" para.Range.Select Selection.Collapse Selection.MoveEndUntil vbCr Selection.InsertAfter "||" End If Next para End Sub 'This method doesn't handle merged cells at all 'It will convert all tabs to ||, though Private Sub ConvertTables() Dim thisTable As table For Each thisTable In ActiveDocument.Tables 'Determine how many rows and columns there are Dim tableRow, tableCol As Long Dim tableMaxRow, tableMaxCol As Long thisTable.Select tableMaxRow = Selection.Information(wdMaximumNumberOfRows) tableMaxCol = Selection.Information(wdMaximumNumberOfColumns) 'Create format arrays for mapping Dim tableFormats() As TableCellFormat ReDim tableFormats(tableMaxRow, tableMaxCol) Dim R, C As Long For R = 1 To tableMaxRow For C = 1 To tableMaxCol tableFormats(R, C).FirstCell = False tableFormats(R, C).LastCell = False tableFormats(R, C).Color = "FFFFFF" tableFormats(R, C).HorizAlign = "C" tableFormats(R, C).VertAlign = "C" tableFormats(R, C).RowSpan = 0 tableFormats(R, C).ColSpan = 0 Next C Next R ''Check format of each cell thisTable.Select Dim thisCell As Cell For Each thisCell In thisTable.Range.Cells With thisCell C = .ColumnIndex R = .RowIndex If (C = 1) Then tableFormats(R, C).FirstCell = True If .Range.Information(wdMaximumNumberOfColumns) = C Then tableFormats(R, C).LastCell = True 'Information(wdAtEndOfRowMarker) Then tableFormats(R, C).LastCell = True tableFormats(R, C).Color = ColorToWiki(.Range.Shading.BackgroundPatternColor) If .Range.Paragraphs(1).Alignment = wdAlignParagraphLeft Then tableFormats(R, C).HorizAlign = "L" If .Range.Paragraphs(1).Alignment = wdAlignParagraphRight Then tableFormats(R, C).HorizAlign = "R" If .VerticalAlignment = wdCellAlignVerticalTop Then tableFormats(R, C).VertAlign = "T" If .VerticalAlignment = wdCellAlignVerticalBottom Then tableFormats(R, C).VertAlign = "B" 'For now, I can't think of a way of making this accurate tableFormats(R, C).RowSpan = 1 tableFormats(R, C).ColSpan = 1 End With Next thisCell 'You may be asking why this is a seperate step. It's a good question. 'It's mostly because determining the RowSpan and ColSpan might require a seperate step For Each thisCell In thisTable.Range.Cells If (Len(thisCell.Range.Text) > 2) Then With thisCell 'Convert cell contents Dim rawText As String Dim endText As String 'Toss out the carriage return rawText = Left(.Range.Text, Len(.Range.Text) - 2) endText = Right(.Range.Text, 1) Dim newText As String newText = "" newText = Replace(rawText, vbCr, "[[BR]]") 'If (InStr(1, rawText, vbCr)) Then ' Do While (Len(rawText) > 0) ' Select Case Left(rawText, 1) ' Case vbCr: newText = newText & "[[BR]]" ' 'Case vbLf: newText = newText & "[[BR]]" ' Case Else: newText = newText & Left(rawText, 1) ' End Select ' rawText = Mid(rawText, 2) ' Loop 'Else ' newText = rawText 'End If newText = newText & endText C = .ColumnIndex R = .RowIndex Dim format As String Dim formatStarted As Boolean format = "" formatStarted = False If tableFormats(R, C).FirstCell Then format = format & "||" If tableFormats(R, C).ColSpan = 1 Then If tableFormats(R, C).HorizAlign <> "L" Then If (Not formatStarted) Then formatStarted = True format = format & "<" End If If tableFormats(R, C).HorizAlign = "C" Then format = format & ":" Else format = format & ")" End If End If Else If tableFormats(R, C).HorizAlign <> "C" Then If (Not formatStarted) Then formatStarted = True format = format & "<" End If If tableFormats(R, C).HorizAlign = "L" Then format = format & "(" Else format = format & ")" End If End If End If If tableFormats(R, C).VertAlign <> "C" Then If (Not formatStarted) Then formatStarted = True format = format & "<" End If If tableFormats(R, C).VertAlign = "T" Then format = format & "^" Else format = format & "v" End If End If 'Row Span - always 1, no action 'Col Span - always 1, no action 'Color must be last If tableFormats(R, C).Color <> "FFFFFF" Then If (Not formatStarted) Then formatStarted = True format = format & "<" End If format = format & "#" & tableFormats(R, C).Color End If If (formatStarted) Then format = format & ">" .Range.Text = format & newText If (tableFormats(R, C).LastCell) Then .Range.InsertAfter "||" End If End With End If Next thisCell 'Exit Sub 'Convert the table to text, convert tabs to "||" Dim aRange As Range Set aRange = thisTable.ConvertToText(wdSeparateByTabs) aRange.Select With Selection.find .ClearFormatting .Replacement.ClearFormatting .Text = "^t" .Replacement.Text = " ||" .Forward = True .Wrap = wdFindContinue .format = False .MatchCase = False .MatchWholeWord = False .MatchWildcards = False .MatchSoundsLike = False .MatchAllWordForms = False .Execute Replace:=wdReplaceAll End With Next thisTable End Sub