Attachment 'WordToMoin.bas.txt'
Download 1 Attribute VB_Name = "Word2Moin"
2 'Microsoft Word 2000 to MoinMoin converter.
3 ' by John Whitlock (John-Whitlock@ieee.org), 2003
4 ' This code is ugly, slow, and might not work the way you want it to.
5 ' It is also public domain, so have fun.
6 ' Feel free to contact me, but don't expect much help...
7 '
8 'To install:
9 ' Start Word
10 ' Start Visual Basic Editor (from Tools menu or Alt-F11)
11 ' Import the module (File -> Import) into the Normal template
12 '
13 'To Run:
14 ' Select Tools -> Macro -> Macros
15 ' Select Word2Moin
16 ' Select "Run"
17 '
18 'To Cancel: Hit Ctrl-Break
19 '
20 'What it does:
21 ' Converts the Word field "TOC" into a Moin table with inter-document links
22 ' Converts Word Headings into Moin Headlines
23 ' Inserts Anchor() macro and section number, if TOC was found
24 ' Converts Bold, Italic, Underlined, Superscript, and Subscript to Moin equivalents
25 ' Converts Lists to Moin lists (does not handle multi-level lists well)
26 ' Converts Tabs to Moin tables
27 ' Converts Tables to Moin Tables (does not handle merged and empty cells well)
28 ' Replaces page breaks with Moin line rules
29 ' Separates paragraphs with extra line breaks
30 ' Copies the results to the clipboard
31 '
32 'What doesn't work well:
33 ' Section numbers - sometimes, the algorithm misses a section
34 ' Multi-level lists - converted to flat lists
35 ' Letter lists (a, b, c) - converted to numbered lists
36 ' Empty Table Cells - Sometimes converted, sometimes not
37 ' Merged Table Cells - No support for cell spanning
38 ' Character conversion - dashes, left/right quote marks not converted to plain ASCII equivalents
39 '
40 'The character conversion issue might keep your page from validating as good HTML.
41 ' To have Word convert for you, save the page as plain text.
42 '
43
44 Type TOC_Entry
45 Number As String
46 Name As String
47 Found As Boolean
48 End Type
49
50 Enum eFormatType
51 eftBold
52 eftItalic
53 eftUnder
54 eftSuper
55 eftSub
56 End Enum
57
58 Type TableCellFormat
59 FirstCell As Boolean 'Cell is first cell in row
60 LastCell As Boolean 'Cell is last cell in row
61 Color As String 'Cell's background color
62 HorizAlign As String 'L, C, R
63 VertAlign As String 'T, C, B
64 RowSpan As Integer '0 or 1, for now
65 ColSpan As Integer '0 or 1, for now
66 End Type
67
68 Option Base 1
69 Option Explicit
70 Dim TOC_Entries() As TOC_Entry
71 Dim IsTOC As Boolean
72
73 Sub Word2Moin()
74
75 Application.ScreenUpdating = False
76
77 'Find the table of contents, if there is one
78 IsTOC = False
79 ConvertTableOfContents
80
81 'Convert Headings
82 Call ConvertHeading(wdStyleHeading1, "= ", " =")
83 Call ConvertHeading(wdStyleHeading2, "== ", " ==")
84 Call ConvertHeading(wdStyleHeading3, "=== ", " ===")
85 Call ConvertHeading(wdStyleHeading4, "==== ", " ====")
86 Call ConvertHeading(wdStyleHeading5, "===== ", " =====")
87
88 ConvertFormat eftBold
89 ConvertFormat eftItalic
90 ConvertFormat eftUnder
91 ConvertFormat eftSuper
92 ConvertFormat eftSub
93
94 ConvertLists
95 ConvertTabs
96 ConvertTables
97 ReplacePageBreaks
98 ExpandLineBreaks
99
100 ExitWord2Wiki:
101 ' Copy to clipboard
102 ActiveDocument.Content.Copy
103
104 Application.ScreenUpdating = True
105 End Sub
106
107 Private Sub ExpandLineBreaks()
108 Dim para As Paragraph
109 For Each para In ActiveDocument.Paragraphs
110 'Debug.Print para.Range.Text
111 If _
112 Not Len(para.Range.Text) = 1 And _
113 InStr(para.Range.Text, "||") = 0 And _
114 Not (Left(para.Range.Text, 1)) = "=" And _
115 Not (Left(para.Range.Text, 2)) = "[[" And _
116 Not (Left(para.Range.Text, 4)) = "----" And _
117 True Then
118 'Debug.Print para.Range.Text
119 para.Range.InsertAfter vbCr
120 End If
121 Next para
122 End Sub
123
124 Private Sub ConvertTableOfContents()
125 With ActiveDocument
126 If .Fields.Count >= 1 Then
127 Dim C As Integer
128 Dim Max As Integer
129
130 'Search Fields for a Table Of Contents
131 For C = 1 To .Fields.Count
132 'If we find a Table of Contents, process it
133 If InStr(LTrim(.Fields(C).Code), "TOC") = 1 Then
134 .Fields(C).Update
135
136 Dim TOC As String
137 Dim Entry As String
138 Dim LastPos As Long
139 Dim Pos As Long
140 Dim FirstTime As Boolean
141
142 TOC = .Fields(C).Result
143
144 'Get each entry in the table, insert into array
145 LastPos = 1
146 Pos = InStr(TOC, vbCr)
147 FirstTime = True
148 Do While (Pos > 0)
149 Dim F1 As Long
150 Dim F2 As Long
151 Dim EntryNum As Long
152
153 Entry = Trim(Mid(TOC, LastPos, Pos - LastPos))
154 Entry = Replace(Entry, "^l", "")
155
156 If (Len(Entry) > 0) Then
157 If (FirstTime) Then
158 EntryNum = 1
159 ReDim TOC_Entries(1)
160 FirstTime = False
161 Else
162 EntryNum = UBound(TOC_Entries) + 1
163 ReDim Preserve TOC_Entries(EntryNum)
164 End If
165
166 F1 = InStr(Entry, vbTab)
167
168 If (F1 > 0) Then
169 F2 = InStr(F1 + 1, Entry, vbTab)
170 If (F2 = 0) Then
171 'Handle Appendix sections'
172 F1 = InStr(Entry, "-")
173 F2 = InStr(F1 + 1, Entry, vbTab)
174 If (F2 = 0) Then
175 F1 = InStr(Entry, " ")
176 F2 = InStr(F1 + 1, Entry, vbTab)
177 End If
178
179 If (F1 = 0) Then
180 'Give up on finding a number
181 F1 = InStr(1, Entry, vbTab)
182 TOC_Entries(EntryNum).Number = ""
183 TOC_Entries(EntryNum).Name = Left(Entry, F1)
184 Else
185 TOC_Entries(EntryNum).Number = Replace(Mid(Entry, 1, F1 - 1), " ", "")
186 TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1)
187 End If
188 Else
189 TOC_Entries(EntryNum).Number = Trim(Mid(Entry, 1, F1 - 2))
190 TOC_Entries(EntryNum).Name = Mid(Entry, F1 + 1, F2 - F1 - 1)
191 End If
192 End If
193
194 'Check for null entries
195 If (Len(TOC_Entries(EntryNum).Number) = 0 And _
196 (Len(TOC_Entries(EntryNum).Name) = 0)) Then
197 ReDim Preserve TOC_Entries(EntryNum - 1)
198 Else
199 TOC_Entries(EntryNum).Found = False
200 End If
201
202 End If
203 LastPos = Pos + 1
204 Pos = InStr(LastPos, TOC, vbCr)
205 Loop
206
207 .Fields(C).Select
208
209 'Delete Word version, insert MoinMoin version
210 With Selection
211 .Delete
212 .InsertAfter ("'''Table Of Contents'''" & vbCr & vbCr)
213
214 For Pos = 1 To UBound(TOC_Entries)
215 'Create a table, with a slight indent for entries that are not top-level
216 .InsertAfter ("||")
217 If (Len(TOC_Entries(Pos).Number) = 1) Then
218 .InsertAfter ("||<(>")
219 Else
220 .InsertAfter (" ||")
221 End If
222 .InsertAfter ("'''" & TOC_Entries(Pos).Number & "'''||" & _
223 "[#s" & TOC_Entries(Pos).Number & " " & TOC_Entries(Pos).Name & "]||" & vbCr)
224 Next
225 End With
226
227 'Stop looking for Table Of Contents
228 IsTOC = True
229 Exit For
230 End If
231 Next
232 End If
233 End With
234 End Sub
235
236 Private Sub ConvertHeading(headingStyle As Long, Optional preString As String = "", Optional postString As String = "")
237 Dim normalStyle As Style
238 Set normalStyle = ActiveDocument.Styles(wdStyleNormal)
239
240 ActiveDocument.Select
241
242 With Selection.find
243
244 .ClearFormatting
245 .Style = ActiveDocument.Styles(headingStyle)
246 .Text = ""
247
248 .format = True
249 .MatchCase = False
250 .MatchWholeWord = False
251 .MatchWildcards = False
252 .MatchSoundsLike = False
253 .MatchAllWordForms = False
254
255 .Forward = True
256 .Wrap = wdFindContinue
257
258 Do While .Execute
259 With Selection
260 Dim Heading As String
261
262 Heading = .Text
263 .Style = normalStyle
264 .Collapse
265 .MoveEndUntil vbCr
266 .Delete
267
268 'Eliminate any manual form feeds
269 Heading = Replace(Heading, vbFormFeed, "")
270
271 'Replace any newlines with spaces
272 Heading = Replace(Heading, vbCr, " ")
273
274 'Removed leading / training spaces
275 Heading = Trim(Heading)
276
277 'Search the TOC entries for this section, insert bookmark etc.
278 If (IsTOC) Then
279 Dim E As Long
280 For E = 1 To UBound(TOC_Entries)
281 If (Not TOC_Entries(E).Found) Then
282 If (StrComp(Heading, TOC_Entries(E).Name) = 0) Then
283 .InsertBefore "[[Anchor(s" & TOC_Entries(E).Number & ")]]" & vbCr
284 Heading = TOC_Entries(E).Number & " " & Heading
285 TOC_Entries(E).Found = True
286 Exit For
287 End If
288 End If
289 Next E
290 End If
291
292 'Print the Heading
293 .InsertAfter preString & Heading & postString
294 End With
295 Loop
296 End With
297 End Sub
298
299 Private Sub ReplacePageBreaks()
300
301 Selection.find.ClearFormatting
302 Selection.find.Replacement.ClearFormatting
303 With Selection.find
304 .Text = "^m"
305 .Replacement.Text = "----" & vbCr
306 .Forward = True
307 .Wrap = wdFindContinue
308 .format = False
309 .MatchCase = False
310 .MatchWholeWord = False
311 .MatchWildcards = False
312 .MatchSoundsLike = False
313 .MatchAllWordForms = False
314 End With
315 Selection.find.Execute Replace:=wdReplaceAll
316
317 Selection.find.ClearFormatting
318 Selection.find.Replacement.ClearFormatting
319 With Selection.find
320 .Text = "^b"
321 .Replacement.Text = "----" & vbCr
322 .Forward = True
323 .Wrap = wdFindContinue
324 .format = False
325 .MatchCase = False
326 .MatchWholeWord = False
327 .MatchWildcards = False
328 .MatchSoundsLike = False
329 .MatchAllWordForms = False
330 End With
331 Selection.find.Execute Replace:=wdReplaceAll
332
333 End Sub
334
335 Private Sub ConvertFormat(format As eFormatType)
336 ActiveDocument.Select
337 With Selection.find
338 Dim pre As String
339 Dim post As String
340
341 .ClearFormatting
342 Select Case format
343 Case eftBold:
344 .Font.Bold = True
345 pre = "'''"
346 post = "'''"
347 Case eftItalic:
348 .Font.Italic = True
349 pre = "''"
350 post = "''"
351 Case eftUnder:
352 .Font.Underline = wdUnderlineSingle
353 pre = "__"
354 post = "__"
355 Case eftSuper:
356 .Font.Superscript = True
357 pre = "^"
358 post = "^"
359 Case eftSub:
360 .Font.Subscript = True
361 pre = ",,"
362 post = ",,"
363 End Select
364
365 .Text = ""
366
367 .format = True
368 .MatchCase = False
369 .MatchWholeWord = False
370 .MatchWildcards = False
371 .MatchSoundsLike = False
372 .MatchAllWordForms = False
373
374 .Forward = True
375 .Wrap = wdFindContinue
376
377 Do While .Execute
378 With Selection
379 If InStr(1, .Text, vbCr) Then
380 ' Just process the chunk before any newline characters
381 ' We'll pick-up the rest with the next search
382 .Collapse
383 .MoveEndUntil vbCr
384 End If
385
386 ' Don't bother to markup whitespace (prevents a loop, as well)
387 If (.Text = vbCr Or .Text = " " Or .Text = "") Then
388 .MoveRight
389 Else
390 .InsertBefore pre
391 .InsertAfter post
392 End If
393
394 Select Case format
395 Case eftBold: .Font.Bold = False
396 Case eftItalic: .Font.Italic = False
397 Case eftUnder: .Font.Underline = wdUnderlineNone
398 Case eftSuper: .Font.Superscript = False
399 Case eftSub: .Font.Subscript = False
400 End Select
401 End With
402 Loop
403 End With
404
405
406 End Sub
407
408 Private Function OutlineLevelToNumber(level As WdOutlineLevel) As Integer
409 Select Case level
410 Case wdOutlineLevel1: OutlineLevelToNumber = 2
411 Case wdOutlineLevel2: OutlineLevelToNumber = 3
412 Case wdOutlineLevel3: OutlineLevelToNumber = 4
413 Case wdOutlineLevel4: OutlineLevelToNumber = 5
414 Case wdOutlineLevel5: OutlineLevelToNumber = 6
415 Case wdOutlineLevel6: OutlineLevelToNumber = 7
416 Case wdOutlineLevel7: OutlineLevelToNumber = 8
417 Case wdOutlineLevel8: OutlineLevelToNumber = 9
418 Case wdOutlineLevel9: OutlineLevelToNumber = 10
419 Case Else: OutlineLevelToNumber = 1
420 End Select
421 End Function
422
423 'Does not handle nested lists
424 Private Sub ConvertLists()
425 Dim para As Paragraph
426 For Each para In ActiveDocument.ListParagraphs
427 With para.Range
428 If .ListFormat.ListType = wdListBullet Then
429 '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "* "
430 .InsertBefore " * "
431 Else
432 '.InsertBefore String(OutlineLevelToNumber(para.OutlineLevel), " ") & "1. "
433 .InsertBefore " 1. "
434 End If
435
436 .ListFormat.RemoveNumbers
437 End With
438 Next para
439 End Sub
440
441 Private Function ColorToWiki(Color As Long) As String
442 Dim raw As String
443 raw = Hex(Color)
444
445 If (raw = "FF000000") Then
446 'Plain White
447 ColorToWiki = "FFFFFF"
448 Exit Function
449 End If
450
451 'Trim down long ones
452 If (Len(raw) > 6) Then raw = Right(raw, 6)
453
454 'Lengthen short ones
455 Do While (Len(raw) < 6)
456 raw = "0" & raw
457 Loop
458
459 'Swap Order
460 Dim C2W As String
461 C2W = Mid(raw, 5, 2) & Mid(raw, 3, 2) & Mid(raw, 1, 2)
462
463 ColorToWiki = C2W
464 End Function
465
466 Private Sub ConvertTabs()
467 Dim para As Paragraph
468 For Each para In ActiveDocument.Paragraphs
469 If (InStr(1, para.Range.Text, vbTab)) Then
470 para.Range.InsertBefore "||"
471 para.Range.Select
472 Selection.Collapse
473 Selection.MoveEndUntil vbCr
474 Selection.InsertAfter "||"
475 End If
476 Next para
477 End Sub
478
479 'This method doesn't handle merged cells at all
480 'It will convert all tabs to ||, though
481 Private Sub ConvertTables()
482 Dim thisTable As table
483 For Each thisTable In ActiveDocument.Tables
484 'Determine how many rows and columns there are
485 Dim tableRow, tableCol As Long
486 Dim tableMaxRow, tableMaxCol As Long
487 thisTable.Select
488 tableMaxRow = Selection.Information(wdMaximumNumberOfRows)
489 tableMaxCol = Selection.Information(wdMaximumNumberOfColumns)
490
491 'Create format arrays for mapping
492 Dim tableFormats() As TableCellFormat
493 ReDim tableFormats(tableMaxRow, tableMaxCol)
494 Dim R, C As Long
495 For R = 1 To tableMaxRow
496 For C = 1 To tableMaxCol
497 tableFormats(R, C).FirstCell = False
498 tableFormats(R, C).LastCell = False
499 tableFormats(R, C).Color = "FFFFFF"
500 tableFormats(R, C).HorizAlign = "C"
501 tableFormats(R, C).VertAlign = "C"
502 tableFormats(R, C).RowSpan = 0
503 tableFormats(R, C).ColSpan = 0
504 Next C
505 Next R
506
507 ''Check format of each cell
508 thisTable.Select
509 Dim thisCell As Cell
510 For Each thisCell In thisTable.Range.Cells
511 With thisCell
512 C = .ColumnIndex
513 R = .RowIndex
514 If (C = 1) Then tableFormats(R, C).FirstCell = True
515 If .Range.Information(wdMaximumNumberOfColumns) = C Then tableFormats(R, C).LastCell = True
516 'Information(wdAtEndOfRowMarker) Then tableFormats(R, C).LastCell = True
517 tableFormats(R, C).Color = ColorToWiki(.Range.Shading.BackgroundPatternColor)
518 If .Range.Paragraphs(1).Alignment = wdAlignParagraphLeft Then tableFormats(R, C).HorizAlign = "L"
519 If .Range.Paragraphs(1).Alignment = wdAlignParagraphRight Then tableFormats(R, C).HorizAlign = "R"
520 If .VerticalAlignment = wdCellAlignVerticalTop Then tableFormats(R, C).VertAlign = "T"
521 If .VerticalAlignment = wdCellAlignVerticalBottom Then tableFormats(R, C).VertAlign = "B"
522 'For now, I can't think of a way of making this accurate
523 tableFormats(R, C).RowSpan = 1
524 tableFormats(R, C).ColSpan = 1
525 End With
526 Next thisCell
527
528 'You may be asking why this is a seperate step. It's a good question.
529 'It's mostly because determining the RowSpan and ColSpan might require a seperate step
530 For Each thisCell In thisTable.Range.Cells
531 If (Len(thisCell.Range.Text) > 2) Then
532 With thisCell
533 'Convert cell contents
534 Dim rawText As String
535 Dim endText As String
536 'Toss out the carriage return
537 rawText = Left(.Range.Text, Len(.Range.Text) - 2)
538 endText = Right(.Range.Text, 1)
539 Dim newText As String
540 newText = ""
541 newText = Replace(rawText, vbCr, "[[BR]]")
542 'If (InStr(1, rawText, vbCr)) Then
543 ' Do While (Len(rawText) > 0)
544 ' Select Case Left(rawText, 1)
545 ' Case vbCr: newText = newText & "[[BR]]"
546 ' 'Case vbLf: newText = newText & "[[BR]]"
547 ' Case Else: newText = newText & Left(rawText, 1)
548 ' End Select
549 ' rawText = Mid(rawText, 2)
550 ' Loop
551 'Else
552 ' newText = rawText
553 'End If
554 newText = newText & endText
555
556 C = .ColumnIndex
557 R = .RowIndex
558 Dim format As String
559 Dim formatStarted As Boolean
560 format = ""
561 formatStarted = False
562 If tableFormats(R, C).FirstCell Then format = format & "||"
563
564 If tableFormats(R, C).ColSpan = 1 Then
565 If tableFormats(R, C).HorizAlign <> "L" Then
566 If (Not formatStarted) Then
567 formatStarted = True
568 format = format & "<"
569 End If
570 If tableFormats(R, C).HorizAlign = "C" Then
571 format = format & ":"
572 Else
573 format = format & ")"
574 End If
575 End If
576 Else
577 If tableFormats(R, C).HorizAlign <> "C" Then
578 If (Not formatStarted) Then
579 formatStarted = True
580 format = format & "<"
581 End If
582 If tableFormats(R, C).HorizAlign = "L" Then
583 format = format & "("
584 Else
585 format = format & ")"
586 End If
587 End If
588 End If
589
590 If tableFormats(R, C).VertAlign <> "C" Then
591 If (Not formatStarted) Then
592 formatStarted = True
593 format = format & "<"
594 End If
595 If tableFormats(R, C).VertAlign = "T" Then
596 format = format & "^"
597 Else
598 format = format & "v"
599 End If
600 End If
601
602 'Row Span - always 1, no action
603 'Col Span - always 1, no action
604
605 'Color must be last
606 If tableFormats(R, C).Color <> "FFFFFF" Then
607 If (Not formatStarted) Then
608 formatStarted = True
609 format = format & "<"
610 End If
611 format = format & "#" & tableFormats(R, C).Color
612 End If
613
614 If (formatStarted) Then format = format & ">"
615
616 .Range.Text = format & newText
617
618 If (tableFormats(R, C).LastCell) Then
619 .Range.InsertAfter "||"
620 End If
621 End With
622 End If
623 Next thisCell
624
625 'Exit Sub
626
627 'Convert the table to text, convert tabs to "||"
628 Dim aRange As Range
629 Set aRange = thisTable.ConvertToText(wdSeparateByTabs)
630 aRange.Select
631 With Selection.find
632 .ClearFormatting
633 .Replacement.ClearFormatting
634 .Text = "^t"
635 .Replacement.Text = " ||"
636 .Forward = True
637 .Wrap = wdFindContinue
638 .format = False
639 .MatchCase = False
640 .MatchWholeWord = False
641 .MatchWildcards = False
642 .MatchSoundsLike = False
643 .MatchAllWordForms = False
644 .Execute Replace:=wdReplaceAll
645 End With
646
647 Next thisTable
648 End Sub
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.