Sub PhraseAlyse() ' Paul Beverley - Version 24.08.24 ' Lists phrases that are duplicated minWords = 4 maxWords = 8 minFreqToList = 3 doNotesToo = True strttime = Timer On Error GoTo ReportIt Set myDoc = ActiveDocument Set copyDoc = Documents.Add Set rngOld = myDoc.Content Set rng = copyDoc.Content rng.Text = rngOld.Text rng.Collapse wdCollapseEnd If myDoc.Footnotes.Count > 0 And doNotesToo = True Then rng.InsertAfter Text:=myDoc.StoryRanges(wdFootnotesStory) End If If myDoc.Endnotes.Count > 0 And doNotesToo = True Then rng.InsertAfter Text:=myDoc.StoryRanges(wdEndnotesStory) End If Set rng = copyDoc.Content With rng.Find .ClearFormatting .Replacement.ClearFormatting .Text = "[;:,^t" & ChrW(8211) & ChrW(8212) & "]" .Wrap = wdFindContinue .Forward = True .Replacement.Text = ". " .MatchWildcards = True .Execute Replace:=wdReplaceAll DoEvents .Text = "^13[0-9]{1,}[.\) ]{1,2}" .Replacement.Text = "^p" .Execute Replace:=wdReplaceAll DoEvents .Text = "[ ]{2,}" .Replacement.Text = " " .Execute Replace:=wdReplaceAll DoEvents .Text = "N.B." .Replacement.Text = " " .MatchWildcards = False .Execute Replace:=wdReplaceAll DoEvents .Text = "^p " .Replacement.Text = "^p" .Execute Replace:=wdReplaceAll DoEvents End With Set tempDoc = Documents.Add Set myResults = Documents.Add Set res = myResults.Content totalNumSents = copyDoc.Sentences.Count ' This should be enough, as we're doing it ' one letter at a time Dim myText(30000) As String allInits = "" j = 0 For sn = 1 To totalNumSents thisSentence = Trim(Replace(copyDoc.Sentences(sn).Text, vbCr, "")) numWds = copyDoc.Sentences(sn).Words.Count If InStr(Right(thisSentence, 2), ".") Then numWds = numWds - 1 If (numWds < minWords) = False And Len(thisSentence) > 10 Then ' Three goes at removing start & end 'stuff' For k = 1 To 3 myInit = Left(thisSentence, 1) If UCase(myInit) = LCase(myInit) Then thisSentence = Mid(thisSentence, 2) End If myLast = Right(thisSentence, 1) If UCase(myLast) = LCase(myLast) Then thisSentence = Left(thisSentence, Len(thisSentence) - 1) End If Next k myInit = LCase(Left(thisSentence, 1)) If InStr(allInits, myInit) = 0 Then allInits = allInits & myInit End If j = j + 1 myText(j) = thisSentence If j Mod 100 = 0 Then Debug.Print "Phrases logged: ", j End If DoEvents Next sn numSentencesNow = j numInits = Len(allInits) sp = ChrW(160) For i = 1 To 4 sp = sp & sp Next i init = "" For a = 1 To numInits Debug.Print sp & "To go: " & Str(numInits - a) Set rng = tempDoc.Content rng.Text = "" fstLttr = Mid(allInits, a, 1) Debug.Print fstLttr Application.ScreenUpdating = False For sn = 1 To numSentencesNow pText = myText(sn) init = LCase(Left(pText, 1)) If init = fstLttr Then rng.InsertAfter Text:=pText totWords = rng.Words.Count rng.Delete If totWords > maxWords Then totWords = maxWords For n = minWords To totWords ' Paste in the whole sentence rng.InsertAfter Text:=pText & " Dummy" rng.MoveStart wdWord, n rng.Select rng.Text = vbCr rng.Expand wdParagraph If InStr(rng, "Dummy") > 0 Then rng.Delete rng.Collapse wdCollapseEnd Next n End If DoEvents Next sn Application.ScreenUpdating = True Set rng = tempDoc.Content rng.Sort tempDoc.Paragraphs(1).Range.Text = "Rubbish" & vbCr rng.InsertAfter Text:=vbCr & "Rubbish" & vbCr numParas = tempDoc.Paragraphs.Count prevPara = tempDoc.Paragraphs(1).Range.Text wasAmatch = False numDupl = 1 For i = 2 To numParas thisPara = tempDoc.Paragraphs(i).Range.Text gottaMatch = (thisPara = prevPara) If Not (gottaMatch) Then If wasAmatch Then lenDiff = Len(prevPara) - Len(Replace(prevPara, ".", "")) If Not (numDupl < minFreqToList) And lenDiff = 0 Then res.InsertAfter Text:=Trim(Replace(prevPara, vbCr, "")) _ & " . . [" & Trim(Str(numDupl)) & "]" & vbCr End If numDupl = 1 End If Else numDupl = numDupl + 1 End If wasAmatch = gottaMatch prevPara = thisPara Next i Application.ScreenUpdating = True DoEvents Next a tempDoc.Close SaveChanges:=False copyDoc.Close SaveChanges:=False myResults.Activate Set rng = ActiveDocument.Content rng.Sort rng.InsertAfter Text:=vbCr Selection.HomeKey Unit:=wdStory Selection.TypeText Text:="Duplicates List" & vbCr Beep timGone = Timer - strttime Application.ScreenUpdating = True Beep myTime = Timer Do Loop Until Timer > myTime + 0.2 Beep m = Int(timGone / 60) s = Int(timGone) - m * 60 timeAll = "Time: " & Trim(Str(m)) & " m " & _ Trim(Str(s)) & " s" MsgBox ("Phrases checked: " & Trim(Str(numSentencesNow)) _ & vbCr & vbCr & timeAll) Exit Sub ' Switch the screen back on if there's an error ReportIt: Application.ScreenUpdating = True On Error GoTo 0 Resume End Sub