fork download
  1. /* C++ Program for Bad Character Heuristic of Boyer
  2. Moore String Matching Algorithm with Timing */
  3. #include <bits/stdc++.h>
  4. #include <chrono>
  5. using namespace std;
  6. #define NO_OF_CHARS 256
  7.  
  8. // The preprocessing function for Boyer Moore's bad character heuristic
  9. void badCharHeuristic(string str, int size, int badchar[NO_OF_CHARS]) {
  10. int i;
  11. // Initialize all occurrences as -1
  12. for (i = 0; i < NO_OF_CHARS; i++)
  13. badchar[i] = -1;
  14. // Fill the actual value of last occurrence of a character
  15. for (i = 0; i < size; i++)
  16. badchar[(int)str[i]] = i;
  17. }
  18.  
  19. /* A pattern searching function that uses Bad Character Heuristic of Boyer Moore Algorithm */
  20. void search(string txt, string pat) {
  21. int m = pat.size();
  22. int n = txt.size();
  23.  
  24. int badchar[NO_OF_CHARS];
  25.  
  26. // Fill the bad character array by calling the preprocessing function badCharHeuristic() for given pattern
  27. badCharHeuristic(pat, m, badchar);
  28.  
  29. int s = 0; // s is shift of the pattern with respect to text
  30. while (s <= (n - m)) {
  31. int j = m - 1;
  32.  
  33. // Keep reducing index j of pattern while characters of pattern and text are matching at this shift s
  34. while (j >= 0 && pat[j] == txt[s + j])
  35. j--;
  36.  
  37. // If the pattern is present at current shift, then index j will become -1 after the above loop
  38. if (j < 0) {
  39. // Uncomment the line below if you want to see the positions where pattern occurs
  40. // cout << "Pattern occurs at shift = " << s << endl;
  41.  
  42. /* Shift the pattern so that the next character in text aligns with the last occurrence of it in pattern.
  43.   The condition s+m < n is necessary for the case when pattern occurs at the end of text */
  44. s += (s + m < n) ? m - badchar[txt[s + m]] : 1;
  45. } else {
  46. /* Shift the pattern so that the bad character in text aligns with the last occurrence of it in pattern.
  47.   The max function is used to make sure that we get a positive shift.
  48.   We may get a negative shift if the last occurrence of bad character in pattern
  49.   is on the right side of the current character. */
  50. s += max(1, j - badchar[txt[s + j]]);
  51. }
  52. }
  53. }
  54.  
  55. /* Driver code */
  56. int main() {
  57. using namespace std::chrono;
  58.  
  59. // Generate large txt and pat
  60. int N = 10000000; // Length of txt (10 million characters)
  61. int M = 10000; // Length of pat (10 thousand characters)
  62.  
  63. // Create txt: 'AAAAAAAAAA...A' (N times)
  64. string txt(N, 'A');
  65.  
  66. // Create pat: 'AAAAAA...AAB' (M times 'A' and ending with 'B')
  67. string pat(M - 1, 'A');
  68. pat += 'B'; // Append 'B' at the end
  69.  
  70. // Measure the execution time of the search function
  71. auto start = high_resolution_clock::now();
  72. search(txt, pat);
  73. auto stop = high_resolution_clock::now();
  74.  
  75. auto duration = duration_cast<milliseconds>(stop - start);
  76. cout << "Execution time: " << duration.count() / 1000.0 << " seconds" << endl;
  77.  
  78. return 0;
  79. }
  80.  
Success #stdin #stdout 0.07s 22608KB
stdin
45
stdout
Execution time: 0.061 seconds