/* C++ Program for Bad Character Heuristic of Boyer
Moore String Matching Algorithm with Timing */
#include <bits/stdc++.h>
#include <chrono>
using namespace std;
#define NO_OF_CHARS 256
// The preprocessing function for Boyer Moore's bad character heuristic
void badCharHeuristic(string str, int size, int badchar[NO_OF_CHARS]) {
int i;
// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)
badchar[i] = -1;
// Fill the actual value of last occurrence of a character
for (i = 0; i < size; i++)
badchar[(int)str[i]] = i;
}
/* A pattern searching function that uses Bad Character Heuristic of Boyer Moore Algorithm */
void search(string txt, string pat) {
int m = pat.size();
int n = txt.size();
int badchar[NO_OF_CHARS];
// Fill the bad character array by calling the preprocessing function badCharHeuristic() for given pattern
badCharHeuristic(pat, m, badchar);
int s = 0; // s is shift of the pattern with respect to text
while (s <= (n - m)) {
int j = m - 1;
// Keep reducing index j of pattern while characters of pattern and text are matching at this shift s
while (j >= 0 && pat[j] == txt[s + j])
j--;
// If the pattern is present at current shift, then index j will become -1 after the above loop
if (j < 0) {
// Uncomment the line below if you want to see the positions where pattern occurs
// cout << "Pattern occurs at shift = " << s << endl;
/* Shift the pattern so that the next character in text aligns with the last occurrence of it in pattern.
The condition s+m < n is necessary for the case when pattern occurs at the end of text */
s += (s + m < n) ? m - badchar[txt[s + m]] : 1;
} else {
/* Shift the pattern so that the bad character in text aligns with the last occurrence of it in pattern.
The max function is used to make sure that we get a positive shift.
We may get a negative shift if the last occurrence of bad character in pattern
is on the right side of the current character. */
s += max(1, j - badchar[txt[s + j]]);
}
}
}
/* Driver code */
int main() {
using namespace std::chrono;
// Generate large txt and pat
int N = 10000000; // Length of txt (10 million characters)
int M = 10000; // Length of pat (10 thousand characters)
// Create txt: 'AAAAAAAAAA...A' (N times)
string txt(N, 'A');
// Create pat: 'AAAAAA...AAB' (M times 'A' and ending with 'B')
string pat(M - 1, 'A');
pat += 'B'; // Append 'B' at the end
// Measure the execution time of the search function
auto start = high_resolution_clock::now();
search(txt, pat);
auto stop = high_resolution_clock::now();
auto duration = duration_cast<milliseconds>(stop - start);
cout << "Execution time: " << duration.count() / 1000.0 << " seconds" << endl;
return 0;
}