|
247.Given a input string find the longest substring which appears more than once in the string?
Microsoft Interview Questions and Answers
(Continued from previous question...)
247.Given a input string find the longest substring which appears more than once in the string?
Question:
Given a input string find the longest substring which appears more than once in the string?
maybe an answer:
Don't know exactly what the length is, but we do know it is between 0 and len(s).
The key insights which allow us to do binary search are:
1. if there's a substring with length k which appears more than once, then there's also a substring with length k-1 which also appears more than once;
2. if there's NO substring with length k which appears more than once, then there's also NO substring with length k+1 which also appears more than once;
for a given length of answer k, just insert all the hash values of all the substrings with length k into a hash table(totally O(len(s)) using polynomial hash) and check if there're two substrings with the same hash value.
so the time complexity is O(len*log(len))
This is the easiest way I know to solve this problem.
#include <cstdio>
#include <ctime>
#include <cstring>
#include <cstdlib>
const int maxl = 100000, maxc = 2;
const int maxh = 200000, basen = 3, base[basen] = { 113, 149, 173 };
struct node {
unsigned key[basen];
node *next;
};
void gen_rand_str(char *s, int len) {
srand(time(0));
for (int i = 0; i < len; ++i)
s[i] = 'a' + rand() % maxc;
s[len] = 0;
}
void insert(unsigned v[], node *h[], node *p) {
memcpy(p->key, v, sizeof(p->key));
p->next = h[v[0] % maxh];
h[v[0] % maxh] = p;
}
bool find(unsigned v[], node *h[]) {
for (node *p = h[v[0] % maxh]; p; p = p->next)
if (memcmp(p->key, v, sizeof(p->key)) == 0)
return 1;
return 0;
}
bool valid(char *s, int n, int len, int &pos) {
static node *h[maxh], pool[maxl];
memset(h, 0, sizeof(h));
int pp = 0;
unsigned v[basen], pow[basen];
for (int j = 0; j < basen; ++j)
pow[j] = 1, v[j] = 0;
for (int i = 0; i < len; ++i)
for (int j = 0; j < basen; ++j)
v[j] = v[j] * base[j] + s[i], pow[j] *= base[j];
insert(v, h, pool + (pp++));
for (int i = len; i < n; ++i) {
for (int j = 0; j < basen; ++j)
v[j] = v[j] * base[j] - pow[j] * s[i - len] + s[i];
if (find(v, h)) {
pos = i - len + 1;
return 1;
}
insert(v, h, pool + (pp++));
}
return 0;
}
//
//bool valid2(char *s, int n, int len, int &pos) {
// for (int i = 0; i < n - len; ++i)
// for (int j = i + 1; j < n - len; ++j)
// if (memcmp(s + i, s + j, len) == 0) {
// pos = i;
// return 1;
// }
// return 0;
//}
int main() {
int n = maxl;
static char s[maxl + 1];
gen_rand_str(s, n);
// puts(s);
int start_t = clock();
int lr = 1, rr = n - 1, mid, tpos, anslen = 0, anspos;
while (lr <= rr) {
mid = (lr + rr) >> 1;
if (valid(s, n, mid, tpos))
anslen = mid, anspos = tpos, lr = mid + 1;
else
rr = mid - 1;
}
int end_t = clock();
if (!anslen)
puts("No repeated substring !");
else {
printf("maxlen = %d\n", anslen);
for (int i = 0; i < anslen; ++i)
putchar(s[anspos + i]);
putchar('\n');
}
printf("Time used: %.2lf\n", double(end_t - start_t) / CLOCKS_PER_SEC);
//
// lr = 1, rr = n - 1, anslen = 0;
// while (lr <= rr) {
// mid = (lr + rr) >> 1;
// if (valid2(s, n, mid, tpos))
// anslen = mid, anspos = tpos, lr = mid + 1;
// else
// rr = mid - 1;
// }
// printf("maxlen by bruteforce: %d\n", anslen);
return 0;
}
(Continued on next question...)
Other Interview Questions
|