For example, given:
S:
"barfoothefoobarman"
L:
["foo", "bar"]
You should return the indices:
[0,9]
.(order does not matter).
Solution: the following implementation is nothing but brute force. To reduce the time complexity of searching, the hash table is used. Please use hash table properly, otherwise you will get TLE. Anyway, the time complexity is O(mn), where m and n are the lengths of S and L respectively. I believe there may be an optimal solution; maybe KMP like algorithm; not sure so far.
class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { // IMPORTANT: Please reset any member data you declared, as // the same Solution instance will be reused for each test case vector<int> ret; if(S.empty() || L.empty() || S.size()<L.size()*L[0].size()) return ret; int wordLength = L[0].size(), sumLength=L.size()*wordLength; //Initilize hashtable: needFound[L[i]] is no. of occurences of L[i] in L unordered_map<string, int> needFound; for(int i=0; i<L.size(); ++i) needFound[L[i]]++; //Check one by one for(int i=0; i<=S.size()-sumLength; ++i){ unordered_map<string, int> hasFound; int j = 0; while(j<sumLength){ string cur = S.substr(i+j, wordLength); if(needFound.find(cur)!=needFound.end() && hasFound[cur]<needFound[cur]) hasFound[cur]++; else break; j+=wordLength; } if(j==sumLength) ret.push_back(i); } return ret; } };
I finally get a linear algorithm in terms of the length of S. To explain this solution, we define a valid window as an substring of S, say S[start:end] such that the frequency of each word in this substring is not larger than that in L. It is to be noted that if the frequency of each word in this subtring and in L is the same, we get the S', i.e., the substring in S that is a concatenation of each word in L exactly once.
To cover all possibilities, we check concatenation at 0, k, 2k, ..., (n-1)*k; then 1, k+1, 2k+1, ..., (n-1)*k+1; then 2, k+2, 2k+2, ..., (n-1)*k+2, etc.; until k-1, 2k-1, 3k-1, ..., (n-1)*k+k-1, where k is the length of each word (note: all the words have the same length), and n is the number of words. More details see comments in the following code. [Will explain more when I have time. :-)]
class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { // IMPORTANT: Please reset any member data you declared, as // the same Solution instance will be reused for each test case vector<int> ret; if(S.empty() || L.empty() || S.size()<L.size()*L[0].size()) return ret; int wordLength = L[0].size(), sumLength=L.size()*wordLength; //Initilize hashtable: needFound[L[i]] is no. of occurences of L[i] in L unordered_map<string, int> needFound; for(int i=0; i<L.size(); ++i) needFound[L[i]]++; for(int i=0; i<wordLength; ++i){ unordered_map<string, int> hasFound; int count=0; for(int start=i, end=i; end<=S.size()-wordLength; end+=wordLength){ string cur = S.substr(end, wordLength); //the current segment is not found in L; the starting index of //substring s' cound not be beween start and end, so skip to 'end' if(needFound.find(cur)==needFound.end()){ start=end+wordLength; hasFound.clear(); count=0; continue; } hasFound[cur]++; if(hasFound[cur]>needFound[cur]){ //the current window is not valid //shrink the window to make it valid while(S.substr(start, wordLength)!=cur){ count--; hasFound[S.substr(start, wordLength)]--; start+=wordLength; } hasFound[S.substr(start, wordLength)]--; start+=wordLength; } else{ //the current winodw is valid count++; if(count==L.size()){ //found one ret.push_back(start); //shift the window to right one step //but we keep the information we obtained so far count--; hasFound[S.substr(start, wordLength)]--; start+=wordLength; } } } } return ret; } };