(I’m writing this in the context of JavaScript, but will accept an algorithmically correct answer in any language)
How do you find the shortest substring of each element in an array of strings where the substring is NOT contained within any of the other elements, ignoring case?
Suppose I have an input array such as:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];
The output should be something like:
var uniqueNames = ["ne", "h", "ua", "ka", "i", "r"];
For my purposes, you can safely assume that no element will be wholly contained within another element.
My Thoughts:
It seems that one could probably brute force this, along the lines of:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];

/**
 * Brute-force search: for every name, find the shortest substring that does
 * not occur in any of the other names, ignoring case.
 *
 * Candidates are tried shortest-first and, within one length, left to right,
 * so the first candidate that passes is the leftmost shortest unique substring.
 *
 * @param {string[]} names - Strings to compare against each other.
 * @returns {string[]} Lower-cased result per index of `names`. An index is
 *     left unset when every substring of that name also occurs in another name.
 */
function findUniqueSubstringsBruteForce(names)
{
    // Lower-case every name exactly once; candidates and haystacks are then
    // taken from the same lower-cased text instead of being re-converted in
    // the innermost loop.
    var lowered = names.map(function (name)
    {
        return name.toLowerCase();
    });
    var result = [];

    // True when `substr` occurs in any name other than the one at `ownInd`.
    function occursInOtherName(ownInd, substr)
    {
        return lowered.some(function (other, otherInd)
        {
            return otherInd !== ownInd && other.indexOf(substr) > -1;
        });
    }

    // For each name
    lowered.forEach(function (name, nameInd)
    {
        var windowSize, substrInd, substr;
        // For each possible substring length
        for (windowSize = 1; windowSize <= name.length; windowSize++)
        {
            // For each starting index of a substring
            for (substrInd = 0; substrInd <= name.length - windowSize; substrInd++)
            {
                substr = name.substring(substrInd, substrInd + windowSize);
                if (!occursInOtherName(nameInd, substr))
                {
                    // This substr works! Stop searching for this name.
                    result[nameInd] = substr;
                    return;
                }
            }
        }
    });

    return result;
}

var uniqueNames = findUniqueSubstringsBruteForce(names);
But I have to imagine there’s a more elegant solution using tries/prefix trees, suffix arrays, or something interesting like that.
Edit:
I believe this is the form the selected answer would take programmatically in JavaScript:
var names = ["Anne", "Anthony", "LouAnn", "Kant", "Louise", "ark"];

/**
 * Hash-table search: for every name, find a shortest substring that does not
 * occur in any of the other names, ignoring case.
 *
 * Pass 1 records, for every substring of every name, which name owns it, or
 * a sentinel once a second, different name contains it as well.
 * Pass 2 walks the table and keeps, per owning name, the shortest substring
 * that is still owned by exactly one name.
 *
 * @param {string[]} names - Strings to compare against each other.
 * @returns {string[]} Lower-cased result per index of `names`. An index is
 *     left unset when every substring of that name also occurs in another name.
 */
function findUniqueSubstringsHashed(names)
{
    // Sentinel meaning "this substring occurs in more than one name".
    var SHARED = -1;
    // Null-prototype object: inherited keys such as "constructor" must not be
    // mistaken for substrings that were already recorded.
    var owners = Object.create(null);
    var result = [];
    var substr, owner;

    // For each name
    names.forEach(function (name, nameInd)
    {
        var lowered = name.toLowerCase();
        var windowSize, substrInd, candidate;
        // For each possible substring length
        for (windowSize = 1; windowSize <= lowered.length; windowSize++)
        {
            // For each starting index of a substring
            for (substrInd = 0; substrInd <= lowered.length - windowSize; substrInd++)
            {
                candidate = lowered.substring(substrInd, substrInd + windowSize);
                if (owners[candidate] === undefined)
                {
                    owners[candidate] = nameInd;
                }
                else if (owners[candidate] !== nameInd)
                {
                    // Seen in a different name. A repeat inside the same name
                    // (e.g. "a" in "aa") must not disqualify the substring.
                    owners[candidate] = SHARED;
                }
            }
        }
    });

    for (substr in owners)
    {
        owner = owners[substr];
        // Strict "<" keeps the first shortest substring encountered for a name.
        if (owner !== SHARED && (result[owner] === undefined || substr.length < result[owner].length))
        {
            result[owner] = substr;
        }
    }

    return result;
}

var uniqueNames = findUniqueSubstringsHashed(names);
Say `N` is the number of strings and `L` is the maximum length of a string. You’re doing up to `N*L*L*N` iterations.
I can only improve it a bit by trading one iteration for extra memory. For each possible substring length (`L` iterations):
1. Enumerate all substrings of that length in each name (`N*L`), and store each one along with the name’s index in a hashtable (`1`). If there is already an index from a different name for this substring, you know it won’t work, so you replace the index with some special value, like `-1`.
2. Walk the hashtable, picking up substrings for which the index is not `-1` — those are the answers for their corresponding indexes, but only use them if those names don’t already have a shorter answer from a previous iteration.
The memory usage can be greatly reduced by storing a reference back into the existing string instead of copying substrings.