Fuzzy search for enterprise grade Apps
Hi,
If Ninox truly wants “power” CRMs and ERPs to be built using the Ninox platform, Search needs to be better. Simple as. Otherwise, the risk of duplicating records (People, Companies, etc etc) increases as more and more records are added.
Fuzzy search needs to be built in to the native search boxes when searching for records or linking records in relationship fields. Eg “JP Morgan” should also return “J.P. Morgan”, “JPMorgan”, “J P Morgan”, “J.P. Moorgan”, you get the drift.
To take it to the next level, there should also be a fuzzymatch function giving users access to the same functionality in our scripts, where you pass as arguments one field to match (string), a sensitivity level (number, 0-100), and an array of values to match against, returning an array of matches found. Eg:
let company := this.“Company Name”;
let checkForMatch := (select Companies).“Company Name”;
let matches := fuzzy(company, 90, checkForMatch)
Functionality of this kind will let advanced users develop cool data cleansing / merging / de-duping tools. Please empower the Clean Data Obssessives amongst us!! Hope to see this in 2021 ;)
Keep up the good work Ninox.
K
3 replies
-
The scripting language is certainly powerful enough to let advanced users develop their own user-defined functions. You should give it a try.
-
The language is certainly accessible... but it feels any sophisticated routines are held together by sticky tape, as demonstrated by your own examples of manipulating arrays!!
Anyway, for the benefit of everyone - a simple-to-use formula (with fiendishly complex script) to calculate the Levenshtein Ratio between two strings (ranges from 0-100).
Usage example (assuming a table called Companies):
let matchCompany := “JP Morgan”;
let allCompanies := select Companies;
for c in allCompanies do
if fuzzyRatio(matchCompany, c.“Company Name) > 0.75 then
c
end;
end;
The script (requires a number of helper/utility functions):
”// utility function which turns a comma-separated string back into a matrix (array of arrays)“;
function repackMatrix(array : text,width : number) do
let width := width;
let textToArray := split(array, ”,“);
let textToArray := for i in textToArray do
number(i)
end;
let rows := count(textToArray) / width;
let matrix := for i in range(0, rows) do
slice(textToArray, i * width, i * width + width)
end;
matrix
end;
”// reads single value in a matrix based on array coordinates, with top left 0,0“;
function readMatrix(matrix : text,width : number,row : number,col : number) do
let width := width + 1;
let findPosition := row * width + col;
let textToArray := split(matrix, ”,“);
let valueFound := item(textToArray, findPosition);
number(valueFound)
end;
”// replaces value in a matrix (array of arrays), which is entered as comma-separated string“;
function updateMatrix(matrix : text,width : number,row : number,col : number,value : number) do
let width := width + 1;
let findPosition := row * width + col;
let textToArray := split(matrix, ”,“);
let newArray := for i in range(0, count(textToArray)) do
if i = findPosition then
value
else
number(item(textToArray, i))
end
end;
let repackedMatrix := repackMatrix(text(newArray), width);
repackedMatrix
end;
”// calculates the levenshtein ratio between two strings“;
”// see https://www.datacamp.com/community/tutorials/fuzzy-string-python for details“;
function fuzzyRatio(wordOne : text,wordTwo : text) do
let s := wordOne;
let t := wordTwo;
let rows := length(s) + 1;
let cols := length(t) + 1;
”// initialises a matrix of zeros“;
let distance := for i in range(0, rows) do
for j in range(0, cols) do
0
end
end;
”// populates matrix of zeros with the indeces of each character of both strongs“;
let indexed := for i in range(0, rows) do
for k in range(0, cols) do
if k = 0 then
i
else
if i = 0 then k else 0 end
end
end
end;
”// iterate over the matrix to compute the cost of edits/substitutions“;
let computed := for col in range(1, cols) do
for row in range(1, rows) do
let cost := 0;
if item(s, row - 1) = item(t, col - 1) then
cost := 0
else
cost := 2
end;
let updateValue := min(readMatrix(text(indexed), cols - 1, row - 1, col) + 1, readMatrix(text(indexed), cols - 1, row, col - 1) + 1, readMatrix(text(indexed), cols - 1, row - 1, col - 1) + cost);
indexed := updateMatrix(text(indexed), cols - 1, row, col, updateValue)
end
end;
let edits := readMatrix(text(indexed), cols - 1, rows - 1, cols - 1);
”// calculates the levenshtein ratio“;
let ratio := (length(s) + length(t) - edits) / (length(s) + length(t));
ratio
end -
Nice! I“m looking forward to trying it later. Thank you for sharing your code.
Content aside
- 4 yrs agoLast active
- 3Replies
- 525Views