0

Fuzzy search for enterprise grade Apps

Hi,

 

If Ninox truly wants “power” CRMs and ERPs to be built using the Ninox platform, Search needs to be better. Simple as. Otherwise, the risk of duplicating records (People, Companies, etc etc) increases as more and more records are added. 

Fuzzy search needs to be built in to the native search boxes when searching for records or linking records in relationship fields. Eg “JP Morgan” should also return “J.P. Morgan”, “JPMorgan”, “J P Morgan”, “J.P. Moorgan”, you get the drift. 

To take it to the next level, there should also be a fuzzymatch function giving users access to the same functionality in our scripts, where you pass as arguments one field to match (string), a sensitivity level (number, 0-100), and an array of values to match against, returning an array of matches found. Eg:

 

let company := this.“Company Name”;

let checkForMatch := (select Companies).“Company Name”;

let matches := fuzzy(company, 90, checkForMatch)

 

Functionality of this kind will let advanced users develop cool data cleansing / merging / de-duping tools. Please empower the Clean Data Obssessives amongst us!! Hope to see this in 2021 ;)

 

Keep up the good work Ninox. 

K

3 replies

null
    • Sean
    • 3 yrs ago
    • Reported - view

    The scripting language is certainly powerful enough to let advanced users develop their own user-defined functions. You should give it a try. 😉

    • Kaan_Dikmen
    • 3 yrs ago
    • Reported - view

    The language is certainly accessible... but it feels any sophisticated routines are held together by sticky tape, as demonstrated by your own examples of manipulating arrays!!

     

    Anyway, for the benefit of everyone - a simple-to-use formula (with fiendishly complex script) to calculate the Levenshtein Ratio between two strings (ranges from 0-100). 

     

    Usage example (assuming a table called Companies):

    let matchCompany := “JP Morgan”;

    let allCompanies := select Companies;

    for c in allCompanies do

      if fuzzyRatio(matchCompany, c.“Company Name) > 0.75 then

        c

      end;

    end;

     

    The script (requires a number of helper/utility functions):

     

    ”// utility function which turns a comma-separated string back into a matrix (array of arrays)“;
    function repackMatrix(array : text,width : number) do
    let width := width;
    let textToArray := split(array, ”,“);
    let textToArray := for i in textToArray do
    number(i)
    end;
    let rows := count(textToArray) / width;
    let matrix := for i in range(0, rows) do
    slice(textToArray, i * width, i * width + width)
    end;
    matrix
    end;
    ”// reads single value in a matrix based on array coordinates, with top left 0,0“;
    function readMatrix(matrix : text,width : number,row : number,col : number) do
    let width := width + 1;
    let findPosition := row * width + col;
    let textToArray := split(matrix, ”,“);
    let valueFound := item(textToArray, findPosition);
    number(valueFound)
    end;
    ”// replaces value in a matrix (array of arrays), which is entered as comma-separated string“;
    function updateMatrix(matrix : text,width : number,row : number,col : number,value : number) do
    let width := width + 1;
    let findPosition := row * width + col;
    let textToArray := split(matrix, ”,“);
    let newArray := for i in range(0, count(textToArray)) do
    if i = findPosition then
    value
    else
    number(item(textToArray, i))
    end
    end;
    let repackedMatrix := repackMatrix(text(newArray), width);
    repackedMatrix
    end;
    ”// calculates the levenshtein ratio between two strings“;
    ”// see https://www.datacamp.com/community/tutorials/fuzzy-string-python for details“;
    function fuzzyRatio(wordOne : text,wordTwo : text) do
    let s := wordOne;
    let t := wordTwo;
    let rows := length(s) + 1;
    let cols := length(t) + 1;
    ”// initialises a matrix of zeros“;
    let distance := for i in range(0, rows) do
    for j in range(0, cols) do
    0
    end
    end;
    ”// populates matrix of zeros with the indeces of each character of both strongs“;
    let indexed := for i in range(0, rows) do
    for k in range(0, cols) do
    if k = 0 then
    i
    else
    if i = 0 then k else 0 end
    end
    end
    end;
    ”// iterate over the matrix to compute the cost of edits/substitutions“;
    let computed := for col in range(1, cols) do
    for row in range(1, rows) do
    let cost := 0;
    if item(s, row - 1) = item(t, col - 1) then
    cost := 0
    else
    cost := 2
    end;
    let updateValue := min(readMatrix(text(indexed), cols - 1, row - 1, col) + 1, readMatrix(text(indexed), cols - 1, row, col - 1) + 1, readMatrix(text(indexed), cols - 1, row - 1, col - 1) + cost);
    indexed := updateMatrix(text(indexed), cols - 1, row, col, updateValue)
    end
    end;
    let edits := readMatrix(text(indexed), cols - 1, rows - 1, cols - 1);
    ”// calculates the levenshtein ratio“;
    let ratio := (length(s) + length(t) - edits) / (length(s) + length(t));
    ratio
    end

    • Sean
    • 3 yrs ago
    • Reported - view

    Nice! I“m looking forward to trying it later. Thank you for sharing your code.

Content aside

  • 3 yrs agoLast active
  • 3Replies
  • 520Views