diff --git a/search/search_quality/clusterize-postcodes.lisp b/search/search_quality/clusterize-postcodes.lisp index 6e7e67a7b2..72d37bed2e 100755 --- a/search/search_quality/clusterize-postcodes.lisp +++ b/search/search_quality/clusterize-postcodes.lisp @@ -28,11 +28,13 @@ exec /usr/bin/env sbcl --noinform --quit --load "$0" --end-toplevel-options "$@" (string-trim *seps* string)) (defun get-postcode-pattern (postcode) - "Simplifies postcode in a following way: - * all letters are replaced by 'n' - * all digits are replaced by 'a' + "Simplifies postcode in the following way: + * all letters are replaced by 'A' + * all digits are replaced by 'N' * hyphens and dots are replaced by a space - * other characters are left as-is + * other characters are capitalized + + This format follows https://en.wikipedia.org/wiki/List_of_postal_codes. " (map 'string #'(lambda (c) (cond ((alpha-char-p c) #\A) ((digit-char-p c) #\N) @@ -91,8 +93,9 @@ exec /usr/bin/env sbcl --noinform --quit --load "$0" --end-toplevel-options "$@" ; Prints number of postcodes in a cluster, accumulated ; percent of postcodes clustered so far, simplified version ; of a postcode and examples of postcodes. - (format t "~a (~2$%) ~a [~{~a~^, ~}]~%" + (format t "~a (~2$%) ~a [~{~a~^, ~}~:[~;, ...~]]~%" num-samples - (coerce (* 100 (/ curr-prefix-sum *total*)) 'double-float) + (* 100 (/ curr-prefix-sum *total*)) key - (subseq samples 0 (min num-samples 5))))) + (subseq samples 0 (min num-samples 5)) + (> num-samples 5))))