! char.ax.txt - character and symbol utility predicates
!   uses form.ax ho.ax natnum.ax bit.ax
!---+----|----+----|----+----|----+----|----+----|----+----|----+----|----+----8
!/ index:
 === character & char subset definitions ===
 (char` _char`)      - distinguishing atom for a syntax-extension character
 (char _char)        - set of (8-bit) characters
 (charseq (_chars_)) - sequence of characters
 (pchar _pchar)      - set of printable characters (x20..x7E)
 (ucltr _ucltr)      - uppercase letter ('A' - 'Z')
 (lcltr _lcltr)      - lowercase letter ('a' - 'z')
 (ltr _ltr)          - upper- or lower-case letter
 (/ucltr _/ucltr)    - a non-uppercase-letter char expr
 (/lcltr _/lcltr)    - a non-lowercase-letter char expr
 (/ltr _/ltr)        - char expr that is not a letter
 (write _chex _chstr)  - write nested char expr to a flat string
 (lc>uc _lclex _uclex) - map lowercase letters to uppercase (in nested ch expr)
 (uc>lc _uclex _lclex) - map uppercase letters to lowercase (in nested ch expr)
 === symbol utilities ===
 (sym` _sym`)        - distinguishing atom for a syntax-extension symbol
 (sym _sym)          - set of syntax-extension symbols
 (*pred _argseqs_)   - apply predicate to seq of tuples formed from arg seqs
 === decimal number representation ===
 (dig _digit)        - decimal digit char ('0'..'9')
 (/dig _/dig)        - a non-digit char expr
 (dec>nat _digseq _natnum) - map digit sequence to its natural number value
 (nat>dec _nat _digseq)    - map natural num to digit seq ("0" for zero)
 (dsym>nat _dsym _nat)     - map decimal digit symbol to natural number
 (nat>dsym _nat _dsym)     - map natural number to digit symbol
 (sel_dstr _seq _el) - select sequence element given decimal dig string index
 (el>ix1 _grps _eldig) - map seq of elem groups to elem-to-1-digit-index map
 (skip _dsym _f)     - skip _dsym iargs, then apply _f to remaining args
 (repl _dsym _uf)    - replace iarg 0 seq with unary fn applied to elem [_dsym]
 (ins _dsym _expr _seq _seq') - insert expr into seq before elem [_dsym]
!\
!---+----|----+----|----+----|----+----|----+----|----+----|----+----|----+----8
!
! ======== character & char subset definitions ========

! (char` _char`) - distinguishing atom for a syntax-extension character
!   - alt name: char_hdr
!   - the actual atom is hidden behind this predicate
(char` `char).

! Alternative def:
!   An implementation parser supports syntax-extension characters, so the
!   character header atom can also be taken from the parser's expansion
!   of a character literal such as 'A':
(char` %hdr)<
  (== (%hdr %b) 'A').

! We should then check that the parser is generating the atom we expect.
! If `OK_char_atom is not valid, we have an error!!
`OK_char_atom < (== `char %hdr).
! NOTE(review): %hdr is not tied to (char` ...) inside this axiom, so the
!   condition looks satisfiable by %hdr = `char whatever the parser does
!   -- confirm the intended check.
! -- This type of check could be part of other axiom tests.

! (char _char) - set of syntax-extension characters (which have 8 bits)
!   A char is the pair (_char_atom _8bits): the header atom followed by a
!   bit sequence.  (=l is presumably "equal length", which would fix the
!   sequence at 8 bits -- confirm in the files this one uses.)
(char (%hdr %byte))<
  (char` %hdr),
  (bitseq %byte),
  (=l %byte (7 6 5 4 3 2 1 0)).
! Note that <, <= ordering between chars derives from bit ordering.
! Do we need to define ordering between char and char sequence?
!   -- No, for now.
!   -- (Actually, the bit ordering axioms will make a null char sequence ()
!      less than the sequence for a char (_char_atom _8bits).
!   -- Could this be a problem???)

! (charseq (_chars_)) - set of character sequences
!   - uses higher-order forms def and *
(def charseq (* char)).

! (pchar _pchar) - set of printable (ASCII) characters (x20..x7E)
!   The bit pattern must lie between 00100000 and 01111110 inclusive.
(pchar (%hdr %byte))<
  (char (%hdr %byte)),                    ! a char: get its atom and 8 bits
  (bits %0 %1),                           ! get 0,1 bit atoms
  (<= (%0 %0 %1 %0 %0 %0 %0 %0) %byte),   ! x20..
  (<= %byte (%0 %1 %1 %1 %1 %1 %1 %0)).   ! ..x7E
!z  (1val (== %byte (%0 %0 %1 $x))     ! x20..3F
!z        (== %byte (%0 %1 $y))).      ! x40..7F
!z  ! -- this covers ASCII 20..7F (but we should probably exclude 7F)

! (ucltr _ucltr) - uppercase letter ('A' - 'Z')   (alt: ultr)
(ucltr %ch)<
  (char %ch),
  (<= 'A' %ch),
  (<= %ch 'Z').
! -- These axioms assume that the implementation parser uses ASCII
!    for the syntax-extension characters!  (May need to be confirmed!)

!
! (lcltr _lcltr) - lowercase letter ('a' - 'z')   (alt: lltr)
(lcltr %ll)<
  (char %ll),
  (<= 'a' %ll),
  (<= %ll 'z').

! (ltr _ltr) - upper- or lower-case letter
(ltr %l)<
  (1val (ucltr %l) (lcltr %l)).
!or: (def ltr (1val ucltr lcltr)).

! (/ucltr _/ucltr) - char expr that is not an uppercase letter  (alt: /ultr)
!   (A character expression is a char or seq of >=0 char exprs.)
(/ucltr %/ul)<                       ! a char outside 'A'..'Z'
  (char %/ul),
  (1val (< %/ul 'A') (< 'Z' %/ul)).
(/ucltr %chseq)<                     ! char seq is not uppercase ltr
  (charseq %chseq).

! (/lcltr _/lcltr) - char expr that is not a lowercase letter  (alt: /lltr)
(/lcltr %/ll)<                       ! a char outside 'a'..'z'
  (char %/ll),
  (1val (< %/ll 'a') (< 'z' %/ll)).
(/lcltr %chseq)<                     ! char seq is not lowercase ltr
  (charseq %chseq).

! (/ltr _/ltr) - char expr that is not a letter
!   NOTE(review): this needs BOTH conditions to hold; 'val' is presumably
!   the all-valid counterpart of '1val' -- confirm in ho.ax.
(/ltr %/l)<
  (val (/ucltr %/l) (/lcltr %/l)).

! (write _chex _chstr) - write nested char expr to a flat char string
(write %c (%c))<                     ! single char -> singleton char seq
  (char %c).
(write () ()).                       ! empty expr -> empty string
(write (% $) %chstr)<                ! seq of char exprs -> char string:
  (write % %chstr0),                 !   write the first expr,
  (write ($) %chstr1),               !   write the rest,
  (cat %chstr0 %chstr1 %chstr).      !   catenate the two results

! (lc>uc _lclex _uclex) - map lowercase letters to uppercase (in nested expr)
!   The third bit of the 8-bit pattern goes from %1 to %0; all other bits
!   are carried over unchanged.
(lc>uc (%char` (%b7 %b6 %1 $b)) (%char` (%b7 %b6 %0 $b)))<   ! lc -> uc
  (lcltr (%char` (%b7 %b6 %1 $b))),
  (bits %0 %1).
(lc>uc %c %c)<                 ! non-lowercase-letter char is unchanged
  (char %c),
  (/lcltr %c).
(lc>uc () ()).                 ! map nested char seqs
(lc>uc (%c $c) (%c' $c'))<
  (lc>uc %c %c'),
  (lc>uc ($c) ($c')).

! (uc>lc _uclex _lclex) - map uppercase letters to lowercase (in nested expr)
!   The third bit of the 8-bit pattern goes from %0 to %1; all other bits
!   are carried over unchanged.
(uc>lc (%char` (%b7 %b6 %0 $b)) (%char` (%b7 %b6 %1 $b)))<   ! uc -> lc
  (ucltr (%char` (%b7 %b6 %0 $b))),
  (bits %0 %1).
(uc>lc %c %c)<                 ! non-uppercase-letter char is unchanged
  (char %c),
  (/ucltr %c).
(uc>lc () ()).                 ! map nested char seqs
(uc>lc (%c $c) (%c' $c'))<
  (uc>lc %c %c'),
  (uc>lc ($c) ($c')).

!---+----|----+----|----+----|----+----|----+----|----+----|----+----|----+----8

! ======== symbol utilities ========

!
! (sym` _sym`) - distinguishing atom for a syntax-extension symbol
!   alt: sym_hdr
(sym` `).      ! alternative symbol atom: `sym
! -- See char` above for getting symbol atom from implementation parser!

! (sym _sym) - set of syntax-extension symbols
!   A symbol is (_sym_atom _charstring) with a non-empty char string.
(sym (%sym` (%c1)))<
  (sym` %sym`),
  (pchar %c1),                 ! 1st symbol char is printable char
  (/in %c1 " `%$'""!()").      ! other than blank ` % $ ' " ! ( )
(sym (%sym` ($cstr %c)))<
  (sym (%sym` ($cstr))),       ! leading part is already a symbol
  (pchar %c),                  ! remaining symbol chars are printable chars
  (/in %c " ()").              ! other than blank ( )

! (*pred _argseqs_) - predicate applies to all tuples formed from arg seqs
!   - equivalent to ((* _pred) _argseqs_)
!   - the predicate symbol is the name of pred with a '*' character prefixed
!
((%sym` ('*' $pname)) $nulls)<   ! pred name begins with letter? - No!
  (sym` %sym`),
  (copies () ($nulls)).          ! base case: every arg seq is ()
((%sym` ('*' $pname)) $argseqs')<
  ((%sym` ($pname)) $args),             ! pred holds for one arg tuple
  ((%sym` ('*' $pname)) $argseqs),      ! *pred holds for the other tuples
  (distr ($args) ($argseqs) ($argseqs')).
! - don't need (* _pname) h-o form!  *pname is more concise
! - for now we will keep both

!---+----|----+----|----+----|----+----|----+----|----+----|----+----|----+----8

! ======== decimal number representation ========

! (dig _digit) - decimal digit char
(finite_set dig "0123456789").   ! uses higher-order form
! -> (ord dig _dig _natk) - get digit num - defined from 'finite_set'
!/ old:
 (dig %digit)<
   (char %digit),
   (<= '0' %digit),
   (<= %digit '9').
!\ -- we would still need to define the ord function

! (/dig _/dig) - a non-digit char expr
(/dig %/dig)<                    ! a char outside '0'..'9'
  (char %/dig),
  (1val (< %/dig '0') (< '9' %/dig)).
(/dig %chseq)<                   ! char seq is not a digit
  (charseq %chseq).

! (dec>nat _digseq _natnum) - map decimal digit seq to its natural num value
!   - null seq maps to zero; other seqs can have leading zeros
(dec>nat () %0)<
  (zero %0).
(dec>nat ($d %d) %n')<           ! n' = 10 * value($d) + value(%d)
  (dec>nat ($d) %n),
  (len (* * * * * * * * * *) %10),   ! need nat num constant 10
    ! - we can't use decimal 10 since that is what we're trying to define!
  (times %n %10 %10n),
  (ord dig %d %k),     ! get "ordinal" value of a 'finite_set' element
  (plus %10n %k %n').

!
! (nat>dec _nat _digseq) - map natural number to decimal digit seq
!   - zero maps to "0", everything else has non-zero leading digit
(nat>dec %z "0")<      ! we don't use null string for decimal number repr
  (zero %z).
(nat>dec %n (%d $d))<  ! other decimal numbers have non-zero leading digit
  (dec>nat (%d $d) %n),
  (< '0' %d).

! (dsym>nat _dsym _nat) - map decimal digit symbol to natural number
(dsym>nat (%sym` %dstr) %n)<
  (sym` %sym`),
  (dec>nat %dstr %n).
! Note that null-string symbol maps to zero, but that shouldn't cause prob.

! (nat>dsym _nat _dsym) - map natural number to decimal digit symbol
!   - zero maps to 0, everything else has non-zero leading digit
(nat>dsym %n (%sym` %dstr))<
  (sym` %sym`),
  (nat>dec %n %dstr).

!/ --- old --- - use 'sel_dstr' instead!
! (_dsym _seq _elem) - select element from seq given decimal symbol index 0..
!   - the decimal symbol itself is the selection function
 (%dsym %seq %elem)<  ! decimal symbol is the selection function name!
   (dsym>nat %dsym %nat),   ! get natural number value of decimal symbol
   (sel# %nat %seq %elem).  ! select sequence element given nat num index
!\

! (sel_dstr _seq _el) - select sequence element given decimal dig string index
!   ex: (sel2 (a b c d) c)  - note 0,1,.. indexing
!   alt: sel#_dstr, #_dstr, _dstr
!   The predicate symbol is the chars 'sel' followed by the decimal index
!   string.  The conclusion and the sel# condition share the variable %el,
!   so the selected element is the one the conclusion asserts.
((%sym` ('sel' $dstr)) %seq %el)<
  (sym` %sym`),            ! get symbol atom
  (dec>nat ($dstr) %i),    ! get decimal index as natural number i
  (sel# %i %seq %el).      ! select ith element
! *** tbd: Do all the ...# index fns with dec dig str index in the fn symbol!

! (el>ix1 _grps _eldig) - map seq of elem groups to elem-to-1-digit-index map
!   example:
!     (el>ix1 ("ab" "c" "def")
!       (('a' '0') ('b' '0') ('c' '1') ('d' '2') ('e' '2') ('f' '2')))
!   -- works for <=10 groups
!   -- apply to phonecode example!
(def el>ix1   ! fn applied to above example ...
         ! ("ab" "c" "def")
  (dup        ! duplicate arg
         ! ("ab" "c" "def") ("ab" "c" "def")
   len        ! get length of 1st arg
         ! _n3 ("ab" "c" "def")
   iota0n-    ! get indexing numbers 0 up to 3
         ! (_n0 _n1 _n2) ("ab" "c" "def")
   *nat>dec   ! convert natural numbers to decimal digit strings
         ! ("0" "1" "2") ("ab" "c" "def")
   *sel0      ! get leading digit of decimal digit strings
              !   (works for <=10 nums)
         ! ('0' '1' '2') ("ab" "c" "def")
   (* (.- join))  ! join each digit with each element of its group sequence
         ! ((('0' 'a') ('0' 'b')) (('1' 'c')) (('2' 'd') ('2' 'e') ('2' 'f')))
         !   - note that digit-letter mappings are still in groups
   cat*       ! catenate groups into a flat map
         ! (('0' 'a') ('0' 'b') ('1' 'c') ('2' 'd') ('2' 'e') ('2' 'f'))
   *rev       ! reverse to get letter->digit map
         ! (('a' '0') ('b' '0') ('c' '1') ('d' '2') ('e' '2') ('f' '2'))
         !   - function result
  )
).

! (skip _dsym _f) - skip _dsym iargs, then apply _f to remaining args
!   - This skip operation is used within a function composition.
(((skip %dsym %f) $f) $iaskip $args)<
  (dsym>nat %dsym %n),
  (len ($iaskip) %n),    ! skip n input args
  ((%f $f) $args).       ! then apply fn to remaining args in composition

! (repl _dsym _uf) - replace iarg0 seq with unary fn applied to elem [_dsym]
(((repl %dsym %uf) $f) ($pre %ex $suf) $args)<
  (dsym>nat %dsym %n),
  (len ($pre) %n),       ! $pre holds the n elements before the target
  (%uf %ex %ex'),        ! apply unary fn to nth elem of 1st arg
  (($f) ($pre %ex' $suf) $args).   ! replace that elem with result

! (ins _dsym _expr _seq _seq') - insert expr into seq before elem [_dsym]
!   Written as the plain 4-argument predicate this comment describes.
!   NOTE(review): if a composition form like skip/repl was intended instead,
!   the conclusion and conditions need to be reworked.
(ins %dsym %expr ($pre $suf) ($pre %expr $suf))<   ! insert expr into seq
  (dsym>nat %dsym %n),
  (len ($pre) %n).       ! $pre holds the n elements before the new one