(* Wojciech Muła, $Id: mystring.ml,v 1.11 2006-06-04 08:15:13 wojtek Exp $ *) (** Makes conversion table (use with function {!translate}). Conversion table tells how to translate charset into another one. Charsets are defined by strings - set1 and set2; char from set1 is changed to char from set2 at the same position (Sets are trimmed to the same length.) For example: let lowercase = "abcdefghijklmnopqrstuvxyz" and uppercase = "ABCDEFGHIJKLMNOPQRSTUVXYZ" let toupper_conv = make_conv_table lowercase uppercase *) let make_conv_table set1 set2 = let l = min (String.length set1) (String.length set2) and a = Array.make 256 ' ' in for i = 0 to 255 do a.(i) <- Char.chr i done; for i = 0 to l-1 do a.(Char.code set1.[i]) <- set2.[i] done; a (** Translates string using precalculated conversion table. *) let translate convtbl str = let s = String.copy str in for i = 0 to String.length s - 1 do s.[i] <- convtbl.(Char.code s.[i]) done; s (** Translates single character using conversion table *) let translate_c convtbl c = convtbl.(Char.code c) (** Translates string (conversion is defined with two charsets, see make_conv_table) *) let translate2 set1 set2 str = translate (make_conv_table set1 set2) str let lower' = "abcdefghijklmnopqrstuvxyz" and upper' = "ABCDEFGHIJKLMNOPQRSTUVXYZ" and digit' = "0123456789" let toupper_tbl = make_conv_table lower' upper' let tolower_tbl = make_conv_table upper' lower' let swapcase_tbl = make_conv_table (upper'^lower') (lower'^upper') (** *) let toupper = translate toupper_tbl let tolower = translate tolower_tbl let swapcase = translate swapcase_tbl let toupper_c = translate_c toupper_tbl let tolower_c = translate_c tolower_tbl let swapcase_c = translate_c swapcase_tbl let make_cset set = let a = Array.make 256 false in for i = 0 to String.length set - 1 do a.(Char.code set.[i]) <- true done; a let in_cset set c = set.(Char.code c) let in_cset_s set str = let n = String.length str in let rec aux i = if i=n then false else (set.(Char.code str.[i])) && aux (i+1) in aux 0 let alpha = make_cset (lower' ^ upper') and alnum = make_cset (lower' ^ upper' ^ digit') and blank = make_cset " \t" and cntrl = (let a = Array.make 256 false in for i=0 to 31 do a.(i) <- true; done; a.(127) <- true; a) and digit = make_cset digit' and graph = (let a = Array.make 256 false in for i=33 to 126 do a.(i) <- true; done; a) and lower = make_cset lower' and upper = make_cset upper' and punct = make_cset "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" and print = (let a = Array.make 256 false in for i=32 to 126 do a.(i) <- true; done; a) and space = make_cset " \t\r\n\014\013" and xdigit = make_cset (digit' ^ "abcdefABCDEF") let isalpha c = alpha.(Char.code c) let isalnum c = alnum.(Char.code c) let isblank c = blank.(Char.code c) let iscntrl c = cntrl.(Char.code c) let isdigit c = digit.(Char.code c) let isgraph c = graph.(Char.code c) let islower c = lower.(Char.code c) let isupper c = upper.(Char.code c) let ispunct c = punct.(Char.code c) let isprint c = print.(Char.code c) let isspace c = space.(Char.code c) let isxdigit c = xdigit.(Char.code c) let isalpha_s = in_cset_s alpha let isalnum_s = in_cset_s alnum let isblank_s = in_cset_s blank let iscntrl_s = in_cset_s cntrl let isdigit_s = in_cset_s digit let isgraph_s = in_cset_s graph let islower_s = in_cset_s lower let isupper_s = in_cset_s upper let ispunct_s = in_cset_s punct let isprint_s = in_cset_s print let isspace_s = in_cset_s space let isxdigit_s = in_cset_s xdigit (* returns true if str1 is prefix of str2 startswith "GNU" "GNU/Linux" = true startswith "GUN" "GNU/Linux" = false *) let startswith str1 str2 = let l1 = String.length str1 and l2 = String.length str2 in if l2 = l1 then str1=str2 else if l2 < l1 then false else let rec aux i = if i=l1 then true else (print_endline (string_of_int i); str1.[i] = str2.[i] && aux (i+1)) in aux 0 (* returns true if str1 is postfix of str2 endswith "ogg" "music.ogg" = true endswith "mp3" "music.ogg" = false *) let endswith str1 str2 = let l1 = String.length str1 and l2 = String.length str2 in if l2 = l1 then str1=str2 else if l2 < l1 then false else let k = l2 - l1 in let rec aux i = if i=l1 then true else (print_endline (string_of_int i); str1.[i] = str2.[k+i] && aux (i+1)) in aux 0 let lstrip f s = let n = String.length s in let i = ref 0 in while !i < n && f s.[!i] do incr i done; if !i = n then String.copy "" else String.sub s !i (n - !i) (* strips characters (at beginning of string) that belongs to set (set is a string) lstrip_s "_?" "__?___?_some text___?___?" = "some text___?___?" *) let lstrip_s set s = let a = make_cset set in lstrip (in_cset a) s (* strips characters c (at beginning of string) lstrip_c '_' "__?___?_some text___?___?" = "?___?_some text___?___?" *) let lstrip_c c s = lstrip (fun x -> x=c) s let rstrip f s = let n = String.length s in let i = ref (n-1) in while !i > 0 && f s.[!i] do decr i done; if !i = 0 then String.copy "" else String.sub s 0 (!i+1) (* strips characters (at end of string) that belongs to set (set is a string) rstrip_s "_?" "__?___?_some text___?___?" = "__?___?_some text" *) let rstrip_s set s = let a = make_cset set in rstrip (in_cset a) s (* strips character c (at end of string) that belongs to set (set is a string) rstrip_c '_' "__?___?_some text___?___" = "__?___?_some text___?" *) let rstrip_c c s = rstrip (fun x -> x=c) s let strip f s = let n = String.length s in let i = ref 0 in while !i < n && f s.[!i] do incr i done; if !i = n then String.copy "" else ( let j = ref (n-1) in while !j > !i && f s.[!j] do decr j done; String.sub s !i (!j - !i + 1) ) (* strips characters (at begginngin & end of string) that belongs to set (set is a string) strip_s "_?" "__?___?_some text___?___?" = "some text" *) let strip_s set s = let a = make_cset set in strip (in_cset a) s (* strips character c (at begginning & end of string) strip_c '_' "__?___?_some text___?___" = "?___?_some text___?" *) let strip_c c s = strip (fun x -> x=c) s (* splits str into list of strings; whitespaces are defined by function f of type char -> bool see: split_c, split_s *) let split f str = let first = ref 0 and last = ref 0 and tokens = ref [] and intoken = ref false in for i = 0 to String.length str - 1 do if f str.[i] then begin if !intoken = true then begin let s = String.sub str (!first) (!last - !first + 1) in tokens := s::!tokens end; first := i; last := i; intoken := false; end else begin if !intoken = false then first := i; intoken := true; last := i; end; done; List.rev (if !intoken = true then let s = String.sub str (!first) (!last - !first + 1) in s::!tokens else !tokens) (* splits string into list of strings separator is a single char # split_c ' ' " Ocaml does it better " ["Ocaml"; "does"; "it"; "better"] *) let split_c chr str = split (fun c -> c=chr) str (* splits string into list of strings separators is a set of chars (strings) # split_s ".,?!-_" ".-,Ocaml-?-does_!.-it-...!_better,._!" ["Ocaml"; "does"; "it"; "better"] *) let split_s set str = let a = make_cset set in split (in_cset a) str (* splits str into at most n strings; whitespaces are defined by function f of type char -> bool see: split_c, split_s *) let splitn f str n = assert(n > -1); let first = ref 0 and last = ref 0 and tokens = ref [] and intoken = ref false and splitted = ref 0 in for i = 0 to String.length str - 1 do if !splitted < n+1 then ( if f str.[i] then begin if !intoken = true then begin let s = String.sub str (!first) (!last - !first + 1) in tokens := s::(!tokens); end; first := i; last := i; intoken := false; end else begin if !intoken = false then ( first := i; incr splitted; ); intoken := true; last := i; end; ) else intoken := true done; List.rev (if !intoken = true then let s = String.sub str (!first) (String.length str - !first) in s::(!tokens) else !tokens) let splitn_c c s n = splitn (fun x -> x=c) s n let splitn_s set s n = let a = make_cset set in splitn (in_cset a) s n (* expands tabs *) let expandtabs str tabsize = let l = ref [] and len = ref 0 and first = ref 0 and n = String.length str in for i = 0 to n - 1 do if str.[i] = '\t' then begin let s1 = String.sub str !first (i - !first) in let l1 = String.length s1 in len := !len + l1; let s2 = String.make (tabsize - !len mod tabsize) ' ' in let l2 = String.length s2 in l := !l @ [s1; s2]; len := !len + l2; first := i + 1; end done; if !first < n then String.concat "" (!l @ [String.sub str !first (n - !first)]) else String.concat "" !l (* str1[0:n] = str2[f:f+n] *) let cmp str1 str2 f n = try for i = 0 to n-1 do if str1.[i] <> str2.[i+f] then raise Not_found done; true with Not_found -> false exception Found of int (* returns index of first occurence str1 in str2 Karp-Rabin algorithm *) let strstr str1 str2 = let l1 = String.length str1 and l2 = String.length str2 in if l1 = l2 then (if str1 = str2 then 0 else -1) else if l1 > l2 then -1 else if l2 = 0 then -1 else if l1 = 0 then 0 else let calcsum str n = let sum = ref 0 in for i = 0 to n-1 do sum := !sum + Char.code str.[i] done; !sum in let h1 = calcsum str1 l1 and h2 = ref (calcsum str2 l1) in try if (!h2 = h1) && (cmp str1 str2 0 l1 = true) then raise (Found 0); for i = 1 to l2-l1 do h2 := !h2 + (Char.code str2.[i+l1-1]) - (Char.code str2.[i-1]); if !h2 = h1 && cmp str1 str2 i l1 = true then raise (Found i); done; -1 with Found i -> i (* returns index of all occurences str1 in str2 *) let strstrall str1 str2 = let l1 = String.length str1 and l2 = String.length str2 in if l1 = l2 then (if str1 = str2 then [0] else []) else if l1 > l2 then [] else if l2 = 0 then [] else if l1 = 0 then [] else let calcsum str n = let sum = ref 0 in for i = 0 to n-1 do sum := !sum + Char.code str.[i] done; !sum in let h1 = calcsum str1 l1 and h2 = ref (calcsum str2 l1) and l = ref [] in if (!h2 = h1) && (cmp str1 str2 0 l1 = true) then l := 0 :: !l; for i = 1 to l2-l1 do h2 := !h2 + (Char.code str2.[i+l1-1]) - (Char.code str2.[i-1]); if !h2 = h1 && cmp str1 str2 i l1 = true then l := i :: !l; done; !l let filter f s = let n = String.length s in let s' = String.create n and k = ref 0 in for i = 0 to n-1 do if f s.[i] then ( s'.[ !k ] <- s.[i]; incr k; ); done; String.sub s' 0 !k let map f s = let n = String.length s in let s' = String.create n and k = ref 0 in for i = 0 to n-1 do s'.[ !k ] <- f s.[i]; incr k; done; s' let fold_left f init s = let n = String.length s in let rec aux acc i = if i=n then acc else aux (f acc s.[i]) (i+1) in aux init 0 let concat' sl = let n = List.fold_left (fun n s -> n + String.length s) 0 sl in let s = String.create n in let fn i s' = let l = String.length s' in String.blit s' 0 s i l; i+l; in ignore(List.fold_left fn 0 sl); s