|
Ocaml/FAQ/File Access
Материал из Wiki.crossplatform.ru
[править] 7. File Access
[править] Introduction
#load "str.cma";;
(* Print all lines that contain the word "blue" in the input file
/usr/local/widgets/data to stdout. *)
let () =
let in_channel = open_in "/usr/local/widgets/data" in
try
while true do
let line = input_line in_channel in
try
ignore (Str.search_forward (Str.regexp_string "blue") line 0);
print_endline line
with Not_found -> ()
done
with End_of_file ->
close_in in_channel
(*-----------------------------*)
let () =
let regexp = Str.regexp ".*[0-9]" in
try
while true do
(* reads from stdin *)
let line = input_line stdin in
(* writes to stderr *)
if not (Str.string_match regexp line 0)
then prerr_endline "No digit found.";
(* writes to stdout *)
Printf.printf "Read: %s\n" line;
flush stdout
done
with End_of_file ->
close_out stdout
(*-----------------------------*)
(* Write to an output file the usual way. *)
let () =
let logfile = open_out "/tmp/log" in
output_string logfile "Countdown initiated...\n";
close_out logfile;
print_endline "You have 30 seconds to reach minimum safety distance."
(* Write to an output file using redirection. *)
#load "unix.cma";;
let () =
let logfile = open_out "/tmp/log" in
let old_descr = Unix.dup Unix.stdout in
(* switch to logfile for output *)
Unix.dup2 (Unix.descr_of_out_channel logfile) Unix.stdout;
print_endline "Countdown initiated...";
(* return to original output *)
Unix.dup2 old_descr Unix.stdout;
print_endline "You have 30 seconds to reach minimum safety distance."
[править] Opening a File
(* open file "path" for reading only *)
let source =
try open_in path
with Sys_error msg -> failwith ("Couldn't read from " ^ msg)
(* open file "path" for writing only *)
let sink =
try open_out path
with Sys_error msg -> failwith ("Couldn't write to " ^ msg)
(*-----------------------------*)
#load "unix.cma";;
(* open file "path" for reading only *)
let source =
try Unix.openfile path [Unix.O_RDONLY] 0o644
with Unix.Unix_error (code, func, param) ->
failwith (Printf.sprintf "Couldn't open %s for reading: %s"
path (Unix.error_message code))
(* open file "path" for writing only *)
let sink =
try Unix.openfile path [Unix.O_WRONLY; Unix.O_CREAT] 0o644
with Unix.Unix_error (code, func, param) ->
failwith (Printf.sprintf "Couldn't open %s for writing: %s"
path (Unix.error_message code))
(*-----------------------------*)
(* open file "path" for reading and writing *)
let fh =
try Unix.openfile filename [Unix.O_RDWR] 0o644
with Unix.Unix_error (code, func, param) ->
failwith (Printf.sprintf "Couldn't open %s for read and write: %s"
filename (Unix.error_message code))
(*-----------------------------*)
(* open file "path" read only *)
let fh = open_in path
let fh = Unix.openfile path [Unix.O_RDONLY] 0o644
(*-----------------------------*)
(* open file "path" write only, create it if it does not exist *)
let fh = open_out path
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] 0o600
(*-----------------------------*)
(* open file "path" write only, fails if file exists *)
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_EXCL; Unix.O_CREAT] 0o600
(*-----------------------------*)
(* open file "path" for appending *)
let fh =
open_out_gen [Open_wronly; Open_append; Open_creat] 0o600 path
let fh =
Unix.openfile path [Unix.O_WRONLY; Unix.O_APPEND; Unix.O_CREAT] 0o600
(*-----------------------------*)
(* open file "path" for appending only when file exists *)
let fh = Unix.openfile path [Unix.O_WRONLY; Unix.O_APPEND] 0o600
(*-----------------------------*)
(* open file "path" for reading and writing *)
let fh = Unix.openfile path [Unix.O_RDWR] 0o600
(*-----------------------------*)
(* open file "path" for reading and writing,
create a new file if it does not exist *)
let fh = Unix.openfile path [Unix.O_RDWR; Unix.O_CREAT] 0o600
(*-----------------------------*)
(* open file "path" for reading and writing, fails if file exists *)
let fh = Unix.openfile path [Unix.O_RDWR; Unix.O_EXCL; Unix.O_CREAT] 0o600
[править] Opening Files with Unusual Filenames
(* Nothing different needs to be done with OCaml *)
[править] Expanding Tildes in Filenames
#load "str.cma";;
#load "unix.cma";;
let expanduser =
let regexp = Str.regexp "^~\\([^/]*\\)" in
let replace s =
match Str.matched_group 1 s with
| "" ->
(try Unix.getenv "HOME"
with Not_found ->
(try Unix.getenv "LOGDIR"
with Not_found ->
(Unix.getpwuid (Unix.getuid ())).Unix.pw_dir))
| user -> (Unix.getpwnam user).Unix.pw_dir in
Str.substitute_first regexp replace
(*-----------------------------*)
~user
~user/blah
~
~/blah
[править] Making Perl Report Filenames in Errors
#load "unix.cma";;
open Unix
(* Raises an exception on failure. *)
let file = openfile filename [ O_RDONLY ] 0o640
exception ErrString of string
let file =
try openfile filename [ O_RDONLY ] 0o640
with Unix_error (e, f, n) ->
raise (ErrString
(Printf.sprintf "Could not open %s for read: %s"
n (error_message e)))
[править] Creating Temporary Files
(* Open a new temporary file for writing. Filename.open_temp_file
safeguards against race conditions and returns both the filename
and an output channel. *)
let name, out_channel = Filename.open_temp_file "prefix-" ".suffix"
(* Install an at_exit handler to remove the temporary file when this
program exits. *)
let () = at_exit (fun () -> Sys.remove name)
(*-----------------------------*)
#load "unix.cma";;
let () =
(* Open a temporary file for reading and writing. *)
let name = Filename.temp_file "prefix-" ".suffix" in
let descr = Unix.openfile name [Unix.O_RDWR] 0o600 in
(* Write ten lines of output. *)
let out_channel = Unix.out_channel_of_descr descr in
for i = 1 to 10 do
Printf.fprintf out_channel "%d\n" i
done;
flush out_channel;
(* Seek to the beginning and read the lines back in. *)
let in_channel = Unix.in_channel_of_descr descr in
seek_in in_channel 0;
print_endline "Tmp file has:";
let rec loop () =
print_endline (input_line in_channel);
loop () in
try loop() with End_of_file -> ();
(* Close the underlying file descriptor and remove the file. *)
Unix.close descr;
Sys.remove name
[править] Storing Files Inside Your Program Text
#load "str.cma";;
let main data =
List.iter
(fun line ->
(* process the line *)
())
(Str.split (Str.regexp "\n") data)
let () = main "\
your data goes here
"
[править] Writing a Filter
#load "str.cma";;
let parse_args () =
match List.tl (Array.to_list Sys.argv) with
| [] -> ["-"]
| args -> args
let run_filter func args =
List.iter
(fun arg ->
let in_channel =
match arg with
| "-" -> stdin
| arg -> open_in arg in
try
begin
try
while true do
func (input_line in_channel)
done
with End_of_file -> ()
end;
close_in in_channel
with e ->
close_in in_channel;
raise e)
args
let () =
run_filter
(fun line ->
(* do something with the line *)
())
(parse_args ())
(*-----------------------------*)
(* arg demo 1: Process optional -c flag *)
let chop_first = ref false
let args =
match parse_args () with
| "-c" :: rest -> chop_first := true; rest
| args -> args
(* arg demo 2: Process optional -NUMBER flag *)
let columns = ref None
let args =
match parse_args () with
| arg :: rest
when Str.string_match (Str.regexp "^-\\([0-9]+\\)$") arg 0 ->
columns := Some (int_of_string (Str.matched_group 1 arg));
rest
| args -> args
(* arg demo 3: Process clustering -a, -i, -n, or -u flags *)
let append = ref false
let ignore_ints = ref false
let nostdout = ref false
let unbuffer = ref false
let args =
let rec parse_flags = function
| "" -> ()
| s ->
(match s.[0] with
| 'a' -> append := true
| 'i' -> ignore_ints := true
| 'n' -> nostdout := true
| 'u' -> unbuffer := true
| _ ->
Printf.eprintf "usage: %s [-ainu] [filenames] ...\n"
Sys.argv.(0);
flush stderr;
exit 255);
parse_flags (String.sub s 1 (String.length s - 1)) in
List.rev
(List.fold_left
(fun acc ->
function
| "" -> acc
| s when s.[0] = '-' ->
parse_flags (String.sub s 1 (String.length s - 1));
acc
| arg -> arg :: acc)
[]
(parse_args ()))
(*-----------------------------*)
(* findlogin - print all lines containing the string "login" *)
let () =
run_filter
(fun line ->
if Str.string_match (Str.regexp ".*login.*") line 0
then print_endline line)
(parse_args ())
(*-----------------------------*)
(* lowercase - turn all lines into lowercase *)
let () =
run_filter
(fun line -> print_endline (String.lowercase line))
(parse_args ())
(*-----------------------------*)
(* countchunks - count how many words are used *)
let chunks = ref 0
let () =
run_filter
(fun line ->
if line <> "" && line.[0] == '#'
then ()
else chunks := !chunks
+ List.length (Str.split (Str.regexp "[ \t]+") line))
(parse_args ());
Printf.printf "Found %d chunks\n" !chunks
[править] Modifying a File in Place with Temporary File
(* Modify a file in place. *)
let modify func old new' =
let old_in = open_in old in
let new_out = open_out new' in
begin
try
while true do
let line = input_line old_in in
func new_out line
done
with End_of_file -> ()
end;
close_in old_in;
close_out new_out;
Sys.rename old (old ^ ".orig");
Sys.rename new' old
(* Insert lines at line 20. *)
let () =
let count = ref 0 in
modify
(fun out line ->
incr count;
if !count = 20
then (output_string out "Extra line 1\n";
output_string out "Extra line 2\n");
output_string out line;
output_string out "\n")
old new'
(* Delete lines 20..30. *)
let () =
let count = ref 0 in
modify
(fun out line ->
incr count;
if !count < 20 || !count > 30
then (output_string out line;
output_string out "\n"))
old new'
[править] Modifying a File in Place with -i Switch
(* An equivalent of Perl's -i switch does not exist in OCaml. *)
[править] Modifying a File in Place Without a Temporary File
#load "str.cma";;
#load "unix.cma";;
(* Modify a file in place. *)
let modify func file =
let in' = open_in file in
let lines = ref [] in
begin
try
while true do
let line = input_line in' in
lines := func line :: !lines
done
with End_of_file -> ()
end;
close_in in';
let lines = List.rev !lines in
let out = open_out file in
List.iter
(fun line ->
output_string out line;
output_string out "\n")
lines;
close_out out
(* Replace DATE with the current date. *)
let () =
let tm = Unix.localtime (Unix.time ()) in
let date = Printf.sprintf "%02d/%02d/%04d"
(tm.Unix.tm_mon + 1)
tm.Unix.tm_mday
(tm.Unix.tm_year + 1900) in
modify
(Str.global_replace (Str.regexp "DATE") date)
infile
[править] Locking a File
#load "unix.cma";;
let descr = Unix.openfile path [Unix.O_RDWR] 0o664
let () =
Unix.lockf descr Unix.F_LOCK 0;
(* update file, then ... *)
Unix.close descr
let () =
try Unix.lockf descr Unix.F_TLOCK 0
with Unix.Unix_error (error, _, _) ->
Printf.eprintf
"can't immediately write-lock the file (%s), blocking ...\n"
(Unix.error_message error);
flush stderr;
Unix.lockf descr Unix.F_LOCK 0
(*-----------------------------*)
#load "unix.cma";;
let descr = Unix.openfile "numfile" [Unix.O_RDWR; Unix.O_CREAT] 0o664
let () =
Unix.lockf descr Unix.F_LOCK 0;
(* Now we have acquired the lock, it's safe for I/O *)
let num =
try int_of_string (input_line (Unix.in_channel_of_descr descr))
with _ -> 0 in
ignore (Unix.lseek descr 0 Unix.SEEK_SET);
Unix.ftruncate descr 0;
let out = Unix.out_channel_of_descr descr in
output_string out (string_of_int (num + 1));
output_string out "\n";
flush out;
Unix.close descr
[править] Flushing Output
(* OCaml automatically flushes after calling these functions: *)
let () =
print_endline "I get flushed.";
print_newline (); (* Me too! *)
prerr_endline "So do I.";
prerr_newline () (* As do I. *)
(* The Printf functions allow a format specifier of "%!" to trigger
an immediate flush. *)
let () = Printf.printf "I flush %s%! and %s!\n%!" "here" "there"
(*-----------------------------*)
(* seeme - demo stdio output buffering *)
#load "unix.cma";;
let () =
output_string stdout "Now you don't see it...";
Unix.sleep 2;
print_endline "now you do"
(*-----------------------------*)
(* A channel can be explicitly flushed: *)
let () = flush stderr
(* All channels can be flushed at once (errors are ignored): *)
let () = flush_all ()
(* Closing a channel flushes automatically: *)
let () =
output_string stdout "I get written.\n";
close_out stdout
(* Calls to exit result in a flush_all, and exit is always called at
termination even if an error occurs. *)
let () =
output_string stderr "Bye!\n";
exit 0
[править] Reading from Many Filehandles Without Blocking
#load "unix.cma";;
let () =
(* list all file descriptors to poll *)
let readers = [file_descr1; file_descr2; file_descr3] in
let ready, _, _ = Unix.select readers [] [] 0.0 in
(* input waiting on the filehandles in "ready" *)
()
(*-----------------------------*)
let () =
let in_channel = Unix.in_channel_of_descr file_descr in
let found, _, _ = Unix.select [file_descr] [] [] 0.0 (* just check *) in
match found with
| [] -> ()
| _ ->
let line = input_line in_channel in
Printf.printf "I read %s\n%!" line
[править] Doing Non-Blocking I/O
#load "unix.cma";;
(* Pass the O_NONBLOCK flag when calling Unix.openfile. *)
let file_descr =
try Unix.openfile "/dev/cua0" [Unix.O_RDWR; Unix.O_NONBLOCK] 0o666
with Unix.Unix_error (code, func, param) ->
Printf.eprintf "Can't open modem: %s\n" (Unix.error_message code);
exit 2
(*-----------------------------*)
(* If the file descriptor already exists, use Unix.set_nonblock. *)
let () = Unix.set_nonblock file_descr
(*-----------------------------*)
(* In non-blocking mode, calls that would block throw exceptions. *)
let () =
let chars_written =
try
Some (Unix.single_write file_descr buffer 0 (String.length buffer))
with
| Unix.Unix_error (Unix.EAGAIN, _, _)
| Unix.Unix_error (Unix.EWOULDBLOCK, _, _) -> None in
match chars_written with
| Some n when n = String.length buffer ->
(* successfully wrote *)
()
| Some n ->
(* incomplete write *)
()
| None ->
(* would block *)
()
let () =
let chars_read =
try
Some (Unix.read file_descr buffer 0 buffer_size)
with
| Unix.Unix_error (Unix.EAGAIN, _, _)
| Unix.Unix_error (Unix.EWOULDBLOCK, _, _) -> None in
match chars_read with
| Some n ->
(* successfully read n bytes from file_descr *)
()
| None ->
(* would block *)
()
[править] Determining the Number of Bytes to Read
#load "unix.cma";;
(* OCaml does not expose the FIONREAD ioctl call. It's better to use
non-blocking reads anyway. There is the following function in
Pervasives which gives you the length of an input channel, but it
works by doing a seek so it only works on regular files: *)
let () =
let length = in_channel_length in_channel in
(* ... *)
()
[править] Storing Filehandles in Variables
(* Channels and file descriptors are ordinary, first-class values in
OCaml. No special contortions are necessary to store them in data
structures, pass them as arguments, etc. *)
[править] Caching Open Output Filehandles
module FileCache = struct
let isopen = Hashtbl.create 0
let maxopen = ref 16
let resize () =
if Hashtbl.length isopen >= !maxopen
then
begin
let newlen = !maxopen / 3 in
let items = ref [] in
Hashtbl.iter
(fun filename (chan, count) ->
items := (count, filename, chan) :: !items)
isopen;
let items = Array.of_list !items in
Array.sort compare items;
let pivot = Array.length items - newlen in
for i = 0 to Array.length items - 1 do
let (count, filename, chan) = items.(i) in
if i < pivot
then (close_out chan;
Hashtbl.remove isopen filename)
else (Hashtbl.replace isopen filename (chan, 0))
done
end
let output ?(mode=[Open_creat; Open_append]) ?(perm=0o640) filename data =
let (chan, count) =
try Hashtbl.find isopen filename
with Not_found ->
resize ();
(open_out_gen mode perm filename, 0) in
output_string chan data;
flush chan;
Hashtbl.replace isopen filename (chan, count + 1)
let close filename =
try
match Hashtbl.find isopen filename with (chan, _) ->
close_out chan;
Hashtbl.remove isopen filename
with Not_found -> ()
end
(*-----------------------------*)
(* splitwulog - split wuftpd log by authenticated user *)
#load "str.cma";;
let outdir = "/var/log/ftp/by-user"
let regexp = Str.regexp " "
let () =
try
while true do
let line = input_line stdin in
let chunks = Array.of_list (Str.split regexp line) in
let user = chunks.(Array.length chunks - 5) in
let path = Filename.concat outdir user in
FileCache.output path (line ^ "\n")
done
with End_of_file -> ()
[править] Printing to Many Filehandles Simultaneously
(* Save your channels in a list and iterate through them normally. *)
let () =
List.iter
(fun channel ->
output_string channel stuff_to_print)
channels
(* For convenience, you can define a helper function and use currying. *)
let write data channel = output_string channel data
let () = List.iter (write stuff_to_print) channels
(*-----------------------------*)
(* Open a pipe to "tee". Requires a Unix environment. *)
#load "unix.cma";;
let () =
let channel =
Unix.open_process_out "tee file1 file2 file3 >/dev/null" in
output_string channel "whatever\n";
ignore (Unix.close_process_out channel)
(*-----------------------------*)
(* Redirect standard output to a tee. *)
let () =
let reader, writer = Unix.pipe () in
match Unix.fork () with
| 0 ->
Unix.close writer;
Unix.dup2 reader Unix.stdin;
Unix.close reader;
Unix.execvp "tee" [| "tee"; "file1"; "file2"; "file3" |]
| pid ->
Unix.close reader;
Unix.dup2 writer Unix.stdout;
Unix.close writer
let () =
print_endline "whatever";
close_out stdout;
ignore (Unix.wait ())
[править] Opening and Closing File Descriptors by Number
(* An abstraction barrier exists between file descriptor numbers and
file_descr values, but Ocamlnet provides functions in the Netsys
module to map between the two. *)
#load "unix.cma";;
#directory "+netsys";;
#load "netsys.cma";;
(* Open the descriptor itself. *)
let file_descr = Netsys.file_descr_of_int fdnum
let in_channel = Unix.in_channel_of_descr file_descr
(* Open a copy of the descriptor. *)
let file_descr = Unix.dup (Netsys.file_descr_of_int fdnum)
let in_channel = Unix.in_channel_of_descr file_descr
(* After processing... *)
let () = close_in in_channel
[править] Copying Filehandles
#load "unix.cma";;
let () =
(* Take copies of the file descriptors. *)
let oldout = Unix.dup Unix.stdout in
let olderr = Unix.dup Unix.stderr in
(* Redirect stdout and stderr. *)
let output =
Unix.openfile
"/tmp/program.out"
[Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC]
0o666 in
Unix.dup2 output Unix.stdout;
Unix.close output;
let copy = Unix.dup Unix.stdout in
Unix.dup2 copy Unix.stderr;
Unix.close copy;
(* Run the program. *)
ignore (Unix.system joe_random_process);
(* Close the redirected file handles. *)
Unix.close Unix.stdout;
Unix.close Unix.stderr;
(* Restore stdout and stderr. *)
Unix.dup2 oldout Unix.stdout;
Unix.dup2 olderr Unix.stderr;
(* Avoid leaks by closing the independent copies. *)
Unix.close oldout;
Unix.close olderr
[править] Program: netlock
drivelock.ml:
#!/usr/bin/ocaml
(* drivelock - demo LockDir module *)
#use "netlock.ml";;
let die msg = prerr_endline msg; exit 1
let () =
Sys.set_signal Sys.sigint
(Sys.Signal_handle (fun _ -> die "outta here"));
LockDir.debug := true;
let path =
try Sys.argv.(1)
with Invalid_argument _ ->
die ("usage: " ^ Sys.argv.(0) ^ " <path>") in
(try LockDir.nflock ~naptime:2 path
with LockDir.Error _ ->
die ("couldn't lock " ^ path ^ " in 2 seconds"));
Unix.sleep 100;
LockDir.nunflock path
(*-----------------------------*)
netlock.ml:
#load "unix.cma";;
(* module to provide very basic filename-level *)
(* locks. No fancy systems calls. In theory, *)
(* directory info is sync'd over NFS. Not *)
(* stress tested. *)
module LockDir :
sig
exception Error of string
val debug : bool ref
val check : int ref
val nflock : ?naptime:int -> string -> unit
val nunflock : string -> unit
end = struct
exception Error of string
let debug = ref false
let check = ref 1
module StringSet = Set.Make(String)
let locked_files = ref StringSet.empty
(* helper function *)
let name2lock pathname =
let dir = Filename.dirname pathname in
let file = Filename.basename pathname in
let dir = if dir = "." then Sys.getcwd () else dir in
let lockname = Filename.concat dir (file ^ ".LOCKDIR") in
lockname
let nflock ?(naptime=0) pathname =
let lockname = name2lock pathname in
let whosegot = Filename.concat lockname "owner" in
let start = Unix.time () in
let missed = ref 0 in
(* if locking what I've already locked, raise exception *)
if StringSet.mem pathname !locked_files
then raise (Error (pathname ^ " already locked"));
Unix.access (Filename.dirname pathname) [Unix.W_OK];
begin
try
while true do
try
Unix.mkdir lockname 0o777;
raise Exit
with Unix.Unix_error (e, _, _) ->
incr missed;
if !missed > 10
then raise (Error
(Printf.sprintf "can't get %s: %s"
lockname (Unix.error_message e)));
if !debug
then
begin
let owner = open_in whosegot in
let lockee = input_line owner in
close_in owner;
Printf.eprintf "%s[%d]: lock on %s held by %s\n%!"
Sys.argv.(0) (Unix.getpid ()) pathname lockee
end;
Unix.sleep !check;
if naptime > 0 && Unix.time () > start +. float naptime
then raise Exit
done
with Exit -> ()
end;
let owner =
try
open_out_gen [Open_wronly; Open_creat; Open_excl] 0o666 whosegot
with Sys_error e ->
raise (Error ("can't create " ^ e)) in
Printf.fprintf owner "%s[%d] on %s\n"
Sys.argv.(0) (Unix.getpid ()) (Unix.gethostname ());
close_out owner;
locked_files := StringSet.add pathname !locked_files
(* free the locked file *)
let nunflock pathname =
let lockname = name2lock pathname in
let whosegot = Filename.concat lockname "owner" in
Unix.unlink whosegot;
if !debug then Printf.eprintf "releasing lock on %s\n%!" lockname;
locked_files := StringSet.remove pathname !locked_files;
Unix.rmdir lockname
(* anything forgotten? *)
let () =
at_exit
(fun () ->
StringSet.iter
(fun pathname ->
let lockname = name2lock pathname in
let whosegot = Filename.concat lockname "owner" in
Printf.eprintf "releasing forgotten %s\n%!" lockname;
Unix.unlink whosegot;
Unix.rmdir lockname)
!locked_files)
end
[править] Program: lockarea
(* The "fcntl" system call is not available in the OCaml standard library.
You would have to drop down to C in order to lock regions of a file as
described in the original Perl recipe. *)
|