use bufio;
use bytes;
use fmt;
use hash;
use hash::fnv;
use io;
use os;
use rt;
use sort;
use sort::cmp;
use strings::{dup, fromutf8_unsafe, split, toutf8};
use types;

// Running aggregate for a single station. [[main]] stores temperatures as
// integers scaled by ten (the value produced by [[i64parse]]) and divides by
// 10.0 only when printing.
type data = struct {
	min: i64,
	max: i64,
	sum: i64,
	count: size,
};

// Parses a temperature of the form "D.D" or "DD.D", optionally preceded by
// '-', from the start of bytes. Exactly one fractional digit is read.
//
// Returns the value scaled by ten ("-9.2" yields -92) together with the number
// of bytes consumed. The count includes the one byte that follows the
// fractional digit (the line terminator in [[main]]), so it may exceed
// len(bytes) by one when nothing follows the number.
//
// No validation is performed: the input is assumed to be well-formed.
fn i64parse(bytes: []u8) (i64, size) = {
	let index = 0z;
	let u: i64 = 0;
	let neg = 1;
	if (bytes[index] == '-') {
		neg = -1;
		index += 1;
	};
	// First integer digit.
	u = (bytes[index] - '0'): i64;
	index += 1;
	// Optional second integer digit.
	if (bytes[index] != '.') {
		u = u * 10 + (bytes[index] - '0'): i64;
		index += 1;
	};
	// Skip the '.'.
	index += 1;
	u = u * 10 + (bytes[index] - '0'): i64;
	// Step over the fractional digit and the byte that follows it.
	index += 2;
	return (u * neg, index);
};

@test fn parsei64() void = {
	assert(i64parse(toutf8("0.0")).0 == 0);
	assert(i64parse(toutf8("9.2")).0 == 92);
	assert(i64parse(toutf8("-9.2")).0 == -92);
	assert(i64parse(toutf8("98.2")).0 == 982);
	assert(i64parse(toutf8("-98.2")).0 == -982);
};

// Maps the whole of fd read-only and privately, advises the kernel that the
// mapping will be accessed sequentially, and returns the mapping as a byte
// slice. Aborts if fstat or mmap fails or if madvise does not return 0.
//
// The returned slice must be released with [[free_mmap]].
fn mmap(fd: io::file) []u8 = {
	let sb = rt::st { ... };
	rt::fstat(fd, &sb)!;
	let ptr = rt::mmap(null, sb.sz, rt::PROT_READ, rt::MAP_PRIVATE, fd, 0)!;
	// Hardcoded value for MADV_SEQUENTIAL, from libc-rs it seems like it's
	// 2 on all currently supported platforms (Linux, FreeBSD, NetBSD,
	// OpenBSD, Dragonfly).
	assert(rt::syscall(rt::SYS_madvise, ptr: uintptr: u64, sb.sz: u64, 2) == 0);
	// Build the slice header by hand so that it points at the mapping.
	return *(&types::slice {
		data = ptr,
		length = sb.sz,
		capacity = 0,
	}: *[]u8);
};

// Unmaps a slice previously returned by [[mmap]]. Aborts if munmap fails.
fn free_mmap(map: []u8) void = {
	let map = &map: *types::slice;
	rt::munmap(map.data: *opaque, map.length)!;
};

// Comparison function for [[sort::sort]] over (hash, name) tuples: orders the
// entries by name and ignores the hash.
fn names_cmp(a: *opaque, b: *opaque) int = {
	const a = *(a: *(u64, str));
	const b = *(b: *(u64, str));
	return cmp::strs(&a.1, &b.1);
};

// Reads "name;temperature\n" records from the file named by the single
// command line argument, aggregates min/max/sum/count per station and prints
// "{"name"=min/mean/max, ...}" ordered by station name.
//
// The file is consumed through a fixed 1 MiB buffer. Bytes after the last
// newline of each read are carried over to the start of the buffer and
// completed by the next read. A final record that is not newline-terminated
// is not counted.
export fn main() void = {
	let map: hashmap = [("", data { ... })...];
	if (len(os::args) != 2) fmt::fatalf("usage: {} <file>", os::args[0]);
	const handle = os::open(os::args[1])!;
	defer io::close(handle)!;

	let buf: *[1024 * 1024]u8 = alloc([0...])!;
	// Number of carried-over bytes at the start of buf.
	let read_start = 0z;
	// (hash, name) for every distinct station, in first-seen order. At
	// most 10000 distinct stations fit in the static buffer.
	let names_buf: *[10000](u64, str) = alloc([(0, "")...])!;
	let names = names_buf[..0];
	let hasher = fnv::fnv64a();

	for (true) {
		// Abort on I/O errors, like every other fallible call here.
		const n = match (io::read(handle, buf[read_start..])!) {
		case let n: size =>
			yield n;
		case io::EOF =>
			break;
		};
		// Defensive: nothing buffered and nothing read.
		if (read_start + n == 0) break;

		let chunk = buf[..read_start + n];
		// Cut after the LAST newline so that every complete record in
		// the buffer is handled now and only the trailing partial
		// record is carried over.
		const newline = match (bytes::rindex(chunk, '\n': u8)) {
		case let idx: size =>
			yield idx;
		case void =>
			break;
		};
		const remaining = chunk[newline + 1..];
		chunk = chunk[..newline + 1];

		for (true) {
			// Find the ';' that ends the station name, feeding the
			// name bytes to FNV-1a on the way (1099511628211 is the
			// 64-bit FNV prime).
			let sep_idx = 0z;
			let has_semi = false;
			for (const char .. chunk) {
				if (char == ';': u8) {
					has_semi = true;
					break;
				};
				hasher.v ^= char;
				hasher.v *= 1099511628211;
				sep_idx += 1;
			};
			if (!has_semi) break;

			const h = fnv::sum64(&hasher);
			hash::reset(&hasher);
			const after = chunk[sep_idx + 1..];
			// name borrows from buf and is only valid until buf is
			// overwritten by the next read.
			const name = fromutf8_unsafe(chunk[..sep_idx]);
			const (temp, index) = i64parse(after);
			chunk = after[index..];

			let item = getitem(&map, h);
			let key = name;
			if (item.count == 0) {
				// First sighting: keep an owned copy, because
				// the name is needed for sorting and printing
				// long after buf has been reused.
				key = dup(name)!;
				static append(names, (h, key))!;
				item.min = temp;
				item.max = temp;
			} else {
				if (item.min > temp) item.min = temp;
				if (item.max < temp) item.max = temp;
			};
			item.sum += temp;
			item.count += 1;
			setitem(&map, h, key, item);
		};

		// Move the partial trailing record to the front of buf; the
		// next read appends to it.
		read_start = len(remaining);
		buf[..read_start] = remaining[..];
	};

	sort::sort(names, size((u64, str)), &names_cmp)!;

	fmt::print("{")!;
	// Tracks whether a separator is needed; independent of i so that
	// skipped entries do not produce a stray ", ".
	let first = true;
	for (let i = 0z; i < len(names); i += 1) {
		const (h, station) = names[i];
		const (slot_name, item) = map[h: size & (BUCKETS - 1)];
		// Skip empty slots. The loop afterthought still advances i.
		if (slot_name == "") continue;
		if (!first) fmt::print(", ")!;
		first = false;
		fmt::printf("\"{}\"={:.1f}/{:.1f}/{:.1f}",
			station,
			item.min: f64 / 10.0,
			item.max: f64 / 10.0,
			item.sum: f64 / 10.0 / item.count: f64,
		)!;
	};
	fmt::println("}")!;
};