nfortune/src/nfortunepkg/datfile.nim

188 lines
6.5 KiB
Nim

## Code related to handling ``fortune-mod`` .dat files (ones generated by the
## ``strfile`` utility). Includes deserializing, serializing, and generation.
##
## This code aims to be compatible with version 2 ``fortune-mod`` dat files.
import streams
import system
import endians
import strformat
type
  DatFile* = ref object
    ## Deserialized .dat file.
    ##
    ## The original ``strfile`` provides options for both randomizing and
    ## ordering the offsets, and indicates if that was done by setting a flag
    ## in the datfile. nfortune does not provide functionality for randomizing
    ## or ordering the offsets, and so will never set the flags.
    ##
    ## Original strfile outputs n+1 offsets, where n is the number of strings
    ## in file. The offsets point to the first byte of a given string, except
    ## for the last offset, which will point one byte past the end of file.
    version*: uint32 ## dat file format version
    stringCount*: uint32 ## total number of fortunes in this file
    longestLength*: uint32 ## length of longest fortune
    shortestLength*: uint32 ## length of shortest fortune
    randomized*: bool ## are the offsets randomized
    ordered*: bool ## are the offsets ordered
    rot*: bool ## is the text rot-13'd
    delimiter*: char ## delimiter character used in fortune file, ``%`` by default
    offsets*: seq[int32] ## offsets into the fortune file where each fortune begins
  DatFileParseError* = object of system.ValueError
    ## Raised when a .dat file cannot be parsed, e.g. because the input is
    ## truncated or carries an unrecognized version. Exported so that callers
    ## of ``deserialize`` can catch it by name.
const
  # Bit masks for the flags word of the dat file header. Each one maps to the
  # bool field of the same meaning in DatFile (randomized, ordered, rot).
  StrRandom = 0x1'u32
  StrOrdered = 0x2'u32
  StrRotated = 0x4'u32
  # dat file format version handled by this module; corresponds to
  # fortune-mod's
  DatfileVersion = 2'u32
  # delimiter assumed when the caller does not supply one
  DefaultDelim* = '%'
proc read32(input: streams.Stream): uint32 {.
    raises: [DatFileParseError, OSError, IOError].} =
  ## Consume exactly 4 bytes from ``input`` and decode them as a big endian
  ## 32 bit unsigned integer.
  ##
  ## Raises ``DatFileParseError`` when fewer than 4 bytes could be read.
  var raw: array[4, char]
  let got = input.readData(raw.addr, sizeof(raw))
  if got != 4:
    raise newException(DatFileParseError, "Failed to parse int32")
  # dat files are explicitly big endian whereas streams hands back bytes in
  # host order, so the value is converted through raw memory here
  bigEndian32(result.addr, raw.addr)
proc toBigEndian(input: uint32): array[4, char] =
  ## Return the 4 bytes of ``input`` laid out in big endian order (most
  ## significant byte first), regardless of the host byte order.
  var hostValue = input # mutable copy so a plain ``addr`` can be taken
  bigEndian32(result.addr, hostValue.addr)
proc deserialize*(input: Stream): DatFile {.
    raises: [DatFileParseError, OSError, IOError].} =
  ## Deserialize a dat file in ``input`` into a ``DatFile`` object.
  ##
  ## The header is read as five big endian 32 bit words (version, string
  ## count, longest length, shortest length, flags), followed by the delimiter
  ## character, 3 bytes of padding and ``stringCount + 1`` big endian offsets.
  ##
  ## Raises ``DatFileParseError`` if the version is not ``DatfileVersion``, or
  ## if the stream ends before the header or all offsets could be read.
  result = new DatFile
  result.version = read32(input)
  if result.version != DatfileVersion:
    raise newException(DatFileParseError, "Unrecognized dat file version")
  result.stringCount = read32(input)
  result.longestLength = read32(input)
  result.shortestLength = read32(input)
  let flags = read32(input)
  # Test the bits explicitly: casting the masked value (1, 2 or 4) to bool
  # would reinterpret a bit pattern that is not a valid bool.
  result.randomized = (flags and StrRandom) != 0'u32
  result.ordered = (flags and StrOrdered) != 0'u32
  result.rot = (flags and StrRotated) != 0'u32
  result.delimiter = input.readChar()
  # a NUL delimiter is treated as having run out of input
  if result.delimiter == '\x00':
    raise newException(DatFileParseError, "Unexpected EOF")
  input.setPosition(input.getPosition + 3) # skip padding
  # note that the dat file always includes one extra offset past the end of
  # the fortune file, not included in stringCount. The count is widened to
  # int before adding one so that a stringCount of high(uint32) cannot wrap
  # around to an empty offset table.
  result.offsets = newSeq[int32](int(result.stringCount) + 1)
  var buffer: array[4, char]
  for offset in result.offsets.mitems:
    if input.readData(addr(buffer), 4) < 4:
      raise newException(DatFileParseError, "Failed to parse a string offset")
    bigEndian32(addr(offset), addr(buffer))
proc serialize*(self: DatFile, output: Stream) =
  ## Write ``self`` to ``output`` in dat file layout: five big endian 32 bit
  ## header words, the delimiter, 3 zero bytes of padding, then every offset
  ## as a big endian 32 bit value. ``output`` must be writable and is left
  ## open.
  var flagBits: uint32 = 0
  if self.randomized: flagBits = flagBits or StrRandom
  if self.ordered: flagBits = flagBits or StrOrdered
  if self.rot: flagBits = flagBits or StrRotated

  let header = [self.version, self.stringCount, self.longestLength,
                self.shortestLength, flagBits]
  for word in header:
    output.write(toBigEndian(word))

  output.write(self.delimiter)
  for _ in 1 .. 3:
    output.write(0'u8) # padding after the delimiter

  for offset in self.offsets:
    var hostValue = offset
    var encoded: array[4, char]
    bigEndian32(encoded.addr, hostValue.addr)
    output.write(encoded)
proc recordLength(dat: DatFile, length: uint32) =
  ## Fold the length of one fortune into the shortest/longest statistics.
  dat.shortestLength = min(dat.shortestLength, length)
  dat.longestLength = max(dat.longestLength, length)

proc generateFromFile*(input: Stream, delim: char = DefaultDelim): DatFile =
  ## Generate a DatFile from ``input``, a fortune file.
  ##
  ## ``input`` is scanned for delimiter lines (a newline, ``delim``, a
  ## newline) and must support ``peekData`` and ``setPosition``. The result
  ## holds one offset per fortune plus a final offset one byte past the end
  ## of the input. Fortune lengths include the final newline of the fortune.
  ##
  ## If ``input`` is empty, ``stringCount`` is 0 and the length fields keep
  ## their initial values (``high(uint32)`` for shortest, 0 for longest).
  result = DatFile(
    version: DatfileVersion,
    longestLength: low(uint32),
    shortestLength: high(uint32),
    randomized: false,
    ordered: false,
    rot: false,
    delimiter: delim,
    offsets: @[0'i32] # first offset is always zero, the start of file
  )
  var currentOffset: int32 = 0
  # delimiters have to be on a line by themselves
  let delimString: array[3, char] = ['\n', delim, '\n']
  var peeked: array[3, char]
  while not input.atEnd():
    # we copy directly to a char array here to avoid allocating a lot of
    # strings
    let charCount = input.peekData(addr(peeked), 3)
    if charCount < 3:
      # we're approaching the end and there isn't enough chars left for there
      # to be a valid delimiter, so we seek one past the end and bail
      currentOffset += int32(charCount)
      input.setPosition(int(currentOffset))
      break
    if peeked == delimString:
      # currentOffset sits on the newline that ends the fortune; lengths
      # include that final newline, hence the +1
      recordLength(result, uint32(currentOffset - result.offsets[^1]) + 1)
      # advancing 3 puts us after the second newline in delim string
      currentOffset += 3'i32
      result.offsets.add(currentOffset)
    else:
      currentOffset += 1'i32
    input.setPosition(int(currentOffset))
  # if the last string is terminated with a delimiter, we'll have added the
  # last offset already; if not we'll have to add it now. This is consistent
  # with original strfile behavior
  if result.offsets[^1] == currentOffset:
    result.stringCount = uint32(result.offsets.len - 1)
  else:
    # The trailing fortune has no delimiter after it, so its length was never
    # recorded in the loop. currentOffset is already one past its last byte,
    # so no +1 is needed here.
    recordLength(result, uint32(currentOffset - result.offsets[^1]))
    result.stringCount = uint32(result.offsets.len)
    result.offsets.add(currentOffset)