188 lines
6.5 KiB
Nim
188 lines
6.5 KiB
Nim
## Code related to handling ``fortune-mod`` .dat files (ones generated by the
## ``strfile`` utility). Includes deserializing, serializing, and generation.
|
|
##
|
|
## This code aims to be compatible with version 2 ``fortune-mod`` dat files.
|
|
|
|
import streams
|
|
import system
|
|
import endians
|
|
import strformat
|
|
|
|
type
  DatFile* = ref object
    ## Deserialized .dat file.
    ##
    ## The original ``strfile`` provides options for both randomizing and
    ## ordering the offsets, and indicates if that was done by setting a flag in
    ## the datfile. nfortune does not provide functionality for randomizing or
    ## ordering the offsets, and so will never set the flags.
    ##
    ## Original strfile outputs n+1 offsets, where n is the number of strings in
    ## file. The offsets point to the first byte of a given string, except for
    ## the last offset, which will point one byte past the end of file.

    version*: uint32 ## dat file format version stored in the header
    stringCount*: uint32 ## total number of fortunes in this file
    longestLength*: uint32 ## length of longest fortune
    shortestLength*: uint32 ## length of shortest fortune
    randomized*: bool ## are the offsets randomized
    ordered*: bool ## are the offsets ordered
    rot*: bool ## is the text rot-13'd
    delimiter*: char ## delimiter character used in fortune file, ``%`` by default
    offsets*: seq[int32] ## offsets into the fortune file where each fortune begins

  # Exported so that callers of the exported procs that raise it can name it
  # in an ``except`` branch.
  DatFileParseError* = object of system.ValueError ## \
    ## Raised when a dat file cannot be parsed (bad version or truncated data).
|
|
|
|
|
|
const
  # Bit masks for the header's flags word; each one corresponds to one of the
  # bool fields in DatFile
  StrRandom = 0x1'u32
  StrOrdered = 0x2'u32
  StrRotated = 0x4'u32
  DatfileVersion = 2'u32 # corresponds to fortune-mod's dat file version
  DefaultDelim* = '%'
|
|
|
|
|
|
proc read32(input: streams.Stream): uint32
  {.raises: [DatFileParseError, OSError, IOError].} =
  ## Take the next 4 bytes from ``input`` and interpret them as a big endian
  ## 32 bit value. Raises ``DatFileParseError`` if 4 bytes could not be read.
  var raw: array[4, char]
  let bytesRead = input.readData(addr(raw), sizeof(raw))
  if bytesRead != sizeof(raw):
    raise newException(DatFileParseError, "Failed to parse int32")

  # dat files are explicitly big endian while a plain stream read would use the
  # machine's byte order, so the raw bytes are converted by hand
  bigEndian32(addr(result), addr(raw))
|
|
|
|
proc toBigEndian(input: uint32): array[4, char] =
  ## Return the 4 bytes of ``input`` laid out in big endian order (most
  ## significant byte first).
  var hostValue = input # local copy so a plain ``addr`` can be taken
  bigEndian32(addr(result), addr(hostValue))
|
|
|
|
proc deserialize*(input: Stream): DatFile
  {.raises: [DatFileParseError, OSError, IOError].} =
  ## Deserialize a dat file in ``input`` into a ``DatFile`` object.
  ##
  ## Reads, in order: version, string count, longest length, shortest length
  ## and flags (each a big endian 32 bit value), the delimiter character
  ## followed by 3 padding bytes, and finally ``stringCount + 1`` big endian
  ## 32 bit offsets.
  ##
  ## Raises ``DatFileParseError`` if the version is not ``DatfileVersion`` or
  ## if the stream runs out of data.

  result = new DatFile

  result.version = read32(input)
  if result.version != DatfileVersion:
    raise newException(DatFileParseError, "Unrecognized dat file version")

  result.stringCount = read32(input)
  result.longestLength = read32(input)
  result.shortestLength = read32(input)

  # Test each flag bit explicitly. Reinterpreting the masked word with ``cast``
  # would store 2 or 4 in a bool, which is not a normalized boolean value.
  let flags = read32(input)
  result.randomized = (flags and StrRandom) != 0
  result.ordered = (flags and StrOrdered) != 0
  result.rot = (flags and StrRotated) != 0

  # readChar hands back '\x00' when it could not read a character
  result.delimiter = input.readChar()
  if result.delimiter == '\x00':
    raise newException(DatFileParseError, "Unexpected EOF")

  input.setPosition(input.getPosition + 3) # skip padding

  # note that the dat file always includes one extra offset past the end of the
  # fortune file, not included in stringCount
  result.offsets = newSeq[int32](result.stringCount+1)
  var buffer: array[4, char]
  for i in 0 ..< result.offsets.len:
    if input.readData(addr(buffer), 4) < 4:
      raise newException(DatFileParseError, "Failed to parse a string offset")

    # offsets are stored big endian, same as the header words
    var currentNum: int32
    bigEndian32(addr(currentNum), addr(buffer))
    result.offsets[i] = currentNum
|
|
|
|
proc serialize*(self: DatFile, output: Stream) =
  ## Write ``self`` to ``output`` in dat file layout: the header words (big
  ## endian), the delimiter plus 3 padding bytes, then every offset (big
  ## endian). ``output`` should be available for writing; it is not closed.

  # leading header words, in on-disk order
  for word in [self.version, self.stringCount, self.longestLength,
               self.shortestLength]:
    output.write(toBigEndian(word))

  # fold the bool fields back into the flags word
  var flagBits: uint32 = 0
  if self.randomized: flagBits = flagBits or StrRandom
  if self.ordered: flagBits = flagBits or StrOrdered
  if self.rot: flagBits = flagBits or StrRotated
  output.write(toBigEndian(flagBits))

  output.write(self.delimiter)
  for _ in 1 .. 3: # 3 bytes of padding
    output.write(0'u8)

  # same bit pattern as the int32 offset, just routed through the helper
  for offset in self.offsets:
    output.write(toBigEndian(cast[uint32](offset)))
|
|
|
|
proc generateFromFile*(input: Stream, delim: char = DefaultDelim): DatFile =
  ## Generate a DatFile from ``input``, a fortune file.
  ##
  ## Fortunes are separated by a line holding only ``delim``, i.e. the byte
  ## sequence ``\n``, ``delim``, ``\n``. The scan tracks offsets starting from
  ## 0 and repositions ``input`` with ``setPosition`` as it advances, so
  ## ``input`` is expected to be positioned at its start.
  ##
  ## ``shortestLength`` and ``longestLength`` cover every fortune, including a
  ## final fortune that is not followed by a delimiter line. For input with no
  ## fortunes they keep their initial values (``high(uint32)`` and 0).

  result = DatFile(
    version: DatfileVersion,
    longestLength: low(uint32),
    shortestLength: high(uint32),
    randomized: false,
    ordered: false,
    rot: false,
    delimiter: delim,
    offsets: @[0'i32] # first offset is always zero, the start of file
  )

  var currentOffset: int32 = 0

  # delimiters have to be on a line by themselves
  let delimString: array[3, char] = ['\n', delim, '\n']

  var peeked: array[3, char]

  while not input.atEnd():
    # we copy directly to a char array here to avoid allocating a lot of strings
    let charCount = input.peekData(addr(peeked), 3)

    if charCount < 3:
      # we're approaching the end and there isn't enough chars left for there to
      # be a valid delimiter, so we seek one past the end and bail
      currentOffset += int32(charCount)
      input.setPosition(int(currentOffset))
      break

    if peeked == delimString:
      # lengths include the final newline, hence the +1
      let lastLen = uint32(currentOffset - result.offsets[^1]) + 1
      result.shortestLength = min(result.shortestLength, lastLen)
      result.longestLength = max(result.longestLength, lastLen)

      # advancing 3 puts us after the second newline in delim string
      currentOffset += 3'i32
      result.offsets.add(currentOffset)
    else:
      currentOffset += 1'i32

    input.setPosition(int(currentOffset))

  # if the last string is terminated with a delimiter, we'll have added the last
  # offset already; if not we'll have to add it now. This is consistent with
  # original strfile behavior
  if result.offsets[^1] == currentOffset:
    result.stringCount = uint32(result.offsets.len - 1)
  else:
    # The trailing fortune runs from the last recorded offset to end of input.
    # It is counted in stringCount, so it must be reflected in the length
    # bounds as well.
    let lastLen = uint32(currentOffset - result.offsets[^1])
    result.shortestLength = min(result.shortestLength, lastLen)
    result.longestLength = max(result.longestLength, lastLen)

    result.stringCount = uint32(result.offsets.len)
    result.offsets.add(currentOffset)
|
|
|