package core:encoding/xml

⌘K
Ctrl+K
or
/

    Overview

    An XML 1.0 / 1.1 parser

    Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
    Made available under Odin's BSD-3 license.
    
    A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
    
    List of contributors:
    	Jeroen van Rijn: Initial implementation.
    
    An XML 1.0 / 1.1 parser
    
    Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
    Made available under Odin's BSD-3 license.
    
    This file contains helper functions.
    
    An XML 1.0 / 1.1 parser
    
    Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
    Made available under Odin's BSD-3 license.
    
    A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
    
    List of contributors:
    	Jeroen van Rijn: Initial implementation.
    
    An XML 1.0 / 1.1 parser
    
    Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
    Made available under Odin's BSD-3 license.
    
    A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
    
    Features:
    	- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
    	- Simple to understand and use. Small.
    
    Caveats:
    	- We do NOT support HTML in this package, as that may or may not be valid XML.
    	  If it works, great. If it doesn't, that's not considered a bug.
    
    	- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
    	- `<!ELEMENT` and `<!ATTLIST` declarations are not supported, and will either be ignored or return an error, depending on the parser options.
    
    MAYBE:
    - XML writer?
    - Serialize/deserialize Odin types?
    
    List of contributors:
    	Jeroen van Rijn: Initial implementation.
    

    Types

    Attribute ¶

    Attribute :: struct {
    	key: string,
    	val: string,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    Attributes ¶

    Attributes :: [dynamic]Attribute

    Document ¶

    // Document is the parse result: the flat list of elements plus prologue, encoding,
    // doctype and comment data, along with the tokenizer, allocator, input buffer and
    // list of strings to free that belong to it.
    Document :: struct {
    	elements:        [dynamic]Element,
    	element_count:   u32,
    	prologue:        [dynamic]Attribute,
    	encoding:        Encoding,
    	doctype:         struct {
    		// 			We only scan the `<!DOCTYPE IDENT` part and skip the rest.
    		// 			NOTE(review): the end of this sentence was lost when the page was rendered
    		// 			(markup stripped); restored from context, confirm against the package source.
    		ident: string,
    		rest:  string,
    	},
    	// 		If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
    	// 		Otherwise they'll be in the element tree.
    	comments:        [dynamic]string,
    	// 		Internal
    	tokenizer:       ^Tokenizer,
    	allocator:       runtime.Allocator,
    	// 		Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
    	input:           []u8,
    	strings_to_free: [dynamic]string,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    Element ¶

    // Element is one entry of `Document.elements`: an identifier, its values and attributes,
    // a kind tag and a reference to its parent.
    Element :: struct {
    	ident:   string,
    	value:   [dynamic]Value,
    	attribs: [dynamic]Attribute,
    	// Distinguishes regular elements from comment nodes.
    	kind:    enum int {
    		Element = 0, 
    		Comment, 
    	},
    	// NOTE(review): presumably the Element_ID (also a u32) of the parent element; confirm against the parser.
    	parent:  u32,
    }

    Element_ID ¶

    Element_ID :: u32

    Encoding ¶

    // Encoding is the type of the `encoding` field of Document.
    Encoding :: enum int {
    	Unknown, 
    	UTF_8, 
    	ISO_8859_1, 
    	// 		Aliases
    	// 		LATIN_1 is given the value 2, which is the implicit value of ISO_8859_1
    	// 		(Unknown = 0, UTF_8 = 1, ISO_8859_1 = 2), so the two names are interchangeable.
    	LATIN_1    = 2, 
    }

    Error ¶

    // Error enumerates the error values of this package; `.None` (0) means success.
    Error :: enum int {
    	// 		General return values.
    	None                          = 0, 
    	General_Error, 
    	Unexpected_Token, 
    	Invalid_Token, 
    	// 		Couldn't find, open or read file.
    	File_Error, 
    	// 		File too short.
    	Premature_EOF, 
    	// 		XML-specific errors.
    	No_Prolog, 
    	Invalid_Prolog, 
    	Too_Many_Prologs, 
    	No_DocType, 
    	Too_Many_DocTypes, 
    	// 		NOTE: "Preceed" is a misspelling of "Precede"; it is part of the public identifier and is kept as-is.
    	DocType_Must_Preceed_Elements, 
    	// 		If a DOCTYPE is present _or_ the caller
    	// 		asked for a specific DOCTYPE and the DOCTYPE
    	// 		and root tag don't match, we return `.Invalid_DocType`.
    	Invalid_DocType, 
    	Invalid_Tag_Value, 
    	Mismatched_Closing_Tag, 
    	Unclosed_Comment, 
    	Comment_Before_Root_Element, 
    	Invalid_Sequence_In_Comment, 
    	Unsupported_Version, 
    	Unsupported_Encoding, 
    	// 		NOTE(review): the original comment text was lost when the page was rendered.
    	// 		Presumably: `<!FOO` declarations are usually skipped, and this error is returned
    	// 		when one is not; confirm against the package source.
    	Unhandled_Bang, 
    	Duplicate_Attribute, 
    	Conflicting_Options, 
    }
    Related Procedures With Returns

    Error_Handler ¶

    Error_Handler :: proc(pos: Pos, fmt: string, args: ..any)
    Related Procedures With Parameters

    Option_Flag ¶

    // Option_Flag enumerates the individual parser options; they are combined in the
    // `Option_Flags` bit_set.
    // NOTE(review): the markup examples in several comments below (`<?xml`, `<!DOCTYPE`,
    // `<![CDATA[`, `&gt;` ...) were stripped when the page was rendered and have been
    // restored from context; confirm against the package source.
    Option_Flag :: enum int {
    	// 		If the caller says that input may be modified, we can perform in-situ parsing.
    	// 		If this flag isn't provided, the XML parser first duplicates the input so that it can.
    	Input_May_Be_Modified, 
    	// 		Document MUST start with `<?xml` prologue.
    	Must_Have_Prolog, 
    	// 		Document MUST have a `<!DOCTYPE` tag.
    	Must_Have_DocType, 
    	// 		By default we skip comments. Use this option to intern a comment on a parented Element.
    	Intern_Comments, 
    	// 		How to handle unsupported parts of the specification, like `<!` other than `<!DOCTYPE` and `<![CDATA[`.
    	Error_on_Unsupported, 
    	Ignore_Unsupported, 
    	// 		By default CDATA tags are passed-through as-is.
    	// 		This option unwraps them when encountered.
    	Unbox_CDATA, 
    	// 		By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
    	// 		This option decodes them when encountered.
    	Decode_SGML_Entities, 
    	// 		If a tag body has a comment, it will be stripped unless this option is given.
    	Keep_Tag_Body_Comments, 
    }

    Option_Flags ¶

    Option_Flags :: bit_set[Option_Flag; u16]

    Options ¶

    Options :: struct {
    	flags:            bit_set[Option_Flag; u16],
    	expected_doctype: string,
    }
    Related Procedures With Parameters

    Pos ¶

    // Pos is a position in the input: file name, offset, line and column.
    // NOTE(review): in the rendered page the "starting at ..." remarks appeared on the line
    // *before* `line` and `column`, which looks like trailing comments shifted down by one
    // field. They are re-attached here to the field they most plausibly describe; confirm
    // against the package source.
    Pos :: struct {
    	file:   string,
    	offset: int, // starting at 0
    	line:   int, // starting at 1
    	column: int, // presumably also starting at 1 (TODO confirm)
    }
    Related Procedures With Parameters

    Token ¶

    Token :: struct {
    	kind: Token_Kind,
    	text: string,
    	pos:  Pos,
    }
    Related Procedures With Returns

    Token_Kind ¶

    Token_Kind :: enum int {
    	Invalid, 
    	Ident, 
    	Literal, 
    	Rune, 
    	String, 
    	Double_Quote,  // "
    	Single_Quote,  // '
    	Colon,         // :
    	Eq,            // =
    	Lt,            // <
    	Gt,            // >
    	Exclaim,       // !
    	Question,      // ?
    	Hash,          // #
    	Slash,         // /
    	Dash,          // -
    	Open_Bracket,  // [
    	Close_Bracket, // ]
    	EOF, 
    }
    Related Procedures With Parameters

    Tokenizer ¶

    Tokenizer :: struct {
    	// Immutable data
    	path:        string,
    	src:         string,
    	err:         Error_Handler,
    	// Tokenizing state
    	ch:          rune,
    	offset:      int,
    	read_offset: int,
    	line_offset: int,
    	line_count:  int,
    	// Mutable data
    	error_count: int,
    }
    Related Procedures With Parameters

    Value ¶

    // Value is one entry of `Element.value`: either a string or a u32.
    // NOTE(review): presumably the string variant is text content and the u32 variant is the
    // Element_ID (also a u32) of a child element; confirm against the parser.
    Value :: union {
    	string, 
    	u32, 
    }

    Constants

    CDATA_END ¶

    CDATA_END :: "]]>"

    CDATA_START ¶

    CDATA_START :: "<![CDATA["
    

    COMMENT_END ¶

    COMMENT_END :: "-->"

    COMMENT_START ¶

    COMMENT_START :: "<!--"