PoML tutorial




This is a tutorial for PoML (Polymorphic ML, pronounced "pommel") which is a programming language (targeting LLVM bitcode) that doesn't quite exist yet.



// This is a single-line comment.



This is a comment block.

//.   These can be nested, too.   .//



// This is a statement. It says that a variable "x" exists with the value of 5. Statements end with whitespace then a period.

x = 5 .

// Variable names are case sensitive for the first character of the name, and case-insensitive for the rest.


// This is a command. It tells the computer to set the value of x to 7. Commands end with a semicolon.

x << 7;



// This is a tuple. It stores a fixed number of things in a single variable.

my_tuple = 1, 2.2, "three" .


// "Show" is a function that converts its input to a string, adds a newline, and prints that to stdout.

Show my_tuple.2;       // prints the third element of the tuple: "three"


// This is a record.

// Some code goes inside the record, and all the newly defined variables at the end become values in it.

my_record = {a = "first" . b = 2 . c = 3 .} .

Show my_record.c;       // prints "3"


// Suppose we want to create an "object" that contains an int and a function that doubles and returns that int.

// We can do that with a function that creates a record:

NewIntObject () =


    a = 1 .

    GetVal () = a << a*2; a .


IntObj = NewIntObject() .

x = IntObj.GetVal() .



// This is a string.

s = "text" .

// Code can be put in a string using []. The result is automatically converted to a string.

x = 10 .

y = 20 .

s = "x is [x] and y is [y]" .

Show s;                // prints "x is 10 and y is 20"


// Special symbols can be escaped with \.

s = "use \\ to escape \" and \[" .


// Strings defined with !" can contain special symbols and matching !""! pairs.

s = !"This string (defined with !"[string]"!) does not require escaping " and does not execute code in []."! .



// This is an array.

my_array = [1; 2; 3; 4; 5] .


// This selects the first element, converts it to a string, and prints that.

Show my_array.[0];       // prints "1"

// The "." is there to clarify that this is indexing my_array, not defining a new array [0].


// Arrays can also be declared without filling them, using a type annotation:

declared_array is int[10] .


// Suppose we want to make a size 100 array, filled with the letter "a".

// We can do that with a function Make in Array:

a_array = Array.Make 100 "a" .

// Array is a record that contains functions for arrays. It's included in the default library.


// array.len gives the length of an array:

length = a_array.len .


// If you want to write "Make" instead of "Array.Make" you can open Array.

// This brings everything in Array into the current namespace.

open Array;

b_array = Make 100 "b" .


// Any record can be opened. Let's open my_record:

open my_record;

Show a;       // prints "first"


// We're done with those records now, so we can close them:

close Array;

close my_record;

// These commands only open or close records within the current lexical scope.



// This is a function that outputs (its input + 1).

AddOne x = x + 1 .

// By convention, functions use CamelCase names, and data uses names_with_underscores.

Show (AddOne 5);       // prints "6"


// Functions are "pass by reference" so this function adds 1 to its input:

Increment x = x << x+1 .



// Maybe we want our function to do something different when the input is a string.

// We can define a different function for string inputs, like this:

maybe AddOne (s is string) = s & " one" .        // & is used to concatenate strings together.


Show (AddOne "number");       // prints "number one"


 // This is an equivalent way of assigning multiple definitions to AddOne:

Increment x = x + 1 .

AppendOne (s is string) = s & " one" .

AddOne = maybe Increment maybe AppendOne .

 // Functions always use the first such "maybe" definition that accepts the input types.



// Like in OCaml, recursive function definitions must start with rec.

// This is an infinite loop that repeatedly adds 1 to its input:

rec IncrementForever x = IncrementForever (x+1) .



// Functions can be chained together using " ' ".

0 ' AddOne ' AddOne ' AddOne ' Show;        // prints "3"



// "()" is nil, which does nothing.

// If you want to call a function that takes no inputs, you use nil as a "fake" input.

ReturnTen () = 10 .

x = ReturnTen .

Show x .        // prints "x is () -> int" which is the type signature of ReturnTen.


Show(ReturnTen());        // prints "10"

nil = () .

x = ReturnTen nil .

Show x .        // also prints "10"





// We can define anonymous functions ("lambdas"), and those can be assigned to variables.

// The following 2 definitions are equivalent:

AddOne x = x + 1 .

AddOne = \x: x+1 .


// Lambda definitions can be ended by a "." matching a definition before they start, a "]" to explicitly end them, or by enclosing them in ().


// \\ is equivalent to \ but must be the first match of a lambda, so \\ can start a nested lambda.


// Constants can also be matched against:

rec factorial =

    \0 \1 : 1

    \n: n * factorial(n-1)



// This construct can also apply different cases to variant types.

// Suppose we have an enum:

type MyEnum = One | Two | Three .

// We can do different things depending on the enum:

PrintMyEnum =

    \One: Show "First";

    \Two: Show "Second";

    \: Show "Third";            // The \: at the end indicates a default option.


PrintMyEnum Two;        // prints "Second"



// As shorthand, when a lambda does not contain ":" and contains some combination of "x", "y", "z" it binds inputs to those letters.

GreaterThanTwo = \x: x > 2] .

GreaterThanTwo = \x > 2] .

// These definitions are equivalent.


// (: body) is shorthand for (\(): body)

b = (: x > 2)

b = (\(): x > 2)

// These expressions are equivalent.


// Fallthrough of cases in a lambda can be done explicitly using ->:

FizzBuzz n = (n mod 3 == 0), (n mod 5 == 0) '

    \(true, _) as input: Show "Fizz"; input ->        // input goes to the first matching case below this one

    \_, true: Show "Buzz";

    \false, false: Show "[n]";


    Show "\n";






// Suppose we want to add one to every element in an array.

// We can do that with Iter (iterate):

Iter input_array \x << x+1] ;        // adds 1 to every element of an array


// If we instead want a new array that's changed, we can use Map:

new_array = Map input_array \x+1] .


// Map and Iter can take a tuple of any number of arrays, if you give them a function that takes multiple inputs:

sum_array = Map (array_1, array_2) \x+y] .

// sum_array is as long as the shortest input array.



// Suppose we want "+" to also add 2 arrays together. We can define that with:

maybe (+) a b = Map (a,b) (+) .

// The (+) makes the operator "+" be treated as a regular function.

// (This overloading for + is included in the standard library.)



// true is a boolean value.

// AllTrue checks if a function returns true for every element in an array or tuple of arrays.

name = "test string" .

banned_strings = ["foo"; "bar"] .

result = banned_strings ' AllTrue \s: name != s] .           // result is true



// To filter an array, we can use Filter with a function that takes an element and returns a bool.

filtered_array = Filter input_array \x>0] .



Filter uses array slices.

An array value contains a pointer and a length.

An array slice uses different pointer and length values to point at part of an array.

So, Filter allocates an array of the same size as the input, and returns a slice that might be shorter.



// You can create array slices as follows:

array_slice = input_array.[start, end] .

// "end" inside an array index is the index of the last element.

// So, this creates a slice of everything except the first and last elements:

slice = input_array.[1, end-1] .


// You may want to copy the filtered array slice into a smaller array, so the larger array can be garbage collected.

// Copy is the universal copy function in PoML.

filtered_array = Copy(Filter input_array \x>0]) .

// Equivalently, that can instead be written as a chain:

filtered_array = input_array ' Filter \x>0] ' Copy .



// Map and Iter can accept a function of an integer as any input except the first and last.
// Those inputs are treated as an array with values of function(index).

output_array = Map (input_array, \x]) \x+y] .

// output_array has the index added to each element of input_array



// Suppose we want a more advanced data structure that acts like an array but allows fast insertions and deletions.

// We can convert a basic array into a dynamic array:

dynamic_array = DyAr [0; 1; 2; 3] .

first = dynamic_array.[0] .

// Why does the same access syntax work for data structures that may be defined in libraries?

// The compiler actually translates that to a special function Dot:

first = Dot dynamic_array (0) .


// How about assignments to that? The compiler does the following series of transforms:

dynamic_array.[0] << 20;

Dot dynamic_array (0) << 20;

Assign dynamic_array (0) 20;

// Because << causes this kind of transformation, << has a unique type and thus cannot be conditionally returned by a function.

// In other words, functions must either never return << or always return it.





// PoML's equivalent of "if" statements has the following structure:



| condition_b:      // optional "else"


| condition_c:      // there can be any number of these


|:                        // optional default case


]       // end


// conditionals can also return values:

b = true .

bool_text = b: "true" |: "false" .


// Here is a factorial function using this:

rec factorial n =

    n==0 || n==1 : 1

    |: n * factorial(n-1)

    .            // The "]" to end the conditional is implied when a definition containing it ends.





// throw goes downward until a matching catch is found.


throw #print_this x;                    // # marks an exception type

Show "This text is skipped.";       // this line is not executed

catch print_this x: Show x;;        // catch statements end with ";;"

// This prints "22".


// goto is like throw, but it goes up instead of down.

// So, this is an infinite loop:

catch 10;;


goto #10;


// The target of jumps cannot have variables in scope that the origin does not.

// If there are any such variables, that's an invalid jump that gives a compiler error.


// Exceptions must be caught, or else the compiler gives an error.

// If you want to crash the program, use fail instead.

fail;            // crashes the program and prints the location of this fail in the source code

fail "error message";        // crashes the program and prints "error message"





// for and while loops are functions defined using goto. The C-style for loop uses this definition:

for n test update body =
    i = n .
    catch loop_start;;
    test i:
        body i;
        update i;
        goto #loop_start;


// This prints numbers from 0 to 9:

for 0 \x<10] (++) \i:     // In C, this would be: for (int i=0; i<10; i++)
    Show i;


// for can also take a single input, and repeat that many times. Here's that definition:

maybe for n body =
    i = n .
    catch loop_start;;
    i != 0:
        goto #loop_start;


// Here is an example of that usage:

x = 0 .

for 10 (:

    x << x + 2;


Show x;         // prints "20"


// Here is an equivalent way of writing that loop:

x = 0 .

body() = x << x + 2 .

for 10 body;

Show x;


// Here is a for loop that prints 99 Bottles of Beer:

ShowLine n = Show "[n] bottles of beer on the wall. Take one down, pass it around,\n" .
for 99 \x>1] (--) ShowLine;
Show "1 bottle of beer on the wall. Take it down, pass it around, no more bottles of beer.\n";



// Here is the while loop definition:

while test body =
    catch loop_start;;
        goto #loop_start;


// Here is a while loop example, which prints 0 to 9:

i = 0 .

while (:i<10) (:

    Show i;




// Here is an equivalent way of writing that loop:

i = 0 .

test() = i<10 .

body() =

    Show i;



while test body;



// throw cannot jump into a named function, but can jump into a lambda or an if clause when no local variables are defined above the catch.

// That makes low-level optimizations such as Duff's device possible:

n = count / 4 .

remainder = count % 4 .

throw [#; #1; #2; #3].[remainder];        // a # does nothing when thrown

for n \:

    f data;

    catch 3;;

    f data;

    catch 2;;

    f data;

    catch 1;;

    f data;


// The above applies f to data for count times, with an unrolled loop.





// Enums with multiple values are defined using "|" like this:

type MyEnum = One | Two | Three .


// If we want to represent a tree of strings, we can use a recursive type:

type StringTree = Node (StringTree[]) | Leaf string .

// That says a StringTree element can either be a Leaf with a string OR a Node with a StringTree array.


// The following lines all define StringTree values.

leaf = Leaf "text" .

node = Node [leaf, leaf, leaf] .

string_tree = ["a"; ["b"; "c"]; "d"] is StringTree .

// Declaring a variant type for the array first checks for a matching StringTree constructor.

// When that fails, it checks for a matching StringTree constructor for each element. This is done recursively while it fails.

// When multiple constructors match, the first constructor in the definition order is used.


// We can process a StringTree using pattern matching:

rec PrintStringTree =

    \Leaf s: Show s;

    \Node n: Iter n PrintStringTree;


// That's a recursive function that prints all the strings in a StringTree.


// Variant types can also be used to create mixed arrays:

type IntOrString = Int int | String string .

mixed_array = [1, 2, 3, "a", "b", "c"] is IntOrString[] .





// Pointers are created with $ and dereferenced with @.

p = $20 .

Show p;            // prints "int ref"

Show @p;         // prints "20"


// PoML uses reference counting for garbage collection: data keeps a counter of how many pointers to it exist.

// Everything having a counter is inefficient, so only specially marked data has a reference counter.

// Only that marked data can have pointers to it copied.



// Suppose you have an array of strings. Each string in a is a reference to a character array.

a = Array.Make 100 "some string" .

// The array is garbage collected when a goes out of scope.


// Now we assign the array pointer to a new variable.

b = a .

// This is not a problem: the array is collected when both a and b are out of scope.


// Next, we assign one to a variable.

s = a.[n] .

// Now s is holding a reference to one of them, preventing garbage collection.


// If s goes out of scope before a or at the same time, then there is still no problem.

// But we might want to collect the rest of a before s.

// So, to prevent memory leaks, that's a compiler error if a goes out of scope before s does.

// To avoid the compiler error, the strings in a must all be marked as reference-counted using a special rc type annotation.

a = Array.Make 100 ("some string" is rc) .

// Now there's no problem with storing strings from a in a variable.

s = a[n] .


// What if we want to use circular references? For example, consider a pair of arrays A and B sometimes containing pointers to each other's elements.

// In that case, we need to contain A and B in a single object containing immutable pointers to both:

root_object = ($A is const, $B is const) is rc .

// That causes A and B to be reference counted and garbage collected together.



See the garbage collection implementation page for more details on this.





// Users can define custom operators, which can be binary or unary.

// Here's a definition for C's "+=" operator.

(+=) x y = x << x+y .


// Unary operators are applied before functions, and cannot have whitespace between them and their variable.

// Here's an increment operator:

(++) x = x << x+1 .

// That's used after a variable, because it doesn't return a value:


// Unary operators that return a value are instead used before the variable:

(<+>) x = x + 1 .

x_plus_1 = <+>x .





// Multiple function arguments are converted into a tuple, so the following 2 function definitions are equivalent:

let f x y z = 2*x*y*z;
let f (x, y, z) = 2*x*y*z;

// Those both have the type signature:

f is (int, int, int) -> int


// Currying of functions is not allowed, because it complicates typechecking and makes code harder to read. Instead, you should define a new function with constants for some parameters.


// Variables have a base type, which indicates their actual representation in a computer, and can also have tags, which indicate what overloaded functions can or should be used for them.

// For example, a string is a byte array that's tagged string. Here's the type definition for strings:

type string = byte[] +string .


// Type definitions can include a list of tags to give the base type.

// Type annotations can use +required_tag to require a tag, -banned_tag to exclude banned_tag, and (+tag1 | +tag2) to work with either tag1 OR tag2.


// const is a special type tag that disallows mutation.

x = 10 is const .     // the value of this x cannot be modified


back to index