commit 737d6383066f71e032d9ae30a8741165ff21b435
parent 8ff1c82b5235653c8a56a44dedc494c6d8eec618
Author: dwrz <dwrz@dwrz.net>
Date: Fri, 6 Nov 2020 16:18:48 +0000
Add x86-assembly
Diffstat:
11 files changed, 632 insertions(+), 0 deletions(-)
diff --git a/x86-assembly/slides.org b/x86-assembly/slides.org
@@ -0,0 +1,391 @@
+* 1. x86 Assembly
+
+- David Wen Riccardi-Zhu
+- Senior Software Engineer @ Good Uncle
+- dwrz@dwrz.net
+
+Slides and code: https://github.com/dwrz/talks/x86-assembly.
+
+* 2. Overview
+
+1. Follow a trail of questions... into the depths of the machine.
+2. Peel away some abstractions on the way.
+3. Gloss over a bunch of stuff (we have an hour, not a semester).
+4. Surface (hopefully) with a better understanding of how computers work.
+
+* 3. What does this program do?
+
+#+begin_src bash :results raw
+node -e "console.log('Hello, World');"
+#+end_src
+
+* 4. How does this program /work/?
+
+#+begin_src bash :results raw
+node -e "console.log('Hello, World');"
+#+end_src
+
+How does "Hello World" get onto the terminal?
+
+* 5. What does this program do?
+
+#+begin_src bash
+node -e ""
+#+end_src
+
+Does it do /anything/?
+
+* 6. It does /something/.
+
+We can tell, because it takes a few milliseconds to execute.
+
+#+begin_src bash :results raw
+time node -e "console.log('Hello, World');"
+#+end_src
+
+#+begin_src bash :results raw
+time node -e ""
+#+end_src
+
+* 7. It /definitely/ does /something/.
+
+We can tell, because it's making system calls.
+
+#+begin_src bash :results raw
+strace node -e ""
+#+end_src
+
+Lot of noise for a program that does "nothing"...
+
+#+begin_src bash :results raw
+strace node -e "console.log('Hello, World');"
+#+end_src
+
+* 8. JavaScript
+We've actually started with something difficult.
+
+JavaScript is an interpreted language.
+
+#+begin_src js
+const myProgram = '0 + 1'.replace('+', '-');
+
+const result = eval(myProgram);
+
+console.log(result); // -1
+#+end_src
+
+#+RESULTS:
+
+* 9. C
+Let's use a simpler example.
+
+C is compiled.
+
+We create our own program, rather than use a program that interprets strings.
+
+#+begin_src C :results raw
+#include <stdio.h>
+
+int main(void) {
+ printf("Hello, World\n");
+}
+#+end_src
+
+* 10. Compile and Execute
+
+#+begin_src bash
+gcc hello-world.c -o hello-world
+#+end_src
+
+#+begin_src bash
+./hello-world
+#+end_src
+
+#+begin_src bash
+strace ./hello-world
+#+end_src
+
+* 11. write
+This seems familiar:
+#+begin_src C
+write(1, "Hello, World\n", 13Hello, World
+) = 13
+#+end_src
+
+It's in our node program, too:
+#+begin_src bash
+strace node -e "console.log('Hello, World');" 2>&1 | grep "Hello"
+#+end_src
+
+/What is this?/
+What is ~write~?
+What is 1?
+What is 13?
+
+* 12. System Calls
+~strace~ shows us system calls. ~write~ is a system call.
+
+What is a system call?
+
+#+begin_src bash
+man syscalls
+#+end_src
+
+#+begin_quote
+The system call is the fundamental interface between an application and the Linux kernel.
+#+end_quote
+
+* 13. write
+
+#+begin_src bash
+man 2 write
+#+end_src
+
+#+begin_src C
+write(int fd, const void *buf, size_t count);
+#+end_src
+
+#+begin_quote
+write() writes up to count bytes from the buffer starting at buf to the file referred to by the file descriptor fd.
+#+end_quote
+
+* 14. write
+
+#+begin_src text
+write(1, "Hello, World\n", 13Hello, World
+) = 13
+#+end_src
+
+File Descriptor 1 = Standard Out (inherited from terminal process)
+Hello World = Buffer
+Count = 13 bytes
+
+|---+---+---+---+---+---+---+---+---+---+---+---+----|
+| H | e | l | l | o | , | | W | o | r | l | d | \n |
+|---+---+---+---+---+---+---+---+---+---+---+---+----|
+| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | 1 | 2 | 3 |
+|---+---+---+---+---+---+---+---+---+---+---+---+----|
+
+* 15. Back to Nothing
+#+begin_src C
+int main(void) {}
+#+end_src
+
+#+begin_src bash
+gcc exit.c -o exit
+#+end_src
+
+#+begin_src bash
+strace ./exit
+#+end_src
+
+* 16. Exit Code
+In C, the return type prefixes the function.
+
+~main~ returns an ~int~; the default is zero (indicating no error).
+
+#+begin_src C
+int main(void) {
+ return 1;
+}
+#+end_src
+
+#+begin_src C
+exit_group(1)
+#+end_src
+
+* 17. exit_group
+#+begin_src bash
+man 2 exit_group
+#+end_src
+
+#+begin_src C
+void exit_group(int status);
+#+end_src
+
+* 18. Exit
+
+[[file:src/exit/exit.s]]
+
+* 19. Assemble, Link, Execute, Trace
+#+begin_src bash
+as exit.s -o exit.o
+
+ld exit.o -o exit
+
+./exit
+
+strace ./exit
+#+end_src
+
+* 20. x86 Assembly
+- Human readable form of machine code.
+- 1-to-1 mapping between one assembly instruction and one CPU instruction.
+- Hardware specific: e.g., x86 Assembly differs from ARM Assembly.
+- Often OS specific --> Linux System Calls != BSD, Mac, Windows system calls.
+- Different syntax formats: ATT, Intel.
+- What instructions? Need to consult hardware manual.
+ - [[https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html][Intel x86 Developer Manual]] is ~5,000 pages long, plus errata.
+
+* 21. Use Cases
+- Low-level programming (micro-controllers, operating systems)
+- Resource Constrained Hardware
+ - [[https://github.com/chrislgarry/Apollo-11][Apollo 11 Guidance Computer Assembly]]
+ - [[https://github.com/pret/pokered][Pokemon Red/Blue Assembly]]
+- Performance
+- Control
+- Reverse Engineering
+
+* 22. Instructions
+- Describe an operation the CPU should perform, e.g.:
+ - Move data in and out of registers
+ - Modify register contents
+ - Modify stack
+ - Control program flow
+- Represented by numbers (opcodes).
+
+* 23. Instruction Cycle
+- On every tick of its internal clock, the CPU:
+ - *Fetches* the next instruction.
+ - *Decodes* it (what operation, on what operands).
+ - *Executes* the instruction.
+ - Increments the instruction pointer.
+
+* 24. Registers
+- Storage on the CPU (fastest storage).
+- Act as a scratchpad -- temporary variables.
+- General Purpose Registers
+ - RAX, RBX, RCX
+ - RSP, RBP (stack pointer, stack frame pointer)
+- Special Purpose Registers
+ - RIP (Instruction Pointer)
+ - RFLAGS (negative, zero, etc.)
+- It's possible to use just a portion of the register.
+#+begin_src text
+|__64__|__56__|__48__|__40__|__32__|__24__|__16__|__8___|
+|__________________________RAX__________________________|
+|xxxxxxxxxxxxxxxxxxxxxxxxxxx|____________EAX____________|
+|xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx|_____AX______|
+|xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx|__AH__|__AL__|
+#+end_src
+
+* 25. Exit++
+
+[[file:src/math/math.s]]
+
+* 26. Sections
+What happens when we run a program? A few things...
+
+One of them: the kernel loads the executable into memory.
+
+Assembly sections correspond to regions of the executable's memory layout:
+
+|-----------|
+| TEXT | --> Code (instructions)
+| RODATA | --> const str = "Hello, World";
+| DATA | --> var str = "Hello, World";
+| BSS | --> var str;
+| ↓ HEAP ↓ |
+| |
+| ↑ STACK ↑ |
+|-----------|
+
+* 27. Hello World
+
+[[file:src/hello-world/hello-world.s]]
+
+* 28. Control Flow
+Programs are either sequential, looping, or branching.
+
+- CPU sets FLAGS register after instruction: e.g., result is zero, negative.
+- Jump to code based on the state of FLAGS.
+- Jump changes RIP (instruction pointer).
+
+[[file:src/control-flow/control-flow.s]]
+
+* 29. Stack
+RSP register points to the top of the stack.
+RBP register (typically) points to the (current) base of the stack.
+Together, they form a stack frame.
+
+Instructions:
+- ~push~ :: decrements RSP, moves bytes onto stack.
+- ~pop~ :: increments RSP, moves stack bytes into register.
+
+[[file:src/stack/stack.s]]
+
+* 30. Functions
+Why do we use functions? Same reasons apply in Assembly:
+- Reuse
+- Organization
+- Abstraction
+- Splitting work
+
+Problems:
+- How to pass arguments?
+ - Registers -- which ones?
+ - Stack -- what order?
+- Whose job is it to preserve or clean up registers? Caller? Callee?
+ - E.g., caller saves a value in %rbx to use after function returns.
+ - Callee uses %rbx and overwrites that value.
+- How to pass return value(s)?
+
+* 31. Convention
+Which side of the street should we drive on?
+Either way works, both are used in practice.
+What matters is agreement on an approach.
+
+The System V AMD64 ABI is the calling convention for Unix x64 systems:
+- Some registers must be saved by the caller, so callee can use them.
+- Some registers must be saved by the callee, if it plans to use them.
+- Some registers used to pass arguments.
+- Stack used to pass extra or large arguments.
+- RAX and RDX are used for return values.
+
+* 32. Stack Arguments
+
+[[file:src/func/func.s]]
+
+Each row is 8 bytes (64 bits).
+|----------------+-----------+----------------|
+| Address | Data | Stack Pointers |
+|----------------+-----------+----------------|
+| 0x7fffffffe8f8 | | |
+| 0x7fffffffe900 | 0x0 (rbp) | |
+| 0x7fffffffe908 | 0x401002 | |
+| 0x7fffffffe910 | 3 | ←rsp |
+|----------------+-----------+----------------|
+←rbp
+
+* 33. Safety and Security
+
+[[file:src/safety/safety.s]]
+
+* 34. Review
+Where we started:
+
+#+begin_src bash
+node -e ""
+#+end_src
+
+- CPU processes instructions
+- Uses registers and memory (stack)
+- Control flow with jump instruction and flags register
+- Functions
+- System Calls
+- Comparison with Compiled and Interpreted Languages
+- Tradeoffs
+
+* 35. Conclusion
+- Appreciation for higher level, and work done to get us here.
+- Insight into how computers work.
+- A platform to better understand things like functions, closures, APIs, pass by reference and pass by value, performance.
+
+* 36. References / Further Reading
+
+- Davy Wybiral, [[https://www.youtube.com/playlist?list=PLmxT2pVYo5LB5EzTPZGfFN0c2GDiSXgQe][Intro to x86 Assembly Language]]
+- Jennifer Rexford, [[https://www.cs.princeton.edu/courses/archive/fall05/cos217/][Princeton COS 217: Introduction to Programming Systems]]
+- [[https://en.wikipedia.org/wiki/Objdump][objdump]]
+- [[https://en.wikipedia.org/wiki/Structured_program_theorem][Structured Program Theorem]]
+- [[https://www.gnu.org/software/gdb/][GDB]]
+- [[https://www.youtube.com/watch?v=tpIctyqH29Q&list=PL8dPuuaLjXtNlUrzyH5r6jN9ulIgZBpdo][Crash Course: Computer Science]]
diff --git a/x86-assembly/src/control-flow/control-flow.s b/x86-assembly/src/control-flow/control-flow.s
@@ -0,0 +1,23 @@
+.data
+stop:
+    .quad 3                 # Loop bound. 8 bytes wide, because the cmp below reads 8 bytes.
+
+.text
+    .global _start
+# Count %rcx up from 0 until it equals stop, then exit with %rcx as the status.
+_start:
+    mov $0, %rcx            # %rcx is the loop counter.
+loop:
+    cmp stop, %rcx          # 64-bit compare of %rcx with stop; the result goes to FLAGS.
+    # If equal, change RIP to exit.
+    je exit
+    inc %rcx
+    # Unconditionally change RIP to loop.
+    jmp loop
+exit:
+    # Try this: comment out the following three instructions.
+    # What happens if you assemble and link? Why?
+    # Think about RIP and the Fetch-Decode-Execute cycle.
+    mov $231, %rax          # System call number for exit_group.
+    mov %rcx, %rdi          # Exit code -- let's see the value of %rcx.
+    syscall
diff --git a/x86-assembly/src/exit/exit.c b/x86-assembly/src/exit/exit.c
@@ -0,0 +1,3 @@
+int main(void) { // Deliberately empty: the program does nothing but exit.
+ // return 1; // Uncomment to exit with status 1 instead of the default 0.
+}
diff --git a/x86-assembly/src/exit/exit.s b/x86-assembly/src/exit/exit.s
@@ -0,0 +1,12 @@
+.text
+    .globl _start
+# Exit with status 0 through the int 0x80 system call interface.
+_start:
+    # EBX holds the exit code: 0.
+    movl $0, %ebx
+
+    # EAX holds the system call number: 1.
+    movl $1, %eax
+
+    # Trigger interrupt handler 0x80 (128) -- on Linux, this is the kernel.
+    int $0x80
diff --git a/x86-assembly/src/func/func.s b/x86-assembly/src/func/func.s
@@ -0,0 +1,31 @@
+.text
+ .global _start
+
+_start:
+ # Store 3 on the stack: this is the argument to square.
+ push $3
+ # call stores return address on the stack.
+ call square
+ # Use the return value, in %rax, as our exit status number.
+ mov %rax, %rdi
+ mov $231, %rax # exit_group
+ syscall
+
+# square expects an integer on the stack, at 16(%rbp) once its frame is set up.
+# The squared value is returned in %rax.
+square:
+ # Function Prologue --> {
+ push %rbp
+ mov %rsp, %rbp
+
+ # Retrieve the parameter from the stack.
+ # 0(%rbp) is the %rbp we just saved; 8(%rbp) is the return address into _start.
+ # 16(%rbp) is the parameter to multiply.
+ mov 16(%rbp), %rax
+ mul %rax # rax *= rax
+
+ # Function Epilogue --> }
+ mov %rbp, %rsp
+ pop %rbp
+ # ret pops the return address off the stack and jumps to it.
+ ret
diff --git a/x86-assembly/src/hello-world/hello-world.c b/x86-assembly/src/hello-world/hello-world.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+// Prints "Hello, World" followed by a newline.
+int main(void) {
+ printf("Hello, World\n"); // Under strace this shows up as write(1, "Hello, World\n", 13).
+}
diff --git a/x86-assembly/src/hello-world/hello-world.s b/x86-assembly/src/hello-world/hello-world.s
@@ -0,0 +1,17 @@
+.data
+str:
+    .ascii "Hello, World\n"
+    .equ str_len, . - str       # Byte count of the string above (13).
+.text
+    .globl _start
+# Write str to stdout with sys_write, then exit with status 0.
+_start:
+    mov $str_len, %rdx          # Number of bytes to write.
+    lea str(%rip), %rsi         # Address of the buffer.
+    mov $1, %rdi                # File descriptor 1: stdout.
+    mov $1, %rax                # 64-bit system call number for sys_write.
+    syscall
+
+    mov $0, %rdi                # Exit code.
+    mov $231, %rax              # 64-bit system call number for exit_group.
+    syscall
diff --git a/x86-assembly/src/main.go b/x86-assembly/src/main.go
@@ -0,0 +1,23 @@
+package main
+
+import (
+ "fmt"
+ "strings"
+ "time"
+)
+
+func main() {
+	start := time.Now() // Case 1: one Println call per number.
+	for n := 0; n < 1000; n++ {
+		fmt.Println(n)
+	}
+	fmt.Printf("took %v\n", time.Since(start))
+
+	start = time.Now() // Case 2: collect every line in memory, then print once.
+	var buf strings.Builder
+	for n := 0; n < 1000; n++ {
+		fmt.Fprintf(&buf, "%d\n", n)
+	}
+	fmt.Println(buf.String())
+	fmt.Printf("took %v\n", time.Since(start))
+}
diff --git a/x86-assembly/src/math/math.s b/x86-assembly/src/math/math.s
@@ -0,0 +1,32 @@
+.text
+    .global _start
+# Walk %rax through inc, dec, add, sub, mul and div, then exit with the result.
+_start:
+    mov $0, %rax            # rax = 0
+    inc %rax                # rax++
+    dec %rax                # rax--
+    add $7, %rax            # rax += 7
+    sub $5, %rax            # rax -= 5
+
+    # At this point, rax = 2.
+    # Multiplication cannot use immediate addressing.
+    # So we store the multiplier in %rbx.
+    # mul assumes the multiplicand is in %rax.
+    # The product is also stored in %rax.
+    mov $12, %rbx
+    mul %rbx                # rax *= 12
+
+    # rax = 24.
+    # Division also cannot use immediate addressing: store the divisor in %rbx.
+    # div treats %rdx:%rax as one 128-bit dividend, so zero %rdx explicitly.
+    # The quotient is stored in %rax.
+    mov $3, %rbx
+    xor %edx, %edx          # rdx = 0 (a 32-bit write also clears the upper 32 bits)
+    div %rbx                # rax /= 3
+
+    # rax = 8
+    # Let's use this as our exit code.
+    # We're using 64-bits now -- the exit code should be in %rdi.
+    mov %rax, %rdi
+    mov $231, %rax          # System call number for exit_group.
+    syscall
diff --git a/x86-assembly/src/safety/safety.s b/x86-assembly/src/safety/safety.s
@@ -0,0 +1,47 @@
+.data
+romeo:
+    .ascii "What's in a name? That which we call a rose,\n"
+juliet:
+    .ascii "By any other name would smell as sweet.\n"
+    .ascii "\n"
+stop:
+    # Try changing this number. What happens? Why?
+    # Hint: try 0, 1, 11, 12, 21, 22.
+    .quad 11                    # 8 bytes wide, to match the 64-bit cmp in the loop.
+
+.text
+    .global _start
+# Print the text, overwrite it 4 bytes per iteration until %rcx equals stop, print it again.
+_start:
+    # Print the string before it's messed up.
+    mov $1, %rax                # sys_write.
+    mov $1, %rdi                # stdout
+    mov $romeo, %rsi            # address
+    mov $86, %rdx               # count bytes: everything from romeo up to stop (45 + 40 + 1)
+    syscall
+
+    # Setup.
+    lea romeo, %rax             # Load the starting address of string into %rax.
+    mov $0xb98c9ff0, %ebx       # 🌹 = b98c9ff0 (4 bytes)
+    mov $0, %rcx                # Set the initial counter.
+
+    # Overwrite the strings with 🌹.
+    # Increment the address in %rax by four bytes on each iteration.
+loop:
+    mov %ebx, (%rax)            # Copy
+    inc %rcx                    # increment our counter
+    add $4, %rax                # Try changing this to sub. What happens? Why?
+    cmp stop, %rcx              # 64-bit compare against the 8-byte stop.
+    jne loop
+
+    # Print the string after it's been messed up.
+    # NB: nothing stopped us from overwriting past the "romeo" string.
+    mov $1, %rax                # sys_write
+    mov $1, %rdi                # stdout
+    mov $romeo, %rsi            # address
+    mov $85, %rdx               # count bytes (one fewer than before: leaves off the final "\n")
+    syscall
+
+    mov $231, %rax              # 64-bit system call number for exit_group.
+    mov $0, %rdi                # Exit code.
+    syscall                     # Call the kernel.
diff --git a/x86-assembly/src/stack/stack.s b/x86-assembly/src/stack/stack.s
@@ -0,0 +1,48 @@
+.text
+ .global _start
+# Print "Hey\n" straight from the stack, then exit with a value popped off it.
+_start:
+ # Push some 64-bit integers onto the stack.
+ push $3
+ push $2
+ push $1
+
+ # Let's print "Hey\n", using the stack.
+ # man ascii
+ # |---------+----+-----+-----+-----|
+ # | ASCII | H | e | y | \n |
+ # |---------+----+-----+-----+-----|
+ # | Decimal | 72 | 101 | 121 | 10 |
+ # |---------+----+-----+-----+-----|
+ # | Hex | 48 | 65 | 79 | 0A |
+ # |---------+----+-----+-----+-----|
+
+ # Remember that the stack is backwards:
+ # it starts at high memory addresses, and "grows" down.
+ # However, the write system call follows a low-to-high address order.
+ # This means we need to reverse the order of our string.
+ # <-- high low -->
+ # \n y e H
+ push $0x000000000A796548
+
+ # %rsp points to the top of the stack.
+ # (Where 'H' is -- a lower address).
+ # We can pass this address to the kernel.
+ # write will start at 'H', and work up to '\n'.
+ mov $1, %rax # write
+ mov $1, %rdi # stdout
+ lea (%rsp), %rsi # address of buffer
+ # Try changing the number of bytes to 9, 17, 25.
+ # Then call the program with strace.
+ # You'll be able to see the previous numbers we pushed onto the stack.
+ mov $4, %rdx # number of bytes
+ syscall
+
+ # Remember, the stack is LIFO.
+ pop %rdi # pop the "Hey\n" string
+ pop %rdi # pop 1
+ pop %rdi # pop 2 -- the 3 we pushed first is left on the stack
+
+ # We'll use the last popped value in %rdi (2) as our exit code.
+ mov $231, %rax # exit_group
+ syscall