Ahmad Yoosofan
Compiler course
University of Kashan
# Install Python 3 (already installed on most Linux systems), then pip3:
#   sudo apt-get install python3-pip   # for Debian and Ubuntu
# Install SLY:
#   pip3 install sly   # or just copy the sly files into the project folder
# 222.plus.py -- minimal SLY lexer: integers and '+' only.
from sly import Lexer


class CalcLexer(Lexer):
    """Tokenize a stream of integers (NUMBER) and plus signs (PLUS)."""

    # Required: the set of token names.  SLY's metaclass lets these bare
    # names resolve to their own strings inside a Lexer subclass body.
    tokens = {NUMBER, PLUS}
    # Characters silently skipped between tokens (not inside them).
    ignore = ' \t'

    # String rules: each token name is bound to its regular expression.
    PLUS = r'\+'
    NUMBER = r'[0-9]+' #\d+


if __name__ == '__main__':
    data = '3 + 42 +8'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 222.plus.py 2 3 type='NUMBER', value='3' 4 type='PLUS', value='+' 5 type='NUMBER', value='42' 6 type='PLUS', value='+' 7 type='NUMBER', value='8'
# 233.error.py -- same lexer plus an error() handler for bad characters.
from sly import Lexer


class CalcLexer(Lexer):
    """Tokenize integers and '+', reporting and skipping bad characters."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'

    PLUS = r'\+'
    NUMBER = r'[0-9]+' #\d+

    def error(self, t):
        """Called by SLY on an unmatched character; report and resume."""
        print("Illegal character '%s'" % t.value[0])
        # Advance past the offending character, otherwise lexing stalls.
        self.index += 1


if __name__ == '__main__':
    # The '.' in '8.43' is not matched by any rule and triggers error().
    data = '3 + 42 +8.43+456'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 233.error.py 2 3 type='NUMBER', value='3' 4 type='PLUS', value='+' 5 type='NUMBER', value='42' 6 type='PLUS', value='+' 7 type='NUMBER', value='8' 8 Illegal character '.' 9 type='NUMBER', value='43' 10 type='PLUS', value='+' 11 type='NUMBER', value='456'
# 244.plus.py -- attach an action function to an existing string rule.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer whose NUMBER tokens are converted to int by an action."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'

    PLUS = r'\+'
    NUMBER = r'\d+' #[0-9]+

    # A method with the same name as the string rule above picks up that
    # rule's pattern and post-processes each matched token.
    def NUMBER(self, t):
        t.value = int(t.value)   # replace the lexeme text with its int value
        print('number:', t.value)
        return t                 # returning t emits the token

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = '3 + 42 +8'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 244.plus.py 2 3 number: 3 4 type='NUMBER', value=3 5 type='PLUS', value='+' 6 number: 42 7 type='NUMBER', value=42 8 type='PLUS', value='+' 9 number: 8 10 type='NUMBER', value=8
# 255.index.py -- tokens also carry their character offset (tok.index).
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer converting NUMBER lexemes to int; main() prints token indexes."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'

    PLUS = r'\+'
    NUMBER = r'\d+' #[0-9]+

    # Same-named method attaches to the NUMBER string rule above.
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = '3 + 42 +8'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        # tok.index is the 0-based offset of the token in `data`.
        print('type=%r, value=%r, index=%r' % (
            tok.type, tok.value, tok.index))
1 python3 255.index.py 2 3 type='NUMBER', value=3, index=0 4 type='PLUS', value='+', index=2 5 type='NUMBER', value=42, index=4 6 type='PLUS', value='+', index=7 7 type='NUMBER', value=8, index=8
# 264.just.number.function.py -- pattern attached with the @_ decorator.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer whose NUMBER rule is a single decorated action function."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'

    PLUS = r'\+'

    # @_ binds the regex to the function; '_' is injected by SLY's
    # metaclass and only exists inside Lexer subclass bodies.
    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = '3 + 42 +8'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r, index=%r' % (
            tok.type, tok.value, tok.index))
1 python3 264.just.number.function.py 2 3 type='NUMBER', value=3, index=0 4 type='PLUS', value='+', index=2 5 type='NUMBER', value=42, index=4 6 type='PLUS', value='+', index=7 7 type='NUMBER', value=8, index=8
# 274.ignore.lines.py -- skip comments and newlines via ignore_* rules.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer that discards '#...' comments and runs of newlines."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'

    # ignore_-prefixed string rules are matched and then thrown away.
    ignore_comment = r'\#.*'
    ignore_newline = r'\n+'

    PLUS = r'\+'

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    # Multi-line input; comments and line breaks are ignored, but the
    # skipped characters still advance tok.index.
    data = '''3 + 42 +8 # First comment
    + 46+980+51+# Another commnet
    343+43
    '''
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r, index=%r'
              % (tok.type, tok.value, tok.index))
1 python3 274.ignore.lines.py 2 3 type='NUMBER', value=3, index=0 4 type='PLUS', value='+', index=2 5 type='NUMBER', value=42, index=4 6 type='PLUS', value='+', index=7 7 type='NUMBER', value=8, index=8 8 type='PLUS', value='+', index=30 9 type='NUMBER', value=46, index=32 10 type='PLUS', value='+', index=34 11 type='NUMBER', value=980, index=35 12 type='PLUS', value='+', index=38 13 type='NUMBER', value=51, index=39 14 type='PLUS', value='+', index=41 15 type='NUMBER', value=343, index=64 16 type='PLUS', value='+', index=67 17 type='NUMBER', value=43, index=68
# 280.lineno.wrong.py -- deliberate demo: lineno is NEVER updated here.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer whose ignore_newline is a plain string rule.

    Because no action runs for the discarded newlines, self.lineno is
    never incremented, so every token reports lineno=1 -- the point of
    this (intentionally wrong) example.
    """

    tokens = {NUMBER,PLUS}
    ignore = ' \t'

    ignore_comment = r'\#.*'
    # String-rule ignore: matches and discards, but runs no code.
    ignore_newline = r'\n+'

    PLUS = r'\+'

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = '''3 + 42 +8 # First comment
    + 46+980+51+# Another commnet
    343+43
    '''
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r, index=%r, lineno=%r'
              % (tok.type, tok.value, tok.index, tok.lineno))
1 python3 280.lineno.wrong.py 2 3 type='NUMBER', value=3, index=0, lineno=1 4 type='PLUS', value='+', index=2, lineno=1 5 type='NUMBER', value=42, index=4, lineno=1 6 type='PLUS', value='+', index=7, lineno=1 7 type='NUMBER', value=8, index=8, lineno=1 8 type='PLUS', value='+', index=30, lineno=1 9 type='NUMBER', value=46, index=32, lineno=1 10 type='PLUS', value='+', index=34, lineno=1 11 type='NUMBER', value=980, index=35, lineno=1 12 type='PLUS', value='+', index=38, lineno=1 13 type='NUMBER', value=51, index=39, lineno=1 14 type='PLUS', value='+', index=41, lineno=1 15 type='NUMBER', value=343, index=64, lineno=1 16 type='PLUS', value='+', index=67, lineno=1 17 type='NUMBER', value=43, index=68, lineno=1
# 284.lineno.py -- fix of the previous slide: track line numbers.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer that keeps tok.lineno accurate via an ignore_newline action."""

    tokens = {NUMBER, PLUS}
    ignore = ' \t'
    ignore_comment = r'\#.*'
    PLUS = r'\+'

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Line number tracking
    @_(r'\n+')
    def ignore_newline(self, t):
        # Count every newline in the run so blank lines are included;
        # returning nothing discards the token.
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = '''3 + 42 +8 # First comment
    + 46+980+51+# Another commnet
    343+43
    '''
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r, index=%r, lineno=%r'
              % (tok.type, tok.value, tok.index, tok.lineno))
1 python3 284.lineno.py 2 3 type='NUMBER', value=3, index=0, lineno=1 4 type='PLUS', value='+', index=2, lineno=1 5 type='NUMBER', value=42, index=4, lineno=1 6 type='PLUS', value='+', index=7, lineno=1 7 type='NUMBER', value=8, index=8, lineno=1 8 type='PLUS', value='+', index=30, lineno=2 9 type='NUMBER', value=46, index=32, lineno=2 10 type='PLUS', value='+', index=34, lineno=2 11 type='NUMBER', value=980, index=35, lineno=2 12 type='PLUS', value='+', index=38, lineno=2 13 type='NUMBER', value=51, index=39, lineno=2 14 type='PLUS', value='+', index=41, lineno=2 15 type='NUMBER', value=343, index=64, lineno=3 16 type='PLUS', value='+', index=67, lineno=3 17 type='NUMBER', value=43, index=68, lineno=3
# 322.token.remapping.py -- remap identifiers to keyword tokens.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer that reclassifies reserved words matched by the ID rule."""

    tokens = {NUMBER, PLUS, ID, IF, ELSE, WHILE}
    ignore = ' \t'
    ignore_comment = r'\#.*'
    PLUS = r'\+'
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'

    # Special cases
    # Whole-lexeme matches of ID are remapped to keyword tokens;
    # 'ifelse' still lexes as a plain ID.
    ID['if'] = IF
    ID['else'] = ELSE
    ID['while'] = WHILE

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Line number tracking
    @_(r'\n+')
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Report an unmatched character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1


if __name__ == '__main__':
    data = 'count+ if +ifelse+8+while # First comment'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 322.token.remapping.py 2 3 type='ID', value='count' 4 type='PLUS', value='+' 5 type='IF', value='if' 6 type='PLUS', value='+' 7 type='ID', value='ifelse' 8 type='PLUS', value='+' 9 type='NUMBER', value=8 10 type='PLUS', value='+' 11 type='WHILE', value='while'
# 524.more.tokens.py -- a fuller set of string-rule tokens.
from sly import Lexer


class CalcLexer(Lexer):
    """Calculator lexer: identifiers, numbers, and arithmetic operators."""

    # Set of token names.
    # This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
               DIVIDE, ASSIGN, LPAREN, RPAREN }

    # String containing ignored characters
    # between tokens
    ignore = ' \t'

    # Regular expression rules for tokens.
    # No action functions here, so NUMBER values stay as strings.
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'


if __name__ == '__main__':
    data = 'x = 3 + 42 * (s - t)'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 524.more.tokens.py 2 3 type='ID', value='x' 4 type='ASSIGN', value='=' 5 type='NUMBER', value='3' 6 type='PLUS', value='+' 7 type='NUMBER', value='42' 8 type='TIMES', value='*' 9 type='LPAREN', value='(' 10 type='ID', value='s' 11 type='MINUS', value='-' 12 type='ID', value='t' 13 type='RPAREN', value=')'
# 544.longer.before.shorter.py -- define longer patterns first.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer where '==' (EQ) is correctly defined before '=' (ASSIGN)."""

    # Set of token names.
    # This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
               DIVIDE, LPAREN, RPAREN, ASSIGN, EQ }

    # String containing ignored characters
    # between tokens
    ignore = ' \t'

    # Regular expression rules for tokens.
    # String rules are tried in definition order, so the longer '=='
    # must be defined before '=' or it could never match.
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    EQ = r'==' # MUST APPEAR FIRST! (LONGER)
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'


if __name__ == '__main__':
    data = 'x = 3 + 42 * (s == t)'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 544.longer.before.shorter.py 2 3 type='ID', value='x' 4 type='ASSIGN', value='=' 5 type='NUMBER', value='3' 6 type='PLUS', value='+' 7 type='NUMBER', value='42' 8 type='TIMES', value='*' 9 type='LPAREN', value='(' 10 type='ID', value='s' 11 type='EQ', value='==' 12 type='ID', value='t' 13 type='RPAREN', value=')'
# 555.longer.before.shorter.error.py -- deliberate demo of wrong order.
from sly import Lexer


class CalcLexer(Lexer):
    """Intentionally broken lexer: ASSIGN is defined before EQ.

    String rules match in definition order, so the shorter '=' wins and
    '==' is scanned as two ASSIGN tokens instead of one EQ token.
    """

    # Set of token names.
    # This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
               DIVIDE, LPAREN, RPAREN, ASSIGN, EQ }

    # String containing ignored characters
    # between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    # WRONG ORDER (on purpose): '=' comes first, so EQ never matches.
    ASSIGN = r'='
    EQ = r'==' # MUST APPEAR FIRST! (LONGER)
    LPAREN = r'\('
    RPAREN = r'\)'


if __name__ == '__main__':
    data = 'x = 3 + 42 * (s == t)'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 555.longer.before.shorter.error.py 2 3 type='ID', value='x' 4 type='ASSIGN', value='=' 5 type='NUMBER', value='3' 6 type='PLUS', value='+' 7 type='NUMBER', value='42' 8 type='TIMES', value='*' 9 type='LPAREN', value='(' 10 type='ID', value='s' 11 type='ASSIGN', value='=' 12 type='ASSIGN', value='=' 13 type='ID', value='t' 14 type='RPAREN', value=')'
# 575.adding.match.actions.py -- one action method serving two patterns.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer whose NUMBER action accepts both hex (0x..) and decimal."""

    # Fixed: TIMES was listed here but no rule below defined it, so the
    # lexer could never produce it; the dead token name has been removed.
    tokens = { ID, NUMBER, PLUS, ASSIGN, EQ}

    ignore = ' \t'

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS = r'\+'
    ASSIGN = r'='
    # NOTE(review): EQ is still defined AFTER ASSIGN, so '==' scans as two
    # ASSIGN tokens -- contrary to this original comment's own advice.
    EQ = r'==' # MUST APPEAR FIRST! (LONGER)

    # Several alternative patterns may share one action method.
    @_(r'0x[0-9a-fA-F]+', r'\d+')
    def NUMBER(self, t):
        if t.value.startswith('0x'):
            t.value = int(t.value[2:], 16)  # hex literal, e.g. '0xa2' -> 162
        else:
            t.value = int(t.value)
        return t


if __name__ == '__main__':
    data = 'x = 3 + 0xa2 33ab s == t'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 575.adding.match.actions.py 2 3 type='ID', value='x' 4 type='ASSIGN', value='=' 5 type='NUMBER', value=3 6 type='PLUS', value='+' 7 type='NUMBER', value=162 8 type='NUMBER', value=33 9 type='ID', value='ab' 10 type='ID', value='s' 11 type='ASSIGN', value='=' 12 type='ASSIGN', value='=' 13 type='ID', value='t'
# 584.literal.I.py -- single-character literal tokens via `literals`.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer mixing named tokens with single-character literals.

    Each literal is returned as a token whose type and value are the
    character itself (e.g. type='+', value='+').
    """

    # Fixed: ASSIGN was listed twice; a set silently dropped the duplicate.
    tokens = {ID, NUMBER, ASSIGN}
    # Literal characters are matched as themselves, after the named rules.
    literals = {'+', '-', '(', ')'}
    ignore = ' \t'

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ASSIGN = r'='
    # Fixed: regex as a raw string; '\d' in a plain string is an invalid
    # escape sequence (a warning in recent Python versions).
    NUMBER = r'\d+'


if __name__ == '__main__':
    data = 'x += (234+d t) aa)'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 584.literal.I.py 2 3 type='ID', value='x' 4 type='+', value='+' 5 type='ASSIGN', value='=' 6 type='(', value='(' 7 type='NUMBER', value='234' 8 type='+', value='+' 9 type='ID', value='d' 10 type='ID', value='t' 11 type=')', value=')' 12 type='ID', value='aa' 13 type=')', value=')'
# 588.literal.II.py -- literals plus actions that track brace nesting.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer that counts '{'/'}' nesting depth while emitting literals."""

    tokens = {ID, NUMBER, ASSIGN}
    literals = {'{', '}', '+', '-', '(', ')'}
    ignore = ' \t'

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ASSIGN = r'='
    # Fixed: regex as a raw string ('\d' is an invalid escape otherwise).
    NUMBER = r'\d+'

    def __init__(self):
        # Current depth of unmatched '{' braces.
        self.nesting_level = 0

    @_(r'\{')
    def lbrace(self, t):
        # Set token type to the expected literal
        t.type = '{'
        self.nesting_level += 1
        print('nesting level: ', self.nesting_level)
        return t

    @_(r'\}')
    def rbrace(self, t):
        # Set token type to the expected literal
        t.type = '}'
        self.nesting_level -= 1
        print('nesting level: ', self.nesting_level)
        return t


if __name__ == '__main__':
    data = 'x += {s ( {t)} aa}'
    lexer = CalcLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))
1 python3 588.literal.II.py 2 3 type='ID', value='x' 4 type='+', value='+' 5 type='ASSIGN', value='=' 6 nesting level: 1 7 type='{', value='{' 8 type='ID', value='s' 9 type='(', value='(' 10 nesting level: 2 11 type='{', value='{' 12 type='ID', value='t' 13 type=')', value=')' 14 nesting level: 1 15 type='}', value='}' 16 type='ID', value='aa' 17 nesting level: 0 18 type='}', value='}'
# 624.py -- a fuller lexer: keywords, floats, comments, line tracking.
# Fixed: the listing was missing the import required to run standalone.
from sly import Lexer


class CalcLexer(Lexer):
    """Lexer for a small imperative language (while/print, <, <=, =)."""

    tokens = {INT, FLOAT, ID, WHILE, PRINT, ASSIGN, LT, LE}
    literals = {'(', ')', '{', '}', ';', '+', '-', '*', '/'}

    ignore = ' \t'
    ignore_comment = r'\#.*'

    # Identifiers, with whole-lexeme remapping for reserved words.
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['while'] = WHILE
    ID['print'] = PRINT  # ID['if'] = IF # ID['else'] = ELSE

    ASSIGN = r'='  # EQ = r'=='
    # LE must come before LT: longer patterns first (definition order).
    LE = r'<='  # NE = r'!='
    LT = r'<'  # GE=r'>='; GT=r'>'

    # FLOAT is tried before INT so '0.1' is not split into INT '.' INT.
    @_(r'\d+\.\d+')
    def FLOAT(self, t):
        t.value = float(t.value)
        return t

    @_(r'\d+')
    def INT(self, t):
        t.value = int(t.value)
        return t

    @_(r'\n+')
    def ignore_newline(self, t):
        # Keep self.lineno accurate, counting blank lines too.
        self.lineno += t.value.count('\n')

    def error(self, t):
        # The ':' after 'print x' in the demo input lands here.
        print('Line %d: Bad character %r' % (self.lineno, t.value[0]))
        self.index += 1


data = '''x = 0.1;
while (x < 10) {
  print x: # Counting
  x = x + 1;
}'''
lexer = CalcLexer()
for tok in lexer.tokenize(data):
    print('(', tok.type, ',', tok.value, ',', tok.lineno, ',',
          tok.index, ')')
1 python3 624.py 2 3 ( ID , x , 1 , 0 ) 4 ( ASSIGN , = , 1 , 2 ) 5 ( FLOAT , 0.1 , 1 , 4 ) 6 ( ; , ; , 1 , 7 ) 7 ( WHILE , while , 2 , 9 ) 8 ( ( , ( , 2 , 15 ) 9 ( ID , x , 2 , 16 ) 10 ( LT , < , 2 , 18 ) 11 ( INT , 10 , 2 , 20 ) 12 ( ) , ) , 2 , 22 ) 13 ( { , { , 2 , 24 ) 14 ( PRINT , print , 3 , 28 ) 15 ( ID , x , 3 , 34 ) 16 Line 3: Bad character ':' 17 ( ID , x , 4 , 50 ) 18 ( ASSIGN , = , 4 , 52 ) 19 ( ID , x , 4 , 54 ) 20 ( + , + , 4 , 56 ) 21 ( INT , 1 , 4 , 58 ) 22 ( ; , ; , 4 , 59 ) 23 ( } , } , 5 , 61 ) 24