Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 43 additions & 30 deletions nfa/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,17 @@ def pattern(pattern_string):
lexer = Lexer(pattern_string)
lexer.advance()
nfa_pair = NfaPair()
group(nfa_pair)
expr(nfa_pair)
# log_nfa(nfa_pair.start_node)

return nfa_pair.start_node


"""
group ::= ("(" expr ")")*
expr ::= factor_conn ("|" factor_conn)*
factor_conn ::= factor | factor factor*
factor ::= (term | term ("*" | "+" | "?"))*
term ::= char | "[" char "-" char "]" | .
term ::= char | "[" char "-" char "]" | . | "(" expr ")"
"""


Expand All @@ -44,6 +43,8 @@ def term(pair_out):
nfa_dot_char(pair_out)
elif lexer.match(Token.CCL_START):
nfa_set_nega_char(pair_out)
elif lexer.match(Token.OPEN_PAREN):
nfa_paren_around(pair_out)


# Match a single character
Expand Down Expand Up @@ -147,7 +148,6 @@ def factor_conn(pair_out):

def is_conn(token):
nc = [
Token.OPEN_PAREN,
Token.CLOSE_PAREN,
Token.AT_EOL,
Token.EOS,
Expand Down Expand Up @@ -226,6 +226,19 @@ def nfa_option_closure(pair_out):
return True


# ()
def nfa_paren_around(pair_out):
    """Parse a parenthesized sub-expression: "(" expr ")".

    The current token must be OPEN_PAREN. It is consumed, ``expr`` is called
    with ``pair_out``, and a following CLOSE_PAREN is consumed as well.

    Returns True when both parentheses were matched and consumed. Returns
    False when the current token is not OPEN_PAREN (nothing is consumed), or
    when no CLOSE_PAREN follows the inner expression (whatever was read up
    to that point stays consumed).
    """
    if lexer.match(Token.OPEN_PAREN):
        lexer.advance()  # step past "("
        expr(pair_out)
        if lexer.match(Token.CLOSE_PAREN):
            lexer.advance()  # step past ")"
            return True
    return False


def expr(pair_out):
factor_conn(pair_out)
pair = NfaPair()
Expand All @@ -246,32 +259,32 @@ def expr(pair_out):
return True


# Parses the rule `group ::= ("(" expr ")")*` from the grammar note in this file.
# NOTE(review): leading indentation is not visible in this view, so which `if`
# each `elif`/`else` below pairs with cannot be confirmed here -- check the
# real file before relying on the per-branch notes.
# Every `return` visible in this function returns False.
def group(pair_out):
# First item: on "(" consume it and parse the inner expr straight into pair_out.
if lexer.match(Token.OPEN_PAREN):
lexer.advance()
expr(pair_out)
# Consume the closing ")" when it is present.
if lexer.match(Token.CLOSE_PAREN):
lexer.advance()
# End of input: stop with False.
elif lexer.match(Token.EOS):
return False
# Presumably the no-parenthesis case: parse a bare expr into pair_out.
else:
expr(pair_out)

# Following items: each one is parsed into a fresh NfaPair and appended to pair_out.
while True:
pair = NfaPair()
if lexer.match(Token.OPEN_PAREN):
lexer.advance()
expr(pair)
# Link the new fragment after the current end node, then move the end forward.
pair_out.end_node.next_1 = pair.start_node
pair_out.end_node = pair.end_node
if lexer.match(Token.CLOSE_PAREN):
lexer.advance()
# The only visible way out of the loop: end of input returns False.
elif lexer.match(Token.EOS):
return False
else:
expr(pair)
# Same linking step as in the parenthesized branch.
pair_out.end_node.next_1 = pair.start_node
pair_out.end_node = pair.end_node
# def group(pair_out):
# if lexer.match(Token.OPEN_PAREN):
# lexer.advance()
# expr(pair_out)
# if lexer.match(Token.CLOSE_PAREN):
# lexer.advance()
# elif lexer.match(Token.EOS):
# return False
# else:
# expr(pair_out)

# while True:
# pair = NfaPair()
# if lexer.match(Token.OPEN_PAREN):
# lexer.advance()
# expr(pair)
# pair_out.end_node.next_1 = pair.start_node
# pair_out.end_node = pair.end_node
# if lexer.match(Token.CLOSE_PAREN):
# lexer.advance()
# elif lexer.match(Token.EOS):
# return False
# else:
# expr(pair)
# pair_out.end_node.next_1 = pair.start_node
# pair_out.end_node = pair.end_node



7 changes: 7 additions & 0 deletions test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ def __init__(self, str, pattern, result):
# Each row holds the (str, pattern, result) arguments for one RegexMaterial,
# registered in the order listed.
_REGEX_CASES = (
    ("abbbbb", "[^c]+", True),
    ("ccccc", "[^c]+", False),
    ("123", "[1-3]+", True),
    # Parenthesized groups with closures.
    ("ad", "a(bc)*d", True),
    ("abcd", "a(bc)*d", True),
    ("abcbcd", "a(bc)*d", True),
    # Nested groups.
    ("abcdef", "a(b(cd)*e)?f", True),
    ("abef", "a(b(cd)*e)?f", True),
    ("af", "a(b(cd)*e)?f", True),
    ("abf", "a(b(cd)*e)?f", False),
)
for _case in _REGEX_CASES:
    testLists.append(RegexMaterial(*_case))

class TestRegex(unittest.TestCase):
def test(self):
Expand Down