Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions plain2code_console.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import logging

import tiktoken
from rich.console import Console
from rich.style import Style
from rich.tree import Tree

try:
import tiktoken

_tiktoken_available = True
except ImportError:
_tiktoken_available = False


class Plain2CodeConsole(Console):
INFO_STYLE = Style()
Expand All @@ -16,7 +22,10 @@ class Plain2CodeConsole(Console):

def __init__(self):
    """Initialise the console and, when possible, the tiktoken encoder.

    ``self.llm_encoding`` holds the ``cl100k_base`` encoding when the
    optional ``tiktoken`` import succeeded and the encoding could be
    loaded; otherwise it is ``None``.
    """
    super().__init__()
    # Default to "no encoder". tiktoken must only be touched behind the
    # availability flag: referencing it unconditionally raises NameError
    # when the optional import failed.
    self.llm_encoding = None
    if _tiktoken_available:
        try:
            self.llm_encoding = tiktoken.get_encoding("cl100k_base")
        except Exception as exc:
            # NOTE(review): loading an encoding can fail at runtime (for
            # example if its vocabulary data cannot be obtained). Degrade
            # to "no encoder" instead of failing console construction.
            logging.warning(
                "Could not load tiktoken encoding 'cl100k_base' (%s); "
                "token counts will be estimated.",
                exc,
            )

def info(self, *args, **kwargs):
logging.info(" ".join(map(str, args)))
Expand Down Expand Up @@ -86,7 +95,7 @@ def _create_tree_from_files(self, root_folder, files):
current_level = current_level.add(f"{part} [red]deleted[/red]")
else:
file_lines = len(content.splitlines())
file_tokens = len(self.llm_encoding.encode(content))
file_tokens = self._count_tokens(content)
current_level = current_level.add(f"{part} ({file_lines} lines, {file_tokens} tokens)")
else:
current_level = current_level.add(part)
Expand All @@ -95,6 +104,15 @@ def _create_tree_from_files(self, root_folder, files):

return tree

def _count_tokens(self, text):
"""Count tokens using tiktoken if available, otherwise estimate from character count."""
if self.llm_encoding is not None:
try:
return len(self.llm_encoding.encode(text))
except Exception:
pass
return len(text) // 4

def print_resources(self, resources_list, linked_resources):
if len(resources_list) == 0:
self.input("No linked resources found.")
Expand All @@ -103,7 +121,7 @@ def print_resources(self, resources_list, linked_resources):
self.input("Linked resources:")
for resource_name in resources_list:
if resource_name["target"] in linked_resources:
file_tokens = len(self.llm_encoding.encode(linked_resources[resource_name["target"]]))
file_tokens = self._count_tokens(linked_resources[resource_name["target"]])
self.input(
f"- {resource_name['text']} [#4169E1]({resource_name['target']}, {file_tokens} tokens)[/#4169E1]"
)
Expand Down