diff --git a/llmfoundry/data/finetuning/tasks.py b/llmfoundry/data/finetuning/tasks.py
index dcc1c5491a..6b4bd25936 100644
--- a/llmfoundry/data/finetuning/tasks.py
+++ b/llmfoundry/data/finetuning/tasks.py
@@ -119,6 +119,15 @@ def preprocessing_fn(example: Dict) -> Dict[str, str]:
 ExampleType = Literal['prompt_response', 'chat']
 TokenizedExample = dict[str, list[dict[str, list[int]]]]
 
+_DEFAULT_CHAT_TEMPLATE = (
+    '{% for message in messages %}'
+    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
+    '{% endfor %}'
+    '{% if add_generation_prompt %}'
+    "{{ '<|im_start|>assistant\n' }}"
+    '{% endif %}'
+)
+
 
 def _get_example_type(example: Example) -> ExampleType:
     """Determines the type of the input example.
@@ -243,17 +252,21 @@ def slice_out_last_turn(
         messages_through_current_turn: list[dict[str, str]],
         conversation_through_previous_turn: str,
     ) -> tuple[str, str]:
+        chat_template = None if tokenizer.chat_template is not None else _DEFAULT_CHAT_TEMPLATE
+
         try:
             full_conversation = tokenizer.apply_chat_template(
                 messages_through_current_turn,
                 tokenize=False,
                 date_string=get_date_string(),
+                chat_template=chat_template,
             )
             prompt_with_history = tokenizer.apply_chat_template(
                 messages_through_current_turn[:-1],
                 tokenize=False,
                 add_generation_prompt=True,
                 date_string=get_date_string(),
+                chat_template=chat_template,
             )
         except Exception as e:
             raise ChatTemplateError(
diff --git a/llmfoundry/models/mpt/modeling_mpt.py b/llmfoundry/models/mpt/modeling_mpt.py
index da576b29e1..0afb493844 100644
--- a/llmfoundry/models/mpt/modeling_mpt.py
+++ b/llmfoundry/models/mpt/modeling_mpt.py
@@ -99,6 +99,9 @@ class InvalidConfigAccessError(KeyError):
     # Not set but llama modeling code tries to read this attribute
     'partial_rotary_factor',
 
+    # This key is accessed with a default of hidden_size / num_attention_heads
+    'head_dim',
+
     # Benign transformers attributes needed for __init__
     '_get_generation_defaults',
     'label2id',
@@ -106,6 +109,7 @@ class InvalidConfigAccessError(KeyError):
     'torch_dtype',
     'problem_type',
     '__class__',
+    '_get_global_generation_defaults',
 }
 
 
diff --git a/llmfoundry/tokenizers/tiktoken.py b/llmfoundry/tokenizers/tiktoken.py
index 6458ad3ba4..36944589d0 100644
--- a/llmfoundry/tokenizers/tiktoken.py
+++ b/llmfoundry/tokenizers/tiktoken.py
@@ -69,6 +69,7 @@ def __init__(
         bos_token: Optional[str] = '<|endoftext|>',
         pad_token: Optional[str] = None,
         errors: str = 'replace',
+        chat_template: Optional[str] = None,
         **kwargs: Any,
     ):
         """Constructor creates a tiktoken tokenizer to use as the underlying.
@@ -90,6 +91,8 @@ def __init__(
             errors (str, optional): Paradigm to follow when decoding bytes to UTF-8.
                 See [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode)
                 for more information. Defaults to `"replace"`.
+            chat_template (Optional[str], optional): The Hugging Face chat template. Default will use the ``default_chat_template``
+                set on this class.
             kwargs (Any): Other relevant keyword arguments.
""" try: @@ -178,6 +181,7 @@ def pickle_Encoding(enc: Encoding): bos_token=bos_token, pad_token=pad_token, errors=errors, + chat_template=chat_template or self.default_chat_template, **kwargs, ) diff --git a/setup.py b/setup.py index 86d696ed5c..72543b24c8 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ 'mosaicml[libcloud,wandb,oci,gcs,mlflow]>=0.26.0,<0.27', 'mlflow>=2.14.1,<2.18', 'accelerate>=0.25,<0.34', # for HF inference `device_map` - 'transformers>=4.43.2,<4.44', + 'transformers>=4.43.2,<4.47', 'mosaicml-streaming>=0.9.0,<0.10', 'torch>=2.4.0,<2.4.1', 'datasets>=2.19,<2.20', diff --git a/tests/data/test_template_tokenization.py b/tests/data/test_template_tokenization.py index 0697894bb2..65f8669ce6 100644 --- a/tests/data/test_template_tokenization.py +++ b/tests/data/test_template_tokenization.py @@ -7,6 +7,7 @@ import transformers from llmfoundry.data.finetuning.tasks import ( + _DEFAULT_CHAT_TEMPLATE, _slice_chat_formatted_example, dataset_constructor, tokenize_formatted_example, @@ -304,6 +305,9 @@ def test_multi_turn_chat_slicing( if use_date_string: tok.chat_template = "{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{{- \"Today Date: \" + date_string }}\n" + if not tok.chat_template: + tok.chat_template = _DEFAULT_CHAT_TEMPLATE + templated_prompt_response_turns = _slice_chat_formatted_example( example, tok, diff --git a/tests/models/test_model.py b/tests/models/test_model.py index a7769c237d..8a6290d5c4 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -1668,6 +1668,10 @@ def check_hf_model_equivalence( del expected_model_config_dict['_name_or_path'] del new_model_config_dict['_name_or_path'] + # Transformers changes this key on load from disk + del expected_model_config_dict['_attn_implementation_autoset'] + del new_model_config_dict['_attn_implementation_autoset'] + assert expected_model_config_dict == new_model_config_dict assert sum(p.numel() for p in model1.parameters() ) == sum(p.numel() for p in model2.parameters()) diff --git a/tests/tokenizers/test_tokenizer.py b/tests/tokenizers/test_tokenizer.py index d42f810214..5c6c07d4cc 100644 --- a/tests/tokenizers/test_tokenizer.py +++ b/tests/tokenizers/test_tokenizer.py @@ -6,6 +6,7 @@ from omegaconf import OmegaConf as om from transformers import AutoTokenizer +from llmfoundry.data.finetuning.tasks import _DEFAULT_CHAT_TEMPLATE from llmfoundry.tokenizers.utils import get_date_string @@ -115,6 +116,9 @@ def test_tokenizer_date_string(tokenizer_name: str, use_date_string: bool): if use_date_string: tokenizer.chat_template = "{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{{- \"Today Date: \" + date_string }}\n" + if not tokenizer.chat_template: + tokenizer.chat_template = _DEFAULT_CHAT_TEMPLATE + token_ids = tokenizer.apply_chat_template( messages, add_generation_prompt=True,