Training, Tuning and Deploying a PyTorch Text Classification Model

import os
import random
import datasets
import numpy as np
import pandas as pd
import torch
import transformers

from IPython.display import HTML, display
from datasets import ClassLabel, Sequence, load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          EvalPrediction, Trainer, TrainingArguments,
                          default_data_collator)

# Report the execution environment. Each label is paired with the package it
# names (the Transformers/Datasets version attributes were previously swapped).
print(f"Notebook runtime: {'GPU' if torch.cuda.is_available() else 'CPU'}")
print(f"PyTorch version : {torch.__version__}")
print(f"Transformers version : {transformers.__version__}")
print(f"Datasets version : {datasets.__version__}")

APP_NAME = "finetuned-bert-classifier"
# Set the tokenizers parallelism flag to "false" before any tokenizer is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

dataset = load_dataset("imdb")

# Print row count and size in MB for each split, one message template per split.
_BYTES_PER_MB = 1024 * 1024
for _template, _split in (
    ("Total # of rows in training dataset {} and size {:5.2f} MB", "train"),
    ("Total # of rows in test dataset {} and size {:5.2f} MB", "test"),
):
    print(
        _template.format(
            dataset[_split].shape[0],
            dataset[_split].size_in_bytes / _BYTES_PER_MB,
        )
    )

# Distinct label values present in the training split.
label_list = dataset["train"].unique("label")

def show_random_elements(dataset, num_examples=2):
    """Display distinct, randomly chosen rows of ``dataset`` as an HTML table.

    Columns whose feature type is a ``ClassLabel`` (or a ``Sequence`` of
    ``ClassLabel``) are shown with their label names instead of integer ids.

    Args:
        dataset: Object supporting ``len()``, indexing with a list of row
            indices, and a ``features`` mapping of column name to feature type.
        num_examples: Number of distinct rows to display.

    Raises:
        AssertionError: If ``num_examples`` exceeds the number of rows.
        ValueError: If ``num_examples`` is negative (raised by ``random.sample``).
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    # random.sample draws without replacement in one call, replacing the
    # retry-until-unique loop whose list membership test was quadratic.
    picks = random.sample(range(len(dataset)), num_examples)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, ClassLabel):
            # Map each integer class id to its name.
            df[column] = df[column].transform(lambda i: typ.names[i])
        elif isinstance(typ, Sequence) and isinstance(typ.feature, ClassLabel):
            # Same mapping, applied element-wise to list-valued cells.
            df[column] = df[column].transform(
                lambda x: [typ.feature.names[i] for i in x]
            )
    display(HTML(df.to_html()))

# Preview a couple of randomly chosen training rows.
show_random_elements(dataset["train"])

# Visual separator in the output; the two inner rows contain a gap.
for _banner_row in (
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~",
    "~~~~~~~~~~~~ ~~~~~~~~~~~~~~~",
    "~~~~~~~~~~~~ ~~~~~~~~~~~~~~~",
    "~~~~~~~~~~~~~~~~~~~~~~~~~~~~",
):
    print(_banner_row)

# Hyper-parameters and checkpoint shared by tokenization and training.
batch_size = 16
max_seq_length = 128
model_name_or_path = "bert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

# Tokenizer demo on a raw sentence (the result is only shown in a notebook cell).
tokenizer("Hello, this is one sentence!")
example = dataset["train"][4]
print(example)

# Tokenizer demo on input that is already split into words.
tokenizer(
    ["Hello", ",", "this", "is", "one", "sentence", "split", "into", "words", "."],
    is_split_into_words=True,
)

# The dataset is loaded again here so this cell stays idempotent:
# the `dataset` variable is overwritten by the preprocessing step later on.
dataset = load_dataset("imdb")
print("~~~~~8~~~~~~~~")
# Mapping from raw labels to ids.
# NOTE: This could be derived automatically, but the `unique` method of the
# dataset does not report the label -1, which shows up during pre-processing.
# Hence the additional -1 entry in the dictionary.
label_to_id = {1: 1, 0: 0, -1: 0}


def preprocess_function(examples):
    """
    Tokenize a batch of example texts and remap their labels.

    The "text" column is tokenized with padding to ``max_seq_length`` and
    truncation enabled. When a "label" column is present and ``label_to_id``
    is set, each label is translated through ``label_to_id``.

    NOTE: The same preprocessing step(s) will be applied
    at the time of inference as well.
    """
    encoded = tokenizer(
        examples["text"],
        padding="max_length",
        max_length=max_seq_length,
        truncation=True,
    )

    # Nothing to remap without a mapping or a label column.
    if label_to_id is None or "label" not in examples:
        return encoded

    encoded["label"] = [label_to_id[raw_label] for raw_label in examples["label"]]
    return encoded

print("~~~~~9~~~~~~~~")
# Run the preprocessing function over the dataset in batches, with
# load_from_cache_file enabled.
dataset = dataset.map(
    preprocess_function,
    batched=True,
    load_from_cache_file=True,
)

# Sequence-classification model with one output per distinct training label.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(label_list),
)

# Training configuration, collected in one mapping and expanded as keywords.
_training_options = {
    "output_dir": "/tmp/cls",
    "evaluation_strategy": "epoch",
    "num_train_epochs": 1,
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "per_device_train_batch_size": batch_size,
    "per_device_eval_batch_size": batch_size,
}
args = TrainingArguments(**_training_options)

def compute_metrics(p: EvalPrediction):
    """Return the accuracy of the predictions in ``p`` as ``{"accuracy": float}``.

    ``p.predictions`` may be an array of per-class scores or a tuple whose
    first item is that array; the arg-max over axis 1 is compared with
    ``p.label_ids``.
    """
    scores = p.predictions
    if isinstance(scores, tuple):
        # Only the first tuple entry holds the per-class scores.
        scores = scores[0]
    predicted_ids = np.argmax(scores, axis=1)
    hits = (predicted_ids == p.label_ids).astype(np.float32)
    return {"accuracy": hits.mean().item()}

print("~~~~~10~~~~~~~~")

# Model and arguments are passed by keyword; the remaining settings are unchanged.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=default_data_collator,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

print("~~~~11~~~~~~~~")
trainer.train()
print("~~~~~12~~~~~~~~")

# Save the trained model locally before running the final evaluation.
saved_model_local_path = "./models"
trainer.save_model(saved_model_local_path)
print("~~~~~13~~~~~~~~")

history = trainer.evaluate()
print("~~~~~14~~~~~~~~")
# Bare expression: shows the evaluation metrics when run as a notebook cell.
history

Source notebook: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/community-content/pytorch_text_classification_using_vertex_sdk_and_gcloud/pytorch-text-classification-vertex-ai-train-tune-deploy.ipynb

标签: none

已有 23 条评论

vdvhwvpbnz

November 12th, 2024 at 06:42 pm

你的文章让我心情愉悦，每天都要来看一看。 http://www.55baobei.com/GM5Ca2mGDT.html

回复
szlwfxunuf

December 8th, 2024 at 07:52 pm

你的文章充满了欢乐，让人忍不住一笑。 http://www.55baobei.com/7DdCtGETmf.html

回复
vyuogtrykt

January 6th, 2025 at 02:18 pm

哈哈哈，写的太好了https://www.lawjida.com/

回复
ihbqrifdlw

February 28th, 2025 at 05:24 pm

?批判性评语?

回复
tdcijvtkde

February 28th, 2025 at 06:43 pm

内容的丰富性和深度让人仿佛置身于知识的海洋，受益匪浅。

回复
laxfaixtfj

February 28th, 2025 at 09:29 pm

建议后续持续追踪此话题，形成系列研究。

回复
mvfjxrockk

March 1st, 2025 at 06:25 am

反讽手法运用娴熟，令人会心一笑。

回复
bxksuitqoc

March 1st, 2025 at 08:35 am

建议后续持续追踪此话题，形成系列研究。

回复
lkndlmbjdu

March 2nd, 2025 at 02:35 pm

建议后续持续追踪此话题，形成系列研究。

回复
mkybxwcmbi

March 2nd, 2025 at 05:10 pm

文章中的实用建议和操作指南，让读者受益匪浅，值得珍藏。

回复
pasvjwyxmc

March 2nd, 2025 at 05:37 pm

文章中的实用建议和操作指南，让读者受益匪浅，值得珍藏。

回复
jfnlnvxajd

March 2nd, 2025 at 08:40 pm

立意高远，以小见大，引发读者对社会/人性的深层共鸣。

回复
khexirzaps

March 4th, 2025 at 03:19 pm

立意高远，以小见大，引发读者对社会/人性的深层共鸣。

回复
rjpshszgmv

March 7th, 2025 at 12:34 am

这篇文章不错！

回复
mpcxrsiqke

October 5th, 2025 at 03:49 am

新车即将上线真正的项目，期待你的参与coinsrore.com

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

October 30th, 2025 at 11:53 pm

华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099
华纳公司合作开户所需材料？电话号码15587291507 微信STS5099

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 2nd, 2025 at 02:47 pm

华纳东方明珠客服电话是多少？（▲18288362750?《?微信STS5099? 】
如何联系华纳东方明珠客服？（▲18288362750?《?微信STS5099? 】
华纳东方明珠官方客服联系方式？（▲18288362750?《?微信STS5099?
华纳东方明珠客服热线？（▲18288362750?《?微信STS5099?
华纳东方明珠24小时客服电话？（▲18288362750?《?微信STS5099? 】
华纳东方明珠官方客服在线咨询？（▲18288362750?《?微信STS5099?

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 7th, 2025 at 03:16 pm

新盛客服电话是多少？（?183-8890-9465—《?薇-STS5099】【
新盛开户专线联系方式？（?183-8890--9465—《?薇-STS5099】【?扣6011643??】
新盛客服开户电话全攻略，让娱乐更顺畅！（?183-8890--9465—《?薇-STS5099】客服开户流程，华纳新盛客服开户流程图（?183-8890--9465—《?薇-STS5099】

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 9th, 2025 at 08:47 am

果博东方客服开户联系方式【182-8836-2750—】?薇- cxs20250806】
果博东方公司客服电话联系方式【182-8836-2750—】?薇- cxs20250806】
果博东方开户流程【182-8836-2750—】?薇- cxs20250806】
果博东方客服怎么联系【182-8836-2750—】?薇- cxs20250806】

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 9th, 2025 at 08:57 am

果博东方客服开户联系方式【182-8836-2750—】?薇- cxs20250806】
果博东方公司客服电话联系方式【182-8836-2750—】?薇- cxs20250806】
果博东方开户流程【182-8836-2750—】?薇- cxs20250806】
果博东方客服怎么联系【182-8836-2750—】?薇- cxs20250806】

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 17th, 2025 at 03:20 pm

华纳圣淘沙开户步骤详解（183-8890-9465—?薇-STS5099【6011643】
华纳圣淘沙公司开户流程全解析（183-8890-9465—?薇-STS5099【6011643】
华纳圣淘沙公司账户注册指南（183-8890-9465—?薇-STS5099【6011643】
新手如何开通华纳圣淘沙公司账户（183-8890-9465—?薇-STS5099【6011643】
华纳圣淘沙企业开户标准流程（183-8890-9465—?薇-STS5099【6011643】
华纳圣淘沙公司开户：从零到一（183-8890-9465—?薇-STS5099【6011643】
官方指南：华纳圣淘沙公司开户流程（183-8890-9465—?薇-STS5099【6011643】
华纳圣淘沙公司开户流程说明书（183-8890-9465—?薇-STS5099【6011643】

回复
鍗庣撼鍏徃鍚堜綔寮€鎴锋墍闇€鏉愭枡锛熺數璇濆彿鐮?5587291507 寰俊STS5099

November 22nd, 2025 at 03:47 pm

《华纳圣淘沙公司开户流程全解析》→ 官方顾问一对一指导??? 安全联系：183第三段8890第四段9465
《华纳圣淘沙开户步骤详解》→ 」专属通道快速办理??? 安全联系：183第三段8890第四段9465
《华纳圣淘沙账户注册指南》→ 扫码获取完整资料清单?「微?? 安全联系：183第三段8890第四段9465
《新手开通华纳圣淘沙公司账户指南》→ 限时免费咨询开放??? 安全联系：183第三段8890第四段9465
《华纳圣淘沙企业开户标准流程》→ 资深顾问实时解答疑问??? 安全联系：183第三段8890第四段9465
《华纳圣淘沙开户步骤全景图》→ 点击获取极速开户方案??? 安全联系：183第三段8890第四段9465
《华纳圣淘沙账户创建全流程手册》→ 预约顾问免排队服务?9?? 安全联系：183第三段8890第四段9465 《从零开通华纳圣淘沙公司账户》→ 添加客服领取开户工具包?? 安全联系：183第三段8890第四段9465
《官方授权：华纳圣淘沙开户流程》→ 认证顾问全程代办?」?? 安全联系：183第三段8890第四段9465
《华纳圣淘沙开户说明书》→立即联系获取电子版文件??? 安全联系：183第三段8890第四段9465

回复
a0tj.cn

November 24th, 2025 at 09:11 pm

大连特色陪洗是什么a0gz.cn

回复

取消回复

Training, Tuning and Deploying a PyTorch Text Classification Model

已有 23 条评论

添加新评论

最新文章

最近回复

分类

归档

其它