dogvscat 一个关于协变量偏移（covariate shift）的例子

发表于 2023-06-27 更新于 2023-10-30 Disqus：

猫没卡通化你的训练集

猫卡通化过你的测试集

狗没卡通化过你的训练集

狗卡通化过你的测试集

为了体验一下协变量偏移（covariate shift），我自己构建了一个猫狗分类的数据集：数据来自 Kaggle dogvscat 的训练集，我把它重新划分成训练集和测试集两部分，然后对测试集部分进行卡通化处理（卡通化代码地址），构成了一个协变量偏移的例子，并试着实现协变量偏移纠正，不过可惜的是效果并不好。

理论部分

分布偏移的类型

训练数据分布为 \(p_S(x,y)\)，测试数据分布为 \(p_T(x,y)\)。一个清醒的现实是：如果没有任何关于 \(p_S\) 和 \(p_T\) 之间相互关系的假设，学习到一个在测试分布上可靠的分类器是不可能的。

例如，如果 \(p_S(x)=p_T(x)\)，但是标签全部翻转，即 \(p_S(y|x)=1-p_T(y|x)\)，那么仅凭输入分布 \(p(x)\) 无法察觉这种变化，此时简单地使用在 \(p_S\) 上训练的分类器就很难得到好的结果。

所以我们可以对数据发生的变化进行一些限制的假设，并以此设计可以检测偏移变化的算法，然后动态调整数据。

情况一协变量偏移（covariate shift）

\(p(x)\)改变 \(p(y|x)\)没有改变
情况二标签偏移（label shift）
\(p(y)\)改变 \(p(x|y)\)没有改变
情况三概念偏移（concept shift）
标签的定义发生变化

为了更好的认识这一问题我们要了解经验风险和真实风险的概念

经验风险（empirical risk）

\[ \underset{f}{\operatorname{minimize}} \frac{1}{n} \sum_{i=1}^n l\left(f\left(\mathbf{x}_i\right), y_i\right) \]
真实风险（true risk）
\[ E_{p(\mathbf{x}, y)}[l(f(\mathbf{x}), y)]=\iint l(f(\mathbf{x}), y) p(\mathbf{x}, y) d \mathbf{x} d y \]

协变量偏移纠正

记 \(q(x)\) 为源分布，\(p(x)\) 为目标分布，并假设 \(p(y|x)=q(y|x)\)，则真实风险可以改写为：
\[ \iint l(f(\mathbf{x}), y) p(y \mid \mathbf{x}) p(\mathbf{x}) d \mathbf{x} d y=\iint l(f(\mathbf{x}), y) q(y \mid \mathbf{x}) q(\mathbf{x}) \frac{p(\mathbf{x})}{q(\mathbf{x})} d \mathbf{x} d y \]

我们需要根据数据来自正确分布与来自错误分布的概率之比，来重新衡量每个数据样本的权重
\[ \beta_i \stackrel{\text { def }}{=} \frac{p\left(\mathbf{x}_i\right)}{q\left(\mathbf{x}_i\right)} \]

将权重 \(\beta_i\) 代入到每个数据样本 \(\left(\mathbf{x}_i, y_i\right)\) 中, 我们可以使用”加权经验风险最小化“来训练模型:
\[ \underset{f}{\operatorname{minimize}} \frac{1}{n} \sum_{i=1}^n \beta_i l\left(f\left(\mathbf{x}_i\right), y_i\right) . \]
\(\beta_i\) 可以用一个二分类网络生成：训练一个区分样本来自目标分布还是源分布的二分类器，用它输出的两类概率之比来估计 \(p(\mathbf{x}_i)/q(\mathbf{x}_i)\)。

代码实现

数据处理

import shutil
import numpy as np
import os
# Location of the original Kaggle training images (cat.N.jpg / dog.N.jpg).
original_dataset_dir = '....train'
# The folder holds cats and dogs together, so half of the files belong to each
# species (12500 per species for the full Kaggle training set).
total_num = len(os.listdir(original_dataset_dir)) // 2
# Random order of the image numbers 0 .. total_num-1.
random_idx = np.random.permutation(total_num)
base_dir = '...dogvscat'   # root folder of the re-split dataset

# Split into training and test sets.
sub_dirs = ['train', 'test']
animals = ['cats', 'dogs']
split_at = int(total_num * 0.8)
train_idx = random_idx[:split_at]   # 80% of the image numbers
test_idx = random_idx[split_at:]    # remaining 20%
numbers = [train_idx, test_idx]
for sub_dir, indices in zip(sub_dirs, numbers):
    split_dir = os.path.join(base_dir, sub_dir)
    for animal in animals:
        animal_dir = os.path.join(split_dir, animal)
        # Creates base_dir/sub_dir/animal in one go and tolerates re-runs.
        os.makedirs(animal_dir, exist_ok=True)
        prefix = animal[:-1]   # 'cats' -> 'cat', 'dogs' -> 'dog'
        # The same image numbers are used for both species.
        for i in indices:
            fname = '{}.{}.jpg'.format(prefix, i)
            shutil.copyfile(os.path.join(original_dataset_dir, fname),
                            os.path.join(animal_dir, fname))

        # Report how many images ended up in this split/species folder.
        print(animal_dir + ' total images : %d' % (len(os.listdir(animal_dir))))

复制test文件夹重命名为testcartoon 进入文件夹下用如下的代码卡通化测试文件夹

# NOTE(review): the output flag is assumed to be --output_dir (mirroring --input_dir); confirm against test.py.
python test.py --input_dir ./samples/inputs1 --output_dir ./samples/result1/ --device cpu

创建协变量偏移检测器训练集

在dogvscat文件夹下

# Run inside the dogvscat folder.
mkdir train2
cd train2
# Cartoonized images (from testcartoon) go to train2/cartoon.
mkdir cartoon
cd cartoon
cp ../../testcartoon/cats/* .
cp ../../testcartoon/dogs/* .
cd ..
# Untouched training images go to train2/normal.
mkdir normal
cd normal
cp ../../train/cats/* .
cp ../../train/dogs/* .

import shutil
import numpy as np
import os
# Folder that contains the 'cartoon' and 'normal' image folders built above.
original_dataset_dir = '.....train2'  # replace with your own location
base_dir = '...cartoonvsnormal'   # root folder of the detector dataset to create
for j in ['cartoon','normal']:
    original_dataset_dir2 = os.path.join(original_dataset_dir, j)
    ll = os.listdir(original_dataset_dir2)

    total_num = len(ll)
    # Random order of the file positions in ``ll``.
    random_idx = np.random.permutation(total_num)
    split_at = int(total_num * 0.8)
    train_idx = random_idx[:split_at]   # 80% of the files
    test_idx = random_idx[split_at:]    # remaining 20%
    print(j,'train:',len(train_idx),'test',len(test_idx))
    # Copy each part to base_dir/<split>/<cartoon|normal>.
    for split_name, indices in (('train', train_idx), ('test', test_idx)):
        dir1 = os.path.join(base_dir, split_name, j)
        os.makedirs(dir1, exist_ok=True)
        for i in indices:
            shutil.copyfile(os.path.join(original_dataset_dir2, ll[i]),
                            os.path.join(dir1, ll[i]))
        print(j,split_name,len(os.listdir(dir1)))

然后你可以删除train2

以下是大概的文件结构

│─cartoonvsnormal
│   ├───train
│   │   ├─cartoon
│   │   └─normal
│   └───test
│       ├─cartoon
│       └─normal
│
│─dogvscat
│   ├───train
│   │   ├─cats
│   │   └─dogs
│   ├───test
│   │   ├─cats
│   │   └─dogs
│   └───testcartoon
│       ├─cats
│       └─dogs

展示部分图片

猫没卡通化你的训练集

猫卡通化过你的测试集

狗没卡通化过你的训练集

狗卡通化过你的测试集

别问为什么没有对齐，就是要逼死强迫症。不过值得一提的是，这些图片是我挑选过的，卡通化后有些图效果不是很好，你可以尝试修改代码让它效果更好，加油！

协变量偏移检测器实现

import torch 
import torchvision
from torchvision import datasets,models,transforms
import os
from torch.autograd import Variable
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
from d2l import torch as d2l#李沐动手学习深度学习的课的库 没有可以注释掉
import gc 
gc.collect()#清理内存
#加载函数
def getmean_str(data_dir,name):
    """Estimate per-channel mean and std of the images under ``data_dir``.

    Every image is resized to 224x224 and converted to a tensor. The mean and
    the standard deviation are computed per image and per channel and then
    averaged over all images, so the returned std is the average of the
    per-image stds.

    Args:
        data_dir: root folder in the layout expected by ``ImageFolder``.
        name: label used only in the printed summary.

    Returns:
        ``(mean, std)``, each a list with one value per channel.
    """
    to_tensor = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=data_dir, transform=to_tensor)
    # One image per step so the statistics are taken image by image.
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=0)

    mean = torch.zeros(3)
    std = torch.zeros(3)
    for image, _ in loader:
        for channel in range(3):
            plane = image[:, channel, :, :]
            mean[channel] += plane.mean()
            std[channel] += plane.std()
    image_count = len(dataset)
    mean.div_(image_count)
    std.div_(image_count)
    print(name ," mean:",mean,"std:",std)
    return list(mean.numpy()), list(std.numpy())

def load_data_cartoonvsnormal(data_dir,meanlist, stdlist,batch_size=32): 
    """Build a shuffled loader over the image folder ``data_dir``.

    Images are resized to 224x224, converted to tensors and normalized with
    the given per-channel statistics.

    Args:
        data_dir: root folder in the layout expected by ``ImageFolder``.
        meanlist: per-channel means passed to ``transforms.Normalize``.
        stdlist: per-channel stds passed to ``transforms.Normalize``.
        batch_size: number of images per batch.

    Returns:
        ``(data_iter, image_datasets)``: the ``DataLoader`` and the underlying
        ``ImageFolder`` dataset.
    """
    pipeline = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(meanlist, stdlist),
    ])
    image_datasets = datasets.ImageFolder(root=data_dir, transform=pipeline)
    data_iter = torch.utils.data.DataLoader(
        image_datasets, batch_size=batch_size, shuffle=True)
    return (data_iter, image_datasets)
#训练函数
def train_epoch_ch4_9(net, train_iter, loss, updater, Use_gpu,detector=None): 
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Args:
        net: model being trained; called as ``net(X)``.
        train_iter: iterable of ``(X, y)`` batches.
        loss: callable ``loss(y_hat, y)``; when ``detector`` is given it is
            called as ``loss(y_hat, y, beta)`` instead.
        updater: optimizer exposing ``zero_grad()`` and ``step()``.
        Use_gpu: if true, every batch is moved to CUDA before the forward pass.
        detector: optional callable mapping a batch ``X`` to the importance
            weights ``beta`` of its samples.

    Returns:
        The loss averaged over batches (float) and the fraction of correctly
        classified samples (0-d numpy array).

    Raises:
        ValueError: if ``train_iter`` yields no batches.
    """
    # NOTE(review): the train/eval mode of ``net`` is not set here, while
    # evaluate_accuracy_ch4_9 switches the model to eval() -- confirm this is
    # intended for the frozen backbone.
    total_loss = 0.0
    correct = 0
    seen = 0
    num_batches = 0
    for batch, (X, y) in enumerate(train_iter, 1):
        if Use_gpu:
            X, y = X.cuda(), y.cuda()
        y_hat = net(X)
        beta = None
        if detector is not None:
            # The detector may return a tensor or a plain float; put the
            # weights on the same device as the batch.
            beta = torch.as_tensor(detector(X.detach()), device=X.device)
        # Use the optimizer that was passed in, not a module-level global.
        updater.zero_grad()
        l = loss(y_hat, y) if detector is None else loss(y_hat, y, beta)
        l.backward()    # back-propagation
        updater.step()  # parameter update
        _, pred = torch.max(y_hat, 1)
        correct += int(torch.sum(pred == y).item())
        # Count real samples so a shorter last batch does not skew accuracy.
        seen += y.numel()
        total_loss += l.item()
        num_batches = batch

        if batch % 200 == 0:
            print("Batch:{},Train Loss:{:.4f},Train ACC:{:.4f}%".format(batch,total_loss/batch,100*correct/seen))
    if num_batches == 0:
        raise ValueError("train_iter yielded no batches")
    return (total_loss / num_batches), torch.tensor(correct / seen).numpy()
#评价函数
def evaluate_accuracy_ch4_9(net, data_iter, Use_gpu): 
    """Return the classification accuracy of ``net`` over ``data_iter``.

    The model is switched to evaluation mode (and moved to CUDA when
    ``Use_gpu`` is true); it is left in that mode afterwards.

    Args:
        net: model to evaluate; called as ``net(X)``.
        data_iter: iterable of ``(X, y)`` batches.
        Use_gpu: if true, the model and every batch are moved to CUDA.

    Returns:
        Fraction of samples whose arg-max prediction equals the label, as a
        0-d numpy array.
    """
    if Use_gpu:
        net = net.cuda()
    net.eval()  # evaluation mode
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for X, y in data_iter:
            if Use_gpu:
                # evaluate on the GPU when one is available
                X = X.cuda()
                y = y.cuda()
            _, predicted = net(X).max(1)
            correct += (predicted == y).sum()
            total += y.numel()
        accuracy = (correct / total).cpu().numpy()
    return accuracy
#训练函数
def train_ch4_9(net, train_iter, test_iter, loss, num_epochs, updater,Use_gpu,test_iter2=None,savename='../data/ch04-4-9-2and4-9-3/covariate_shift_detectormodel.pth',detector=None): 
    """Train ``net`` for ``num_epochs`` epochs, report metrics and save it.

    After every epoch the model is evaluated on ``test_iter`` (and on
    ``test_iter2`` when given); the metrics are plotted with ``d2l.Animator``
    and printed. At the end the ``state_dict`` is written to ``savename``.

    Args:
        net: model to train.
        train_iter: training batches, forwarded to ``train_epoch_ch4_9``.
        test_iter: first evaluation set (reported as the cartoon test set
            when ``test_iter2`` is given).
        loss: loss callable, forwarded to ``train_epoch_ch4_9``.
        num_epochs: number of epochs, at least 1.
        updater: optimizer, forwarded to ``train_epoch_ch4_9``.
        Use_gpu: whether to run on CUDA.
        test_iter2: optional second evaluation set (reported as the normal
            test set).
        savename: path the trained ``state_dict`` is saved to.
        detector: optional importance-weight callable, forwarded to
            ``train_epoch_ch4_9``.

    Raises:
        ValueError: if ``num_epochs`` is smaller than 1.
        AssertionError: if the final loss or accuracies miss the sanity
            thresholds checked at the end.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")
    two_test_sets = test_iter2 is not None
    if two_test_sets:
        legend = ['train loss', 'train acc', 'test cartoon acc','test normal acc ']
    else:
        legend = ['train loss', 'train acc', 'test acc']
    # Plotting helper from the d2l library; comment it out if d2l is missing.
    animator = d2l.Animator(xlabel='epoch',ylabel='Y', xlim=[1, num_epochs],
                        legend=legend)

    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch4_9(net, train_iter, loss, updater,Use_gpu,detector)
        test_acc = evaluate_accuracy_ch4_9(net, test_iter,Use_gpu)
        if two_test_sets:
            test_acc2 = evaluate_accuracy_ch4_9(net, test_iter2,Use_gpu)
            animator.add(epoch + 1, train_metrics + (test_acc,test_acc2,))
            print("epoch{} Loss:{:.4f} Train Acc:{:.4f}% Test Cartoon Acc:{:.4f}% Test Normal Acc:{:.4f}%".format(epoch+1, train_metrics[0], 100*train_metrics[1],100*test_acc,100*test_acc2))
        else:
            animator.add(epoch + 1, train_metrics + (test_acc,))
            print("epoch{} Loss:{:.4f} Train Acc:{:.4f}% Test  Acc:{:.4f}%".format(epoch+1, train_metrics[0], 100*train_metrics[1],100*test_acc))

    train_loss, train_acc = train_metrics

    torch.save(net.state_dict(),savename)

    torch.cuda.empty_cache()
    print("save",savename,"over")

    # Sanity checks on the last epoch; they abort the run when training did
    # not reach the expected quality.
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

# Data loading for the covariate-shift detector (cartoon vs. normal images).

batch_size=32
data_dir = "../data/ch04-4-9-2and4-9-3/cartoonvsnormal"
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
mean_train_list,std_train_list=getmean_str(train_dir,'train')
mean_test_list,std_test_list=getmean_str(test_dir,'test')
# Number of images per domain over train + test; used below to weight the loss.
cartoon_num=1.0*(len(os.listdir(os.path.join(train_dir,'cartoon')))+len(os.listdir(os.path.join(test_dir,'cartoon'))))
normal_num=1.0*(len(os.listdir(os.path.join(train_dir,'normal')))+len(os.listdir(os.path.join(test_dir,'normal'))))
train_iter,image_train_datasets=load_data_cartoonvsnormal(train_dir,mean_train_list,std_train_list,batch_size)
test_iter,image_test_datasets=load_data_cartoonvsnormal(test_dir,mean_test_list,std_test_list,batch_size)
index_classes = list(image_test_datasets.class_to_idx.keys())
# Print the class summary once.
print("类别一 {}，数量{}，类别二 {} 数量{}".format(index_classes[0],cartoon_num,index_classes[1],normal_num))
# Load the pretrained backbone.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
Use_gpu = torch.cuda.is_available()

# Freeze the pretrained weights; only the new final fully connected layer is trained.
for param in model.parameters():
    param.requires_grad = False
model.fc = torch.nn.Linear(2048,2)
nn.init.xavier_uniform_(model.fc.weight)


if Use_gpu:
    model = model.cuda()

# Loss function and optimizer.
# The two classes have different sizes, so each class is weighted by the size
# of the other one (assumes ImageFolder's class order is cartoon=0, normal=1
# -- TODO confirm).
weight=torch.tensor([normal_num,cartoon_num])
weight=weight.cuda() if Use_gpu else weight
loss_f = torch.nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.SGD(model.fc.parameters(),lr = 1e-4)

num_epochs = 15
train_ch4_9(model, train_iter, test_iter, loss_f, num_epochs,optimizer,Use_gpu)
# Free memory.
del model
torch.cuda.empty_cache()

定义检测器

这里 covariate_shift_detector 对样本 \(x_i\) 的输出即 \(\beta_i =\frac{p\left(\mathbf{x}_i\right)}{q\left(\mathbf{x}_i\right)}\) 的估计值（代码中会把它截断到不超过 1）。

def  covariate_shift_detector(input_image,model_pth='../data/ch04-4-9-2and4-9-3/covariate_shift_detectormodel.pth'):
    """Estimate the importance weight ``beta`` of each input image.

    A ResNet-50 with a 2-way head is rebuilt, loaded from ``model_pth`` and
    applied to the images. For every image ``beta`` is the ratio of the two
    softmax outputs (class 0 over class 1), capped at 1.0.

    Args:
        input_image: a 4-D image batch tensor, or a sequence of 3-D image
            tensors.
        model_pth: path of the saved detector ``state_dict``.

    Returns:
        A 0-d tensor when exactly one image is given, otherwise a 1-D CPU
        tensor with one ``beta`` per image.
    """
    # NOTE(review): the model is rebuilt and reloaded on every call, which is
    # slow when this is called once per training batch; it is released again
    # at the end to keep memory usage low.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The state dict below overwrites every weight, so no pretrained
    # weights need to be loaded first.
    model = models.resnet50(weights=None)
    model.fc = torch.nn.Linear(2048,2)
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(model_pth, map_location='cpu'))
    model = model.to(device)
    model.eval()

    images = input_image if torch.is_tensor(input_image) else list(input_image)
    single = len(images) == 1
    chunk_size = 32  # bounds memory when a whole dataset is passed in
    betas = []
    with torch.no_grad():
        for start in range(0, len(images), chunk_size):
            part = images[start:start + chunk_size]
            if not torch.is_tensor(part):
                part = torch.stack(part)
            probs = torch.softmax(model(part.to(device)), dim=1)
            ratio = probs[:, 0] / probs[:, 1]
            betas.append(torch.clamp(ratio, max=1.0).cpu())
    del model
    torch.cuda.empty_cache()
    output = torch.cat(betas) if betas else torch.empty(0)
    return output[0] if single else output

测试结果

import random
# Spot check: classify one randomly chosen test image with the detector.
testnum=random.randint(0,len(image_test_datasets)-1)
index_classes = list(image_test_datasets.class_to_idx.keys())
sample_image, sample_label = image_test_datasets[testnum]
input_image=torch.unsqueeze(sample_image,0)
if torch.cuda.is_available():
    input_image=input_image.cuda()
# beta >= 1 means class 0 is at least as likely as class 1.
output=index_classes[0] if covariate_shift_detector(input_image)>=1 else index_classes[1]
print("testnum ",testnum," 推断类别：",output,"实际类别：",index_classes[sample_label])
del input_image

gc.collect()
Use_gpu = torch.cuda.is_available()
if Use_gpu:
    torch.cuda.empty_cache()
    print('clear')
# Accuracy of the detector over the whole test set.
input_image=[i[0].cuda() for i in image_test_datasets] if torch.cuda.is_available() else [i[0] for i in image_test_datasets]
output=torch.as_tensor(covariate_shift_detector(input_image)).cpu()
# ImageFolder keeps the labels in ``targets``; no second pass over the images.
test_label=torch.tensor(image_test_datasets.targets)
is_cartoon = test_label == 0      # label 0 is treated as the cartoon class
pred_cartoon = output >= 1
# Boolean masks also count a normal image whose beta is exactly 0 as correct.
cartoonacc=(pred_cartoon & is_cartoon).sum()/is_cartoon.sum()
normalacc=(~pred_cartoon & ~is_cartoon).sum()/(~is_cartoon).sum()
print('卡通图片预测正确率:{:.4f}% ,正常图片预测正确率:{:.4f}%'.format(100*cartoonacc,100*normalacc))
del input_image

实现协变量偏移纠正

首先展示没有使用分类器来训练模型的结果，可以看到训练的后期，即使各个数据曲线已经趋平，训练准确率仍然高于测试准确率。

# Imports and helper functions are the ones defined for section 4.9.2.
# Data loading

batch_size = 32
data_dir = "../data/ch04-4-9-2and4-9-3/dogvscat"
# Per-channel statistics of the training set and of both test sets.
mean_train_list, std_train_list = getmean_str(
    os.path.join(data_dir, 'train'), 'train')
mean_testcartoon_list, std_testcartoon_list = getmean_str(
    os.path.join(data_dir, 'testcartoon'), 'testcartoon')
mean_testnormal_list, std_testnormal_list = getmean_str(
    os.path.join(data_dir, 'test'), 'testnormal')
# Loaders: each set is normalized with its own statistics.
train_iter, image_train_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'train'),
    mean_train_list, std_train_list, batch_size)
testcartoon_iter, image_testcartoon_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'testcartoon'),
    mean_testcartoon_list, std_testcartoon_list, batch_size)
testnormal_iter, image_testnormal_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'test'),
    mean_testnormal_list, std_testnormal_list, batch_size)

# Model: pretrained ResNet-50 with a new 2-way output layer.
model_no_detector = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
Use_gpu = torch.cuda.is_available()

# Freeze the pretrained weights; only the final fully connected layer is trained.
for param in model_no_detector.parameters():
    param.requires_grad = False
model_no_detector.fc = torch.nn.Linear(2048, 2)
nn.init.xavier_uniform_(model_no_detector.fc.weight)

if Use_gpu:
    model_no_detector = model_no_detector.cuda()

# Loss function and optimizer.
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_no_detector.fc.parameters(), lr=1e-4)

num_epochs = 10
train_ch4_9(
    model_no_detector, train_iter, testcartoon_iter, loss_f, num_epochs,
    optimizer, Use_gpu,
    test_iter2=testnormal_iter,
    savename='../data/ch04-4-9-2and4-9-3/dogvscatwithoutdetectormodel.pth')
# Free memory.
del model_no_detector
torch.cuda.empty_cache()

使用分类器来训练模型的结果

gc.collect()  # release unused memory
# Data loading
batch_size = 32
data_dir = "../data/ch04-4-9-2and4-9-3/dogvscat"
# Per-channel statistics of the training set and of both test sets.
mean_train_list, std_train_list = getmean_str(
    os.path.join(data_dir, 'train'), 'train')
mean_testcartoon_list, std_testcartoon_list = getmean_str(
    os.path.join(data_dir, 'testcartoon'), 'testcartoon')
mean_testnormal_list, std_testnormal_list = getmean_str(
    os.path.join(data_dir, 'test'), 'testnormal')
# Loaders: each set is normalized with its own statistics.
train_iter, image_train_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'train'),
    mean_train_list, std_train_list, batch_size)
testcartoon_iter, image_testcartoon_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'testcartoon'),
    mean_testcartoon_list, std_testcartoon_list, batch_size)
testnormal_iter, image_testnormal_datasets = load_data_cartoonvsnormal(
    os.path.join(data_dir, 'test'),
    mean_testnormal_list, std_testnormal_list, batch_size)

# Model: pretrained ResNet-50 with a new 2-way output layer.
model_detector = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
Use_gpu = torch.cuda.is_available()
if Use_gpu:
    torch.cuda.empty_cache()
    print('clear')
# Freeze the pretrained weights; only the final fully connected layer is trained.
for param in model_detector.parameters():
    param.requires_grad = False
model_detector.fc = torch.nn.Linear(2048, 2)
nn.init.xavier_uniform_(model_detector.fc.weight)

if Use_gpu:
    model_detector = model_detector.cuda()

#损失函数和优化器
def myloss(y_hat,y,beta,scale=25):
    """Importance-weighted cross-entropy loss.

    Each sample's cross-entropy is multiplied by its weight ``beta``; the
    weighted losses are averaged and multiplied by ``scale``.

    Args:
        y_hat: class scores, one row per sample.
        y: integer class labels.
        beta: importance weight of each sample.
        scale: constant factor applied to the mean weighted loss. Using
            ``beta`` alone did not improve the result, possibly because
            ``beta`` is below 1 and shrinks the loss and thus the gradient
            step, so the loss is scaled up (25 by default).

    Returns:
        Scalar tensor with the scaled, weighted mean loss.
    """
    per_sample = torch.nn.functional.cross_entropy(y_hat, y, reduction='none')
    return scale * torch.mul(beta, per_sample).mean()
     
    
# Only the new final layer is optimized.
optimizer = torch.optim.SGD(model_detector.fc.parameters(), lr=1e-4)

num_epochs = 10
# Train with the importance-weighted loss; the detector supplies the weights.
train_ch4_9(
    model_detector, train_iter, testcartoon_iter, myloss, num_epochs,
    optimizer, Use_gpu,
    test_iter2=testnormal_iter,
    savename='../data/ch04-4-9-2and4-9-3/dogvscatwithdetectormodel.pth',
    detector=covariate_shift_detector)
# Free memory.
del model_detector
torch.cuda.empty_cache()

比较

gc.collect()
Use_gpu = torch.cuda.is_available()
if Use_gpu:
    torch.cuda.empty_cache()
    print('clear')
data_dir = "../data/ch04-4-9-2and4-9-3/dogvscat"
batch_size=32
mean_testcartoon_list,std_testcartoon_list=getmean_str(os.path.join(data_dir, 'testcartoon'),'testcartoon')
testcartoon_iter,image_testcartoon_datasets=load_data_cartoonvsnormal(os.path.join(data_dir, 'testcartoon'),mean_testcartoon_list,std_testcartoon_list,batch_size)


def _load_dogvscat_model(weights_path):
    """Rebuild the 2-way ResNet-50 and load the weights saved at ``weights_path``."""
    # The saved state dict overwrites every weight, so no pretrained weights
    # are loaded first; map_location allows loading on a CPU-only machine.
    net = models.resnet50(weights=None)
    net.fc = torch.nn.Linear(2048,2)
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    return net


# Model trained with the detector-weighted loss.
model_detector = _load_dogvscat_model('../data/ch04-4-9-2and4-9-3/dogvscatwithdetectormodel.pth')
# Model trained with the plain loss.
model_no_detector = _load_dogvscat_model('../data/ch04-4-9-2and4-9-3/dogvscatwithoutdetectormodel.pth')
# Both models are evaluated on the cartoonized test set.
print("没使用分类器测试准确率：{:.4f}%,使用分类器测试准确率：{:.4f}%".format(100*evaluate_accuracy_ch4_9(model_no_detector,testcartoon_iter,Use_gpu),100*evaluate_accuracy_ch4_9(model_detector, testcartoon_iter,Use_gpu)))
# Free memory.
del model_no_detector
del model_detector
torch.cuda.empty_cache()

结果

testcartoon mean: tensor([0.4168, 0.3945, 0.3463]) std: tensor([0.2118, 0.2033, 0.1590]) 没使用分类器测试准确率：89.8000%,使用分类器测试准确率：92.3600%

事实上，如果优化器不用 SGD 而是用 Adam，会得到更好的结果，而且你会发现用不用分类器结果都差不多 orz

在训练的时候会遇见 test_acc 高于 train_acc 的情况，这可能是因为 train_acc 是在每个 batch 之后统计的，而 test_acc 是在一个 epoch 结束后统计的（一个牵强的解释，我始终不能理解为什么可以在一个 epoch 后就产生这么高的 test_acc）。